1 /* 2 * Copyright (c) 2011-2020 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version of media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no-longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/buf.h>

#include <crypto/sha2/sha2.h>

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static int hammer2_chain_delete_obref(hammer2_chain_t *parent,
		hammer2_chain_t *chain,
		hammer2_tid_t mtid, int flags,
		hammer2_blockref_t *obref);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **brefp);
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
		int depth);

/*
 * There are many degenerate situations where an extreme rate of console
 * output can occur from warnings and errors.  Make sure this output does
 * not impede operations.
 */
static struct krate krate_h2chk = { .freq = 5 };
static struct krate krate_h2me = { .freq = 1 };
static struct krate krate_h2em = { .freq = 1 };

/*
 * Basic RBTree for chains (core.rbtree).
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * RBTree comparison function.  Chains are keyed on the (key, keybits)
 * range they cover.  Returns -1 if chain1's range lies entirely below
 * chain2's, +1 if entirely above, and 0 on any overlap.
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen, but to
	 * catch any software issues early we count overlaps as a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)	/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)	/* fully to the right */
		return(1);
	return(0);		/* overlap (must not cross edge boundary) */
}

/*
 * Assert that a chain has no media data associated with it.  The volume
 * header and freemap roots embed their data, so only chain->dio is
 * checked for those types.
 */
static __inline void
hammer2_chain_assert_no_data(hammer2_chain_t *chain)
{
	KKASSERT(chain->dio == NULL);
	if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
	    chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
	    chain->data) {
		panic("hammer2_chain_assert_no_data: chain %p still has data",
		      chain);
	}
}

/*
 * Make a chain visible to the flusher.  The flusher operates using a
 * top-down recursion based on the ONFLUSH flag.  It locates MODIFIED and
 * UPDATE chains, flushes them, and updates blocks back to the volume root.
 *
 * This routine sets the ONFLUSH flag upward from the triggering chain until
 * it hits an inode root or the volume root.  Inode chains serve as
 * inflection points, requiring the flusher to bridge across trees.  Inodes
 * include regular inodes, PFS roots (pmp->iroot), and the media super root
 * (spmp->iroot).
 *
 * The upward walk holds the shared spinlock of the next parent before
 * releasing the current chain's, so the parent pointer remains stable at
 * each hand-over.
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_VOLUME:
		chain = kmalloc_obj(sizeof(*chain), hmp->mchain,
				    M_WAITOK | M_ZERO);
		atomic_add_long(&hammer2_chain_allocs, 1);
		break;
	case HAMMER2_BREF_TYPE_EMPTY:
	default:
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
		break;
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_init(chain);

	return (chain);
}

/*
 * A common function to initialize chains including fchain and vchain.
 */
void
hammer2_chain_init(hammer2_chain_t *chain)
{
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
	hammer2_spin_init(&chain->core.spin, "h2chain");
	lockinit(&chain->diolk, "chdio", 0, 0);
}

/*
 * Add a reference to a chain element, preventing its destruction.
 * Undone via hammer2_chain_drop()
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/* NOP - 0->1 transition requires no additional work */
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 *
 * Returns 0 on success or HAMMER2_ERROR_EAGAIN if INSERT_RACE detected a
 * generation change on the parent.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002
#define HAMMER2_CHAIN_INSERT_RACE	0x0004

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain __debugvar;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		 chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
344 */ 345 void 346 hammer2_chain_drop(hammer2_chain_t *chain) 347 { 348 u_int refs; 349 350 KKASSERT(chain->refs > 0); 351 352 while (chain) { 353 refs = chain->refs; 354 cpu_ccfence(); 355 KKASSERT(refs > 0); 356 357 if (refs == 1) { 358 if (hammer2_mtx_ex_try(&chain->lock) == 0) 359 chain = hammer2_chain_lastdrop(chain, 0); 360 /* retry the same chain, or chain from lastdrop */ 361 } else { 362 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 363 break; 364 /* retry the same chain */ 365 } 366 cpu_pause(); 367 } 368 } 369 370 /* 371 * Unhold a held and probably not-locked chain, ensure that the data is 372 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive 373 * lock and then simply unlocking the chain. 374 */ 375 void 376 hammer2_chain_unhold(hammer2_chain_t *chain) 377 { 378 u_int lockcnt; 379 int iter = 0; 380 381 for (;;) { 382 lockcnt = chain->lockcnt; 383 cpu_ccfence(); 384 if (lockcnt > 1) { 385 if (atomic_cmpset_int(&chain->lockcnt, 386 lockcnt, lockcnt - 1)) { 387 break; 388 } 389 } else if (hammer2_mtx_ex_try(&chain->lock) == 0) { 390 hammer2_chain_unlock(chain); 391 break; 392 } else { 393 /* 394 * This situation can easily occur on SMP due to 395 * the gap inbetween the 1->0 transition and the 396 * final unlock. We cannot safely block on the 397 * mutex because lockcnt might go above 1. 398 * 399 * XXX Sleep for one tick if it takes too long. 
400 */ 401 if (++iter > 1000) { 402 if (iter > 1000 + hz) { 403 kprintf("hammer2: h2race1 %p\n", chain); 404 iter = 1000; 405 } 406 tsleep(&iter, 0, "h2race1", 1); 407 } 408 cpu_pause(); 409 } 410 } 411 } 412 413 void 414 hammer2_chain_drop_unhold(hammer2_chain_t *chain) 415 { 416 hammer2_chain_unhold(chain); 417 hammer2_chain_drop(chain); 418 } 419 420 void 421 hammer2_chain_rehold(hammer2_chain_t *chain) 422 { 423 hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED); 424 atomic_add_int(&chain->lockcnt, 1); 425 hammer2_chain_unlock(chain); 426 } 427 428 /* 429 * Handles the (potential) last drop of chain->refs from 1->0. Called with 430 * the mutex exclusively locked, refs == 1, and lockcnt 0. SMP races are 431 * possible against refs and lockcnt. We must dispose of the mutex on chain. 432 * 433 * This function returns an unlocked chain for recursive drop or NULL. It 434 * can return the same chain if it determines it has raced another ref. 435 * 436 * -- 437 * 438 * When two chains need to be recursively dropped we use the chain we 439 * would otherwise free to placehold the additional chain. It's a bit 440 * convoluted but we can't just recurse without potentially blowing out 441 * the kernel stack. 442 * 443 * The chain cannot be freed if it has any children. 444 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it. 445 * The chain cannot be freed if flagged UPDATE unless we can dispose of it. 446 * Any dedup registration can remain intact. 447 * 448 * The core spinlock is allowed to nest child-to-parent (not parent-to-child). 449 */ 450 static 451 hammer2_chain_t * 452 hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth) 453 { 454 hammer2_dev_t *hmp; 455 hammer2_chain_t *parent; 456 hammer2_chain_t *rdrop; 457 458 /* 459 * We need chain's spinlock to interlock the sub-tree test. 460 * We already have chain's mutex, protecting chain->parent. 461 * 462 * Remember that chain->refs can be in flux. 
463 */ 464 hammer2_spin_ex(&chain->core.spin); 465 466 if (chain->parent != NULL) { 467 /* 468 * If the chain has a parent the UPDATE bit prevents scrapping 469 * as the chain is needed to properly flush the parent. Try 470 * to complete the 1->0 transition and return NULL. Retry 471 * (return chain) if we are unable to complete the 1->0 472 * transition, else return NULL (nothing more to do). 473 * 474 * If the chain has a parent the MODIFIED bit prevents 475 * scrapping. 476 */ 477 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 478 HAMMER2_CHAIN_MODIFIED)) { 479 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 480 hammer2_spin_unex(&chain->core.spin); 481 hammer2_chain_assert_no_data(chain); 482 hammer2_mtx_unlock(&chain->lock); 483 chain = NULL; 484 } else { 485 hammer2_spin_unex(&chain->core.spin); 486 hammer2_mtx_unlock(&chain->lock); 487 } 488 return (chain); 489 } 490 /* spinlock still held */ 491 } else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME || 492 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) { 493 /* 494 * Retain the static vchain and fchain. Clear bits that 495 * are not relevant. Do not clear the MODIFIED bit, 496 * and certainly do not put it on the delayed-flush queue. 497 */ 498 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 499 } else { 500 /* 501 * The chain has no parent and can be flagged for destruction. 502 * Since it has no parent, UPDATE can also be cleared. 503 */ 504 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 505 if (chain->flags & HAMMER2_CHAIN_UPDATE) 506 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 507 508 /* 509 * If the chain has children we must propagate the DESTROY 510 * flag downward and rip the disconnected topology apart. 511 * This is accomplished by calling hammer2_flush() on the 512 * chain. 513 * 514 * Any dedup is already handled by the underlying DIO, so 515 * we do not have to specifically flush it here. 
516 */ 517 if (chain->core.chain_count) { 518 hammer2_spin_unex(&chain->core.spin); 519 hammer2_flush(chain, HAMMER2_FLUSH_TOP | 520 HAMMER2_FLUSH_ALL); 521 hammer2_mtx_unlock(&chain->lock); 522 523 return(chain); /* retry drop */ 524 } 525 526 /* 527 * Otherwise we can scrap the MODIFIED bit if it is set, 528 * and continue along the freeing path. 529 * 530 * Be sure to clean-out any dedup bits. Without a parent 531 * this chain will no longer be visible to the flush code. 532 * Easy check data_off to avoid the volume root. 533 */ 534 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 535 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 536 atomic_add_long(&hammer2_count_modified_chains, -1); 537 if (chain->pmp) 538 hammer2_pfs_memory_wakeup(chain->pmp, -1); 539 } 540 /* spinlock still held */ 541 } 542 543 /* spinlock still held */ 544 545 /* 546 * If any children exist we must leave the chain intact with refs == 0. 547 * They exist because chains are retained below us which have refs or 548 * may require flushing. 549 * 550 * Retry (return chain) if we fail to transition the refs to 0, else 551 * return NULL indication nothing more to do. 552 */ 553 if (chain->core.chain_count) { 554 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 555 hammer2_spin_unex(&chain->core.spin); 556 hammer2_chain_assert_no_data(chain); 557 hammer2_mtx_unlock(&chain->lock); 558 chain = NULL; 559 } else { 560 hammer2_spin_unex(&chain->core.spin); 561 hammer2_mtx_unlock(&chain->lock); 562 } 563 return (chain); 564 } 565 /* spinlock still held */ 566 /* no chains left under us */ 567 568 /* 569 * chain->core has no children left so no accessors can get to our 570 * chain from there. Now we have to lock the parent core to interlock 571 * remaining possible accessors that might bump chain's refs before 572 * we can safely drop chain's refs with intent to free the chain. 573 */ 574 hmp = chain->hmp; 575 rdrop = NULL; 576 577 parent = chain->parent; 578 579 /* 580 * WARNING! 
chain's spin lock is still held here, and other spinlocks 581 * will be acquired and released in the code below. We 582 * cannot be making fancy procedure calls! 583 */ 584 585 /* 586 * Spinlock the parent and try to drop the last ref on chain. 587 * On success determine if we should dispose of the chain 588 * (remove the chain from its parent, etc). 589 * 590 * (normal core locks are top-down recursive but we define 591 * core spinlocks as bottom-up recursive, so this is safe). 592 */ 593 if (parent) { 594 hammer2_spin_ex(&parent->core.spin); 595 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 596 /* 597 * 1->0 transition failed, retry. 598 */ 599 hammer2_spin_unex(&parent->core.spin); 600 hammer2_spin_unex(&chain->core.spin); 601 hammer2_mtx_unlock(&chain->lock); 602 603 return(chain); 604 } 605 606 /* 607 * 1->0 transition successful, parent spin held to prevent 608 * new lookups, chain spinlock held to protect parent field. 609 * Remove chain from the parent. 610 * 611 * If the chain is being removed from the parent's rbtree but 612 * is not blkmapped, we have to adjust live_count downward. If 613 * it is blkmapped then the blockref is retained in the parent 614 * as is its associated live_count. This case can occur when 615 * a chain added to the topology is unable to flush and is 616 * then later deleted. 617 */ 618 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 619 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 620 (chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0) { 621 atomic_add_int(&parent->core.live_count, -1); 622 } 623 RB_REMOVE(hammer2_chain_tree, 624 &parent->core.rbtree, chain); 625 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 626 --parent->core.chain_count; 627 chain->parent = NULL; 628 } 629 630 /* 631 * If our chain was the last chain in the parent's core the 632 * core is now empty and its parent might have to be 633 * re-dropped if it has 0 refs. 
634 */ 635 if (parent->core.chain_count == 0) { 636 rdrop = parent; 637 atomic_add_int(&rdrop->refs, 1); 638 /* 639 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 640 rdrop = NULL; 641 */ 642 } 643 hammer2_spin_unex(&parent->core.spin); 644 parent = NULL; /* safety */ 645 /* FALL THROUGH */ 646 } else { 647 /* 648 * No-parent case. 649 */ 650 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 651 /* 652 * 1->0 transition failed, retry. 653 */ 654 hammer2_spin_unex(&parent->core.spin); 655 hammer2_spin_unex(&chain->core.spin); 656 hammer2_mtx_unlock(&chain->lock); 657 658 return(chain); 659 } 660 } 661 662 /* 663 * Successful 1->0 transition, no parent, no children... no way for 664 * anyone to ref this chain any more. We can clean-up and free it. 665 * 666 * We still have the core spinlock, and core's chain_count is 0. 667 * Any parent spinlock is gone. 668 */ 669 hammer2_spin_unex(&chain->core.spin); 670 hammer2_chain_assert_no_data(chain); 671 hammer2_mtx_unlock(&chain->lock); 672 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 673 chain->core.chain_count == 0); 674 675 /* 676 * All locks are gone, no pointers remain to the chain, finish 677 * freeing it. 678 */ 679 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 680 HAMMER2_CHAIN_MODIFIED)) == 0); 681 682 /* 683 * Once chain resources are gone we can use the now dead chain 684 * structure to placehold what might otherwise require a recursive 685 * drop, because we have potentially two things to drop and can only 686 * return one directly. 687 */ 688 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 689 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED); 690 chain->hmp = NULL; 691 kfree_obj(chain, hmp->mchain); 692 atomic_add_long(&hammer2_chain_allocs, -1); 693 } 694 695 /* 696 * Possible chaining loop when parent re-drop needed. 697 */ 698 return(rdrop); 699 } 700 701 /* 702 * On last lock release. 
 */
static hammer2_io_t *
hammer2_chain_drop_data(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;

	if ((dio = chain->dio) != NULL) {
		/* detach the dio; caller disposes of it */
		chain->dio = NULL;
		chain->data = NULL;
	} else {
		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			/* embedded data, nothing to drop */
			break;
		default:
			/* without a dio, chain->data must already be NULL */
			if (chain->data != NULL) {
				hammer2_spin_unex(&chain->core.spin);
				panic("chain data not null: "
				      "chain %p bref %016jx.%02x "
				      "refs %d parent %p dio %p data %p",
				      chain, chain->bref.data_off,
				      chain->bref.type, chain->refs,
				      chain->parent,
				      chain->dio, chain->data);
			}
			KKASSERT(chain->data == NULL);
			break;
		}
	}
	return dio;
}

/*
 * Lock a referenced chain element, acquiring its data with I/O if necessary,
 * and specify how you would like the data to be resolved.
 *
 * If an I/O or other fatal error occurs, chain->error will be set to non-zero.
 *
 * The lock is allowed to recurse, multiple locking ops will aggregate
 * the requested resolve types.  Once data is assigned it will not be
 * removed until the last unlock.
 *
 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element.
 *			   (typically used to avoid device/logical buffer
 *			    aliasing for data)
 *
 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in
 *			   the INITIAL-create state (indirect blocks only).
 *
 *			   Do not resolve data elements for DATA chains.
 *			   (typically used to avoid device/logical buffer
 *			    aliasing for data)
 *
 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element.
 *
 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise
 *			   it will be locked exclusive.
 *
 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking.  If
 *			   the lock fails, EAGAIN is returned.
 *
 * NOTE: Embedded elements (volume header, inodes) are always resolved
 *	 regardless.
 *
 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
 *	 element will instantiate and zero its buffer, and flush it on
 *	 release.
 *
 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
 *	 so as not to instantiate a device buffer, which could alias against
 *	 a logical file buffer.  However, if ALWAYS is specified the
 *	 device buffer will be instantiated anyway.
 *
 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which
 *	 case it can be either 0 or EAGAIN.
 *
 * WARNING! This function blocks on I/O if data needs to be fetched.  This
 *	    blocking can run concurrent with other compatible lock holders
 *	    who do not need data returning.  The lock is not upgraded to
 *	    exclusive during a data fetch, a separate bit is used to
 *	    interlock I/O.  However, an exclusive lock holder can still count
 *	    on being interlocked against an I/O fetch managed by a shared
 *	    lock holder.
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
	KKASSERT(chain->refs > 0);

	if (how & HAMMER2_RESOLVE_NONBLOCK) {
		/*
		 * We still have to bump lockcnt before acquiring the lock,
		 * even for non-blocking operation, because the unlock code
		 * live-loops on lockcnt == 1 when dropping the last lock.
		 *
		 * If the non-blocking operation fails we have to use an
		 * unhold sequence to undo the mess.
		 *
		 * NOTE: LOCKAGAIN must always succeed without blocking,
		 *	 even if NONBLOCK is specified.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				if (hammer2_mtx_sh_try(&chain->lock) != 0) {
					hammer2_chain_unhold(chain);
					return EAGAIN;
				}
			}
		} else {
			if (hammer2_mtx_ex_try(&chain->lock) != 0) {
				hammer2_chain_unhold(chain);
				return EAGAIN;
			}
		}
	} else {
		/*
		 * Get the appropriate lock.  If LOCKAGAIN is flagged with
		 * SHARED the caller expects a shared lock to already be
		 * present and we are giving it another ref.  This case must
		 * importantly not block if there is a pending exclusive lock
		 * request.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				hammer2_mtx_sh(&chain->lock);
			}
		} else {
			hammer2_mtx_ex(&chain->lock);
		}
	}

	/*
	 * If we already have a valid data pointer make sure the data is
	 * synchronized to the current cpu, and then no further action is
	 * necessary.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return 0;
	}

	/*
	 * Do we have to resolve the data?  This is generally only
	 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
	 * Other BREF types expect the data to be there.
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return 0;
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return 0;
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return 0;
#endif
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
	default:
		break;
	}

	/*
	 * Caller requires data
	 */
	hammer2_chain_load_data(chain);

	return 0;
}

#if 0
/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
	hammer2_chain_lock(chain, how);
	atomic_add_int(&chain->lockcnt, -1);
}

/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
	hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	char *bdata;
	int error;

	/*
	 * Degenerate case, data already present, or chain has no media
	 * reference to load.
	 */
	KKASSERT(chain->lock.mtx_lock & MTX_MASK);
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return;
	}
	if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Gain the IOINPROG bit, interlocked block.
	 */
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_CHAIN_IOINPROG) {
			/* another thread is loading; sleep until signaled */
			nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
			tsleep_interlock(&chain->flags, 0);
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				tsleep(&chain->flags, PINTERLOCKED,
				       "h2iocw", 0);
			}
			/* retry */
		} else {
			nflags = oflags | HAMMER2_CHAIN_IOINPROG;
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				break;
			}
			/* retry */
		}
	}

	/*
	 * We own CHAIN_IOINPROG
	 *
	 * Degenerate case if we raced another load.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		goto done;
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		error = hammer2_io_new(hmp, bref->type,
				       bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->type,
					 bref->data_off, chain->bytes,
					 &chain->dio);
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);
	}
	if (error) {
		chain->error = HAMMER2_ERROR_EIO;
		kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		goto done;
	}
	chain->error = 0;

	/*
	 * This isn't perfect and can be ignored on OSs which do not have
	 * an indication as to whether a buffer is coming from cache or
	 * if I/O was actually issued for the read.  TESTEDGOOD will work
	 * pretty well without the B_IOISSUED logic because chains are
	 * cached, but in that situation (without B_IOISSUED) it will not
	 * detect whether a re-read via I/O is corrupted versus the original
	 * read.
	 *
	 * We can't re-run the CRC on every fresh lock.  That would be
	 * insanely expensive.
	 *
	 * If the underlying kernel buffer covers the entire chain we can
	 * use the B_IOISSUED indication to determine if we have to re-run
	 * the CRC on chain data for chains that managed to stay cached
	 * across the kernel disposal of the original buffer.
	 */
	if ((dio = chain->dio) != NULL && dio->bp) {
		struct buf *bp = dio->bp;

		if (dio->psize == chain->bytes &&
		    (bp->b_flags & B_IOISSUED)) {
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_TESTEDGOOD);
			bp->b_flags &= ~B_IOISSUED;
		}
	}

	/*
	 * NOTE: A locked chain's data cannot be modified without first
	 *	 calling hammer2_chain_modify().
	 */

	/*
	 * NOTE: hammer2_io_data() call issues bkvasync()
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);

	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		/*
		 * Clear INITIAL.  In this case we used io_new() and the
		 * buffer has been zero'd and marked dirty.
		 *
		 * CHAIN_MODIFIED has not been set yet, and we leave it
		 * that way for now.  Set a temporary CHAIN_NOTTESTED flag
		 * to prevent hammer2_chain_testcheck() from trying to match
		 * a check code that has not yet been generated.  This bit
		 * should NOT end up on the actual media.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);
	} else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
		/*
		 * check data not currently synchronized due to
		 * modification.  XXX assumes data stays in the buffer
		 * cache, which might not be true (need biodep on flush
		 * to calculate crc?  or simple crc?).
		 */
	} else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
		if (hammer2_chain_testcheck(chain, bdata) == 0) {
			chain->error = HAMMER2_ERROR_CHECK;
		} else {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		}
	}

	/*
	 * Setup the data pointer by pointing it into the buffer.
	 * WARNING! Other threads can start using the data the instant we
	 *	    set chain->data non-NULL.
 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * These are embedded in the volume header and must never
		 * reach the device-buffer resolution path.
		 */
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 * Standard cmpset loop; IOSIGNAL is consumed along with IOINPROG.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			/* not the last lock ref, simple decrement */
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) {
			/* while holding the mutex exclusively */
			/*
			 * 1->0 transition is interlocked by the exclusive
			 * mutex so no new lock can sneak in before we drop
			 * the data reference below.
			 */
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race2 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race2", 1);
			}
			cpu_pause();
		}
		/* retry */
	}

	/*
	 * Last unlock / mutex upgraded to exclusive.  Drop the data
	 * reference.
	 */
	dio = hammer2_chain_drop_data(chain);
	if (dio)
		hammer2_io_bqrelse(&dio);
	hammer2_mtx_unlock(&chain->lock);
}

#if 0
/*
 * Unlock and hold chain data intact
 */
void
hammer2_chain_unlock_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}
#endif

/*
 * Helper to obtain the blockref[] array base and count for a chain.
 *
 * XXX Not widely used yet, various use cases need to be validated and
 * converted to use this function.
1206 */ 1207 static 1208 hammer2_blockref_t * 1209 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1210 { 1211 hammer2_blockref_t *base; 1212 int count; 1213 1214 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1215 base = NULL; 1216 1217 switch(parent->bref.type) { 1218 case HAMMER2_BREF_TYPE_INODE: 1219 count = HAMMER2_SET_COUNT; 1220 break; 1221 case HAMMER2_BREF_TYPE_INDIRECT: 1222 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1223 count = parent->bytes / sizeof(hammer2_blockref_t); 1224 break; 1225 case HAMMER2_BREF_TYPE_VOLUME: 1226 count = HAMMER2_SET_COUNT; 1227 break; 1228 case HAMMER2_BREF_TYPE_FREEMAP: 1229 count = HAMMER2_SET_COUNT; 1230 break; 1231 default: 1232 panic("hammer2_chain_base_and_count: " 1233 "unrecognized blockref type: %d", 1234 parent->bref.type); 1235 count = 0; 1236 break; 1237 } 1238 } else { 1239 switch(parent->bref.type) { 1240 case HAMMER2_BREF_TYPE_INODE: 1241 base = &parent->data->ipdata.u.blockset.blockref[0]; 1242 count = HAMMER2_SET_COUNT; 1243 break; 1244 case HAMMER2_BREF_TYPE_INDIRECT: 1245 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1246 base = &parent->data->npdata[0]; 1247 count = parent->bytes / sizeof(hammer2_blockref_t); 1248 break; 1249 case HAMMER2_BREF_TYPE_VOLUME: 1250 base = &parent->data->voldata. 1251 sroot_blockset.blockref[0]; 1252 count = HAMMER2_SET_COUNT; 1253 break; 1254 case HAMMER2_BREF_TYPE_FREEMAP: 1255 base = &parent->data->blkset.blockref[0]; 1256 count = HAMMER2_SET_COUNT; 1257 break; 1258 default: 1259 panic("hammer2_chain_base_and_count: " 1260 "unrecognized blockref type: %d", 1261 parent->bref.type); 1262 base = NULL; 1263 count = 0; 1264 break; 1265 } 1266 } 1267 *countp = count; 1268 1269 return base; 1270 } 1271 1272 /* 1273 * This counts the number of live blockrefs in a block array and 1274 * also calculates the point at which all remaining blockrefs are empty. 1275 * This routine can only be called on a live chain. 
1276 * 1277 * Caller holds the chain locked, but possibly with a shared lock. We 1278 * must use an exclusive spinlock to prevent corruption. 1279 * 1280 * NOTE: Flag is not set until after the count is complete, allowing 1281 * callers to test the flag without holding the spinlock. 1282 * 1283 * NOTE: If base is NULL the related chain is still in the INITIAL 1284 * state and there are no blockrefs to count. 1285 * 1286 * NOTE: live_count may already have some counts accumulated due to 1287 * creation and deletion and could even be initially negative. 1288 */ 1289 void 1290 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1291 hammer2_blockref_t *base, int count) 1292 { 1293 hammer2_spin_ex(&chain->core.spin); 1294 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 1295 if (base) { 1296 while (--count >= 0) { 1297 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1298 break; 1299 } 1300 chain->core.live_zero = count + 1; 1301 while (count >= 0) { 1302 if (base[count].type != HAMMER2_BREF_TYPE_EMPTY) 1303 atomic_add_int(&chain->core.live_count, 1304 1); 1305 --count; 1306 } 1307 } else { 1308 chain->core.live_zero = 0; 1309 } 1310 /* else do not modify live_count */ 1311 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 1312 } 1313 hammer2_spin_unex(&chain->core.spin); 1314 } 1315 1316 /* 1317 * Resize the chain's physical storage allocation in-place. This function does 1318 * not usually adjust the data pointer and must be followed by (typically) a 1319 * hammer2_chain_modify() call to copy any old data over and adjust the 1320 * data pointer. 1321 * 1322 * Chains can be resized smaller without reallocating the storage. Resizing 1323 * larger will reallocate the storage. Excess or prior storage is reclaimed 1324 * asynchronously at a later time. 1325 * 1326 * An nradix value of 0 is special-cased to mean that the storage should 1327 * be disassociated, that is the chain is being resized to 0 bytes (not 1 1328 * byte). 
1329 * 1330 * Must be passed an exclusively locked parent and chain. 1331 * 1332 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 1333 * to avoid instantiating a device buffer that conflicts with the vnode data 1334 * buffer. However, because H2 can compress or encrypt data, the chain may 1335 * have a dio assigned to it in those situations, and they do not conflict. 1336 * 1337 * XXX return error if cannot resize. 1338 */ 1339 int 1340 hammer2_chain_resize(hammer2_chain_t *chain, 1341 hammer2_tid_t mtid, hammer2_off_t dedup_off, 1342 int nradix, int flags) 1343 { 1344 hammer2_dev_t *hmp; 1345 size_t obytes; 1346 size_t nbytes; 1347 int error; 1348 1349 hmp = chain->hmp; 1350 1351 /* 1352 * Only data and indirect blocks can be resized for now. 1353 * (The volu root, inodes, and freemap elements use a fixed size). 1354 */ 1355 KKASSERT(chain != &hmp->vchain); 1356 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1357 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1358 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT); 1359 1360 /* 1361 * Nothing to do if the element is already the proper size 1362 */ 1363 obytes = chain->bytes; 1364 nbytes = (nradix) ? (1U << nradix) : 0; 1365 if (obytes == nbytes) 1366 return (chain->error); 1367 1368 /* 1369 * Make sure the old data is instantiated so we can copy it. If this 1370 * is a data block, the device data may be superfluous since the data 1371 * might be in a logical block, but compressed or encrypted data is 1372 * another matter. 1373 * 1374 * NOTE: The modify will set BLKMAPUPD for us if BLKMAPPED is set. 1375 */ 1376 error = hammer2_chain_modify(chain, mtid, dedup_off, 0); 1377 if (error) 1378 return error; 1379 1380 /* 1381 * Reallocate the block, even if making it smaller (because different 1382 * block sizes may be in different regions). 
1383 * 1384 * NOTE: Operation does not copy the data and may only be used 1385 * to resize data blocks in-place, or directory entry blocks 1386 * which are about to be modified in some manner. 1387 */ 1388 error = hammer2_freemap_alloc(chain, nbytes); 1389 if (error) 1390 return error; 1391 1392 chain->bytes = nbytes; 1393 1394 /* 1395 * We don't want the followup chain_modify() to try to copy data 1396 * from the old (wrong-sized) buffer. It won't know how much to 1397 * copy. This case should only occur during writes when the 1398 * originator already has the data to write in-hand. 1399 */ 1400 if (chain->dio) { 1401 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1402 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT); 1403 hammer2_io_brelse(&chain->dio); 1404 chain->data = NULL; 1405 } 1406 return (chain->error); 1407 } 1408 1409 /* 1410 * Set the chain modified so its data can be changed by the caller, or 1411 * install deduplicated data. The caller must call this routine for each 1412 * set of modifications it makes, even if the chain is already flagged 1413 * MODIFIED. 1414 * 1415 * Sets bref.modify_tid to mtid only if mtid != 0. Note that bref.modify_tid 1416 * is a CLC (cluster level change) field and is not updated by parent 1417 * propagation during a flush. 1418 * 1419 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect 1420 * chain->error except for HAMMER2_ERROR_ENOSPC. If the allocation fails 1421 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain 1422 * remains unmodified with its old data ref intact and chain->error 1423 * unchanged. 1424 * 1425 * Dedup Handling 1426 * 1427 * If the DEDUPABLE flag is set in the chain the storage must be reallocated 1428 * even if the chain is still flagged MODIFIED. In this case the chain's 1429 * DEDUPABLE flag will be cleared once the new storage has been assigned. 
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;		/* INITIAL flag was set on entry */
	int setmodified;	/* we set MODIFIED here, undo on error */
	int setupdate;		/* we set UPDATE here, undo on error */
	int newmod;		/* new storage allocation required */
	char *bdata;

	hmp = chain->hmp;
	KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else if ((hmp->hflags & HMNT2_EMERG) &&
			   chain->pmp &&
			   chain->bref.modify_tid >
			    chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * If in emergency delete mode then do a modify-in-
			 * place on any chain type belonging to the PFS as
			 * long as it doesn't mess up a snapshot.  We might
			 * be forced to do this anyway a little further down
			 * in the code if the allocation fails.
			 *
			 * Also note that in emergency mode, these modify-in-
			 * place operations are NOT SAFE.  A storage failure,
			 * power failure, or panic can corrupt the filesystem.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		     newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				if ((int)(dedup_off & HAMMER2_OFF_MASK_RADIX))
					chain->bytes = 1 <<
					    (int)(dedup_off &
					    HAMMER2_OFF_MASK_RADIX);
				else
					chain->bytes = 0;
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp) {
					hammer2_pfs_memory_wakeup(
						chain->pmp, -1);
				}
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);

				/*
				 * If we are unable to allocate a new block
				 * but we are in emergency mode, issue a
				 * warning to the console and reuse the same
				 * block.
				 *
				 * We behave as if the allocation were
				 * successful.
				 *
				 * THIS IS IMPORTANT: These modifications
				 * are virtually guaranteed to corrupt any
				 * snapshots related to this filesystem.
				 */
				if (error && (hmp->hflags & HMNT2_EMERG)) {
					error = 0;
					chain->bref.flags |=
						HAMMER2_BREF_FLAG_EMERG_MIP;

					krateprintf(&krate_h2em,
					    "hammer2: Emergency Mode WARNING: "
					    "Operation will likely corrupt "
					    "related snapshot: "
					    "%016jx.%02x key=%016jx\n",
					    chain->bref.data_off,
					    chain->bref.type,
					    chain->bref.key);
				} else if (error == 0) {
					chain->bref.flags &=
						~HAMMER2_BREF_FLAG_EMERG_MIP;
				}
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BLKMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD);

	/*
	 * Short-cut data block handling when the caller does not need an
	 * actual data reference to (aka OPTDATA), as long as the chain does
	 * not already have a data pointer to the data and no de-duplication
	 * occurred.
	 *
	 * This generally means that the modifications are being done via the
	 * logical buffer cache.
	 *
	 * NOTE: If deduplication occurred we have to run through the data
	 *	 stuff to clear INITIAL, and the caller will likely want to
	 *	 assign the check code anyway.  Leaving INITIAL set on a
	 *	 dedup can be deadly (it can cause the block to be zero'd!).
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		if (dedup_off == 0) {
			KKASSERT(chain->dio == NULL);
			goto skip2;
		}
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 *
	 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot accidentally
		 * modify the old buffer's contents and corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0 && dedup_off == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
1886 */ 1887 int 1888 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 1889 hammer2_tid_t mtid, int flags) 1890 { 1891 int error; 1892 1893 hammer2_inode_modify(ip); 1894 error = hammer2_chain_modify(chain, mtid, 0, flags); 1895 1896 return error; 1897 } 1898 1899 /* 1900 * This function returns the chain at the nearest key within the specified 1901 * range. The returned chain will be referenced but not locked. 1902 * 1903 * This function will recurse through chain->rbtree as necessary and will 1904 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1905 * the iteration value is less than the current value of *key_nextp. 1906 * 1907 * The caller should use (*key_nextp) to calculate the actual range of 1908 * the returned element, which will be (key_beg to *key_nextp - 1), because 1909 * there might be another element which is superior to the returned element 1910 * and overlaps it. 1911 * 1912 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1913 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1914 * it will wind up being (key_end + 1). 1915 * 1916 * WARNING! Must be called with child's spinlock held. Spinlock remains 1917 * held through the operation. 
 */
struct hammer2_chain_find_info {
	hammer2_chain_t	*best;		/* best candidate found so far */
	hammer2_key_t	key_beg;	/* inclusive start of search range */
	hammer2_key_t	key_end;	/* inclusive end of search range */
	hammer2_key_t	key_next;	/* iteration hint out-param, 0 = none */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

/*
 * Scan parent's in-memory RB tree for the chain whose key best matches the
 * inclusive range (key_beg, key_end).  Returns the best candidate (neither
 * referenced nor locked) or NULL, and updates *key_nextp as an iteration
 * hint for the caller.
 *
 * NOTE(review): callers in this file hold parent->core.spin across the
 * RB tree access; presumably that spinlock is required here too -- confirm.
 */
static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN comparison: position the scan relative to the search range.
 * Returns -1 if the child's key range lies entirely below the range,
 * +1 if entirely above, and 0 if it overlaps (callback will run).
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback: track the overlapping child with the lowest key as
 * 'best' and truncate key_next to the nearest boundary so callers can
 * iterate.  Overlapping siblings at or below key_beg are illegal in a
 * consistent topology and assert.
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 * (best still holds the previous candidate here.)
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.  A
	 * child_end of 0 means the range wrapped the 64-bit key space,
	 * in which case there is no upper bound to apply.
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.  Insert
 * races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 *
 * WARNING! chain->pmp is always set to NULL for any chain representing
 *	    part of the super-root topology.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref, int how)
{
	hammer2_dev_t *hmp = parent->hmp;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 *
	 * A PFS root gets a NULL pmp here; it is assigned separately
	 * from the parent's pmp.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		chain = hammer2_chain_alloc(hmp, NULL, bref);
	else
		chain = hammer2_chain_alloc(hmp, parent->pmp, bref);
	/* ref'd chain returned */

	/*
	 * Flag that the chain is in the parent's blockmap so delete/flush
	 * knows what to do with it.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED);

	/*
	 * chain must be locked to avoid unexpected ripouts
	 */
	hammer2_chain_lock(chain, how);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 *
	 * NOTE: Get races can occur quite often when we distribute
	 *	 asynchronous read-aheads across multiple threads.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(parent, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
		/*kprintf("chain %p get race\n", chain);*/
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain referenced but not locked, or NULL if
	 * a race occurred.
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API.
 *
 * hammer2_chain_lookup_init() refs and locks (parent) in preparation for
 * a lookup iteration (shared or exclusive depending on flags) and returns
 * it for convenience.
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	hammer2_chain_ref(parent);
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

/*
 * Terminate a lookup iteration, unlocking and dropping the parent
 * acquired by hammer2_chain_lookup_init().  NULL is tolerated.
 */
void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
	}
}

/*
 * Take the locked chain and return a locked parent.  The chain remains
 * locked on return, but may have to be temporarily unlocked to acquire
 * the parent.  Because of this, (chain) must be stable and cannot be
 * deleted while it was temporarily unlocked (typically means that (chain)
 * is an inode).
 *
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_getparent(hammer2_chain_t *chain, int flags)
{
	hammer2_chain_t *parent;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first
	 * with a non-blocking lock so we don't have to release chain
	 * at all in the common case.
	 */
	parent = chain->parent;
	if (parent == NULL)
		panic("hammer2_chain_getparent: no parent");
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0)
		return parent;

	for (;;) {
		hammer2_chain_unlock(chain);
		hammer2_chain_lock(parent, flags);
		hammer2_chain_lock(chain, flags);

		/*
		 * Parent relinking races are quite common.  We have to get
		 * it right or we will blow up the block table.
		 */
		if (chain->parent == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		cpu_ccfence();
		parent = chain->parent;
		if (parent == NULL)
			panic("hammer2_chain_getparent: no parent");
		hammer2_chain_ref(parent);
	}
	return parent;
}

/*
 * Take the locked chain and return a locked parent.  The chain is unlocked
 * and dropped.  *chainp is set to the returned parent as a convenience.
 * Pass HAMMER2_RESOLVE_* flags in flags.
 *
 * This will work even if the chain is errored, and the caller can check
 * parent->error on return if desired since the parent will be locked.
 *
 * The chain does NOT need to be stable.  We use a tracking structure
 * to track the expected parent if the chain is deleted out from under us.
 *
 * This function handles the lock order reversal.
 */
hammer2_chain_t *
hammer2_chain_repparent(hammer2_chain_t **chainp, int flags)
{
	hammer2_chain_t *chain;
	hammer2_chain_t *parent;
	struct hammer2_reptrack reptrack;
	struct hammer2_reptrack **repp;

	/*
	 * Be careful of order, chain must be unlocked before parent
	 * is locked below to avoid a deadlock.  Try it trivially first
	 * with a non-blocking lock.
	 */
	chain = *chainp;
	parent = chain->parent;
	if (parent == NULL) {
		/*
		 * NOTE(review): no spinlock appears to be held on entry,
		 * so this unex before panic looks vestigial -- confirm.
		 */
		hammer2_spin_unex(&chain->core.spin);
		panic("hammer2_chain_repparent: no parent");
	}
	hammer2_chain_ref(parent);
	if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		*chainp = parent;

		return parent;
	}

	/*
	 * Ok, now it gets a bit nasty.  There are multiple situations where
	 * the parent might be in the middle of a deletion, or where the child
	 * (chain) might be deleted the instant we let go of its lock.
	 * We can potentially end up in a no-win situation!
	 *
	 * In particular, the indirect_maintenance() case can cause these
	 * situations.
	 *
	 * To deal with this we install a reptrack structure in the parent
	 * This reptrack structure 'owns' the parent ref and will automatically
	 * migrate to the parent's parent if the parent is deleted permanently.
	 */
	hammer2_spin_init(&reptrack.spin, "h2reptrk");
	reptrack.chain = parent;
	hammer2_chain_ref(parent);		/* for the reptrack */

	hammer2_spin_ex(&parent->core.spin);
	reptrack.next = parent->core.reptrack;
	parent->core.reptrack = &reptrack;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(chain);
	hammer2_chain_drop(chain);
	chain = NULL;	/* gone */

	/*
	 * At the top of this loop, chain is gone and parent is refd both
	 * by us explicitly AND via our reptrack.  We are attempting to
	 * lock parent.  If the reptrack migrated while we waited for the
	 * lock, chase the new parent it points at and retry.
	 */
	for (;;) {
		hammer2_chain_lock(parent, flags);

		if (reptrack.chain == parent)
			break;
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);

		kprintf("hammer2: debug REPTRACK %p->%p\n",
			parent, reptrack.chain);
		hammer2_spin_ex(&reptrack.spin);
		parent = reptrack.chain;
		hammer2_chain_ref(parent);
		hammer2_spin_unex(&reptrack.spin);
	}

	/*
	 * Once parent is locked and matches our reptrack, our reptrack
	 * will be stable and we have our parent.  We can unlink our
	 * reptrack.
	 *
	 * WARNING!  Remember that the chain lock might be shared.  Chains
	 *	     locked shared have stable parent linkages.
	 */
	hammer2_spin_ex(&parent->core.spin);
	repp = &parent->core.reptrack;
	while (*repp != &reptrack)
		repp = &(*repp)->next;
	*repp = reptrack.next;
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_drop(parent);	/* reptrack ref */
	*chainp = parent;		/* return parent lock+ref */

	return parent;
}

/*
 * Dispose of any linked reptrack structures in (chain) by shifting them to
 * (parent).  Both (chain) and (parent) must be exclusively locked.
 *
 * This is interlocked against any children of (chain) on the other side.
 * No children may remain as-of when this is called so we can test
 * core.reptrack without holding the spin-lock.
 *
 * Used whenever the caller intends to permanently delete chains related
 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE),
 * where the chains underneath the node being deleted are given a new parent
 * above the node being deleted.
 */
static
void
hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain)
{
	struct hammer2_reptrack *reptrack;

	KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree));
	while (chain->core.reptrack) {
		/*
		 * Lock order: parent's spin before chain's spin, then the
		 * reptrack's own spin.  Re-test reptrack under the lock
		 * since it may have been popped while we acquired them.
		 */
		hammer2_spin_ex(&parent->core.spin);
		hammer2_spin_ex(&chain->core.spin);
		reptrack = chain->core.reptrack;
		if (reptrack == NULL) {
			hammer2_spin_unex(&chain->core.spin);
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
		hammer2_spin_ex(&reptrack->spin);
		chain->core.reptrack = reptrack->next;
		reptrack->chain = parent;
		reptrack->next = parent->core.reptrack;
		parent->core.reptrack = reptrack;
		hammer2_chain_ref(parent);		/* reptrack */

		hammer2_spin_unex(&chain->core.spin);
		hammer2_spin_unex(&parent->core.spin);
		kprintf("hammer2: debug repchange %p %p->%p\n",
			reptrack, chain, parent);
		hammer2_chain_drop(chain);		/* reptrack */
	}
}

/*
 * Locate the first chain whose key range overlaps (key_beg, key_end)
 * inclusive.  (*parentp) typically points to an inode but can also point to
 * a related indirect block and this function will recurse upwards and find
 * the inode or the nearest undeleted indirect block covering the key range.
 *
 * This function unconditionally sets *errorp, replacing any previous value.
 *
 * (*parentp) must be exclusive or shared locked (depending on flags) and
 * referenced and can be an inode or an existing indirect block within the
 * inode.
 *
 * If (*parent) is errored out, this function will not attempt to recurse
 * the radix tree and will return NULL along with an appropriate *errorp.
 * If NULL is returned and *errorp is 0, the requested lookup could not be
 * located.
 *
 * On return (*parentp) will be modified to point at the deepest parent chain
 * element encountered during the search, as a helper for an insertion or
 * deletion.
 *
 * The new (*parentp) will be locked shared or exclusive (depending on flags),
 * and referenced, and the old will be unlocked and dereferenced (no change
 * if they are both the same).  This is particularly important if the caller
 * wishes to insert a new chain, (*parentp) will be set properly even if NULL
 * is returned, as long as no error occurred.
 *
 * The matching chain will be returned locked according to flags.
 *
 * --
 *
 * NULL is returned if no match was found, but (*parentp) will still
 * potentially be adjusted.
 *
 * On return (*key_nextp) will point to an iterative value for key_beg.
 * (If NULL is returned (*key_nextp) is set to (key_end + 1)).
 *
 * This function will also recurse up the chain if the key is not within the
 * current parent's range.  (*parentp) can never be set to NULL.  An iteration
 * can simply allow (*parentp) to float inside the loop.
 *
 * NOTE!  chain->data is not always resolved.  By default it will not be
 *	  resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF.  Use
 *	  HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/
 *	  BREF_TYPE_DATA as the device buffer can alias the logical file
 *	  buffer).
 */
hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *errorp, int flags)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;

	/*
	 * Translate HAMMER2_LOOKUP_* flags into the RESOLVE modes used
	 * for the various lock acquisitions below.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * Handle races against the flusher deleting indirect nodes on its
	 * way back up by continuing to recurse upward past the deletion.
	 */
	parent = *parentp;
	*errorp = 0;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) {
			if (key_beg >= scan_beg && key_end <= scan_end)
				break;
		}
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}
again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");

	/*
	 * MATCHIND case that does not require parent->data (do prior to
	 * parent->error check).
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
			       ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				chain = parent;
				hammer2_chain_ref(chain);
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}
		break;
	default:
		break;
	}

	/*
	 * No lookup is possible if the parent is errored.  We delayed
	 * this check as long as we could to ensure that the parent backup,
	 * embedded data, and MATCHIND code could still execute.
	 */
	if (parent->error) {
		*errorp = parent->error;
		return NULL;
	}

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 *
		 * We need a second lock on parent.  Since we already have
		 * a lock we must pass LOCKAGAIN to prevent unexpected
		 * blocking (we don't want to block on a second shared
		 * ref if an exclusive lock is pending)
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NODIRECT) {
				chain = NULL;
				*key_nextp = key_end + 1;
				goto done;
			}
			hammer2_chain_ref(parent);
			hammer2_chain_lock(parent, how_always |
						   HAMMER2_RESOLVE_LOCKAGAIN);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 *
		 * Debugging: Enter permanent wait state instead of
		 * panicing on unexpectedly NULL data for the moment.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL) {
				kprintf("hammer2: unexpected NULL data "
					"on %p\n", parent);
				while (1)
					tsleep(parent, 0, "xxx", 0);
			}
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_lookup: unrecognized "
		      "blockref(B) type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		KKASSERT(chain == NULL);
		hammer2_spin_unex(&parent->core.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);

		/*
		 * Stop if we reached the end of the iteration.
		 */
		if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		    parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			return (NULL);
		}

		/*
		 * Calculate next key, stop if we reached the end of the
		 * iteration (key wrapped or passed key_end), otherwise go
		 * up one level and loop.
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
		goto again;
	}

	/*
	 * Selected from blockref or in-memory chain.  Snapshot the bref
	 * (bsave) before dropping the spinlock so we can revalidate after
	 * the lock is acquired.
	 */
	bsave = *bref;
	if (chain == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how_maybe);
		} else {
			chain = hammer2_chain_get(parent, generation,
						  &bsave, how);
		}
		if (chain == NULL)
			goto again;
	} else {
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);

		/*
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive state.
		 */
		if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
			hammer2_chain_lock(chain, how_maybe);
		} else {
			hammer2_chain_lock(chain, how);
		}
		KKASSERT(chain->parent == parent);
	}
	if (bcmp(&bsave, &chain->bref, sizeof(bsave)) ||
	    chain->parent != parent) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		goto again;
	}


	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		kprintf("skip deleted chain %016jx.%02x key=%016jx\n",
			chain->bref.data_off, chain->bref.type,
			chain->bref.key);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;	/* SAFETY */
		key_beg = *key_nextp;
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		hammer2_chain_unlock(parent);
		hammer2_chain_drop(parent);
		*parentp = parent = chain;
		chain = NULL;	/* SAFETY */
		goto again;
	}
done:
	/*
	 * All done, return the locked chain.
	 *
	 * If the caller does not want a locked chain, replace the lock with
	 * a ref.  Perhaps this can eventually be optimized to not obtain the
	 * lock in the first place for situations where the data does not
	 * need to be resolved.
	 *
	 * NOTE! A chain->error must be tested by the caller upon return.
	 *	 *errorp is only set based on issues which occur while
	 *	 trying to reach the chain.
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after its index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
 *
 * If a fatal error occurs (typically an I/O error), a dummy chain is
 * returned with chain->error and error-identifying information set.  This
 * chain will assert if you try to do anything fancy with it.
 *
 * XXX Depending on where the error occurs we should allow continued iteration.
 *
 * parent must be locked on entry and remains locked throughout.  chain's
 * lock status must match flags.  Chain is always at least referenced.
 *
 * WARNING!  The MATCHIND flag does not apply to this function.
 */
hammer2_chain_t *
hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		   hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end,
		   int *errorp, int flags)
{
	hammer2_chain_t *parent;
	int how_maybe;

	/*
	 * Calculate locking flags for upward recursion.
	 */
	how_maybe = HAMMER2_RESOLVE_MAYBE;
	if (flags & HAMMER2_LOOKUP_SHARED)
		how_maybe |= HAMMER2_RESOLVE_SHARED;

	parent = *parentp;
	*errorp = 0;

	/*
	 * Calculate the next index and recalculate the parent if necessary.
	 */
	if (chain) {
		/*
		 * Advance past the end of the previous chain's key range,
		 * then release it.  A key_beg of 0 afterwards means the
		 * 64-bit key space wrapped, i.e. the iteration ended.
		 */
		key_beg = chain->bref.key +
			  ((hammer2_key_t)1 << chain->bref.keybits);
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);

		/*
		 * chain invalid past this point, but we can still do a
		 * pointer comparison w/parent.
		 *
		 * Any scan where the lookup returned degenerate data embedded
		 * in the inode has an invalid index and must terminate.
		 */
		if (chain == parent)
			return(NULL);
		if (key_beg == 0 || key_beg > key_end)
			return(NULL);
		chain = NULL;
	} else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT &&
		   parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		/*
		 * We reached the end of the iteration.
		 */
		return (NULL);
	} else {
		/*
		 * Continue iteration with next parent unless the current
		 * parent covers the range.
		 *
		 * (This also handles the case of a deleted, empty indirect
		 * node).
		 */
		key_beg = parent->bref.key +
			  ((hammer2_key_t)1 << parent->bref.keybits);
		if (key_beg == 0 || key_beg > key_end)
			return (NULL);
		parent = hammer2_chain_repparent(parentp, how_maybe);
	}

	/*
	 * And execute
	 */
	return (hammer2_chain_lookup(parentp, key_nextp,
				     key_beg, key_end,
				     errorp, flags));
}

/*
 * Caller wishes to iterate chains under parent, loading new chains into
 * chainp.  Caller must initialize *chainp to NULL and *firstp to 1, and
 * then call hammer2_chain_scan() repeatedly until a non-zero return.
 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced
 * with the returned chain for the scan.  The returned *chainp will be
 * locked and referenced.  Any prior contents will be unlocked and dropped.
 *
 * Caller should check the return value.  A normal scan EOF will return
 * exactly HAMMER2_ERROR_EOF.  Any other non-zero value indicates an
 * error trying to access parent data.  Any error in the returned chain
 * must be tested separately by the caller.
 *
 * (*chainp) is dropped on each scan, but will only be set if the returned
 * element itself can recurse.  Leaf elements are NOT resolved, loaded, or
 * returned via *chainp.  The caller will get their bref only.
 *
 * The raw scan function is similar to lookup/next but does not seek to a key.
 * Blockrefs are iterated via first_bref = (parent, NULL) and
 * next_chain = (parent, bref).
 *
 * The passed-in parent must be locked and its data resolved.  The function
 * nominally returns a locked and referenced *chainp != NULL for chains
 * the caller might need to recurse on (and will dispose of any *chainp passed
 * in).  The caller must check the chain->bref.type either way.
 */
int
hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp,
		   hammer2_blockref_t *bref, int *firstp,
		   int flags)
{
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref_ptr;
	hammer2_key_t key;
	hammer2_key_t next_key;
	hammer2_chain_t *chain = NULL;
	int count = 0;
	int how;
	int generation;
	int maxloops = 300000;
	int error;

	error = 0;

	/*
	 * Scan flags borrowed from lookup.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & HAMMER2_LOOKUP_NODATA) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & HAMMER2_LOOKUP_SHARED) {
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Calculate key to locate first/next element, unlocking the previous
	 * element as we go.  Be careful, the key calculation can overflow
	 * (key == 0 after advancing means the 64-bit key space wrapped).
	 *
	 * (also reset bref to NULL)
	 */
	if (*firstp) {
		key = 0;
		*firstp = 0;
	} else {
		key = bref->key + ((hammer2_key_t)1 << bref->keybits);
		if ((chain = *chainp) != NULL) {
			*chainp = NULL;
			hammer2_chain_unlock(chain);
			hammer2_chain_drop(chain);
			chain = NULL;
		}
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
	}

again:
	if (parent->error) {
		error = parent->error;
		goto done;
	}
	if (--maxloops == 0)
		panic("hammer2_chain_scan: maxloops");

	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * An inode with embedded data has no sub-chains.
		 *
		 * WARNING! Bulk scan code may pass a static chain marked
		 *	    as BREF_TYPE_INODE with a copy of the volume
		 *	    root blockset to snapshot the volume.
		 */
		if (parent->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL)
				panic("parent->data is NULL");
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &parent->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_scan: unrecognized blockref type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the parent->core.live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	next_key = 0;
	bref_ptr = NULL;
	hammer2_spin_ex(&parent->core.spin);
	chain = hammer2_combined_find(parent, base, count,
				      &next_key,
				      key, HAMMER2_KEY_MAX,
				      &bref_ptr);
	generation = parent->core.generation;

	/*
	 * Exhausted parent chain, we're done.
	 */
	if (bref_ptr == NULL) {
		hammer2_spin_unex(&parent->core.spin);
		KKASSERT(chain == NULL);
		error |= HAMMER2_ERROR_EOF;
		goto done;
	}

	/*
	 * Copy into the supplied stack-based blockref.
	 */
	*bref = *bref_ptr;

	/*
	 * Selected from blockref or in-memory chain.
	 */
	if (chain == NULL) {
		switch(bref->type) {
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			/*
			 * Recursion, always get the chain
			 */
			hammer2_spin_unex(&parent->core.spin);
			chain = hammer2_chain_get(parent, generation,
						  bref, how);
			if (chain == NULL)
				goto again;
			break;
		default:
			/*
			 * No recursion, do not waste time instantiating
			 * a chain, just iterate using the bref.
			 */
			hammer2_spin_unex(&parent->core.spin);
			break;
		}
	} else {
		/*
		 * Recursion or not we need the chain in order to supply
		 * the bref.
		 */
		hammer2_chain_ref(chain);
		hammer2_spin_unex(&parent->core.spin);
		hammer2_chain_lock(chain, how);
	}
	if (chain &&
	    (bcmp(bref, &chain->bref, sizeof(*bref)) ||
	     chain->parent != parent)) {
		/*
		 * Revalidation failed: the bref or linkage changed while
		 * the spinlock was released.  Retry.
		 */
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;
		goto again;
	}

	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: XXX this could create problems with scans used in
	 *	 situations other than mount-time recovery.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).
	 */
	if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
		chain = NULL;

		key = next_key;
		if (key == 0) {
			error |= HAMMER2_ERROR_EOF;
			goto done;
		}
		goto again;
	}

done:
	/*
	 * All done, return the bref or NULL, supply chain if necessary.
	 */
	if (chain)
		*chainp = chain;
	return (error);
}

/*
 * Create and return a new hammer2 system memory structure of the specified
 * key, type and size and insert it under (*parentp).  This is a full
 * insertion, based on the supplied key/keybits, and may involve creating
 * indirect blocks and moving other chains around via delete/duplicate.
 *
 * This call can be made with parent == NULL as long as a non -1 methods
 * is supplied.  hmp must also be supplied in this situation (otherwise
 * hmp is extracted from the supplied parent).  The chain will be detached
 * from the topology.  A later call with both parent and chain can be made
 * to attach it.
 *
 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION
 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING
 * FULL.  This typically means that the caller is creating the chain after
 * doing a hammer2_chain_lookup().
 *
 * (*parentp) must be exclusive locked and may be replaced on return
 * depending on how much work the function had to do.
 *
 * (*parentp) must not be errored or this function will assert.
 *
 * (*chainp) usually starts out NULL and returns the newly created chain,
 * but if the caller desires the caller may allocate a disconnected chain
 * and pass it in instead.
 *
 * This function should NOT be used to insert INDIRECT blocks.  It is
 * typically used to create/insert inodes and data blocks.
3079 * 3080 * Caller must pass-in an exclusively locked parent the new chain is to 3081 * be inserted under, and optionally pass-in a disconnected, exclusively 3082 * locked chain to insert (else we create a new chain). The function will 3083 * adjust (*parentp) as necessary, create or connect the chain, and 3084 * return an exclusively locked chain in *chainp. 3085 * 3086 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3087 * and will be reassigned. 3088 * 3089 * NOTE: returns HAMMER_ERROR_* flags 3090 */ 3091 int 3092 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3093 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3094 hammer2_key_t key, int keybits, int type, size_t bytes, 3095 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3096 { 3097 hammer2_chain_t *chain; 3098 hammer2_chain_t *parent; 3099 hammer2_blockref_t *base; 3100 hammer2_blockref_t dummy; 3101 int allocated = 0; 3102 int error = 0; 3103 int count; 3104 int maxloops = 300000; 3105 3106 /* 3107 * Topology may be crossing a PFS boundary. 3108 */ 3109 parent = *parentp; 3110 if (parent) { 3111 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3112 KKASSERT(parent->error == 0); 3113 hmp = parent->hmp; 3114 } 3115 chain = *chainp; 3116 3117 if (chain == NULL) { 3118 /* 3119 * First allocate media space and construct the dummy bref, 3120 * then allocate the in-memory chain structure. Set the 3121 * INITIAL flag for fresh chains which do not have embedded 3122 * data. 3123 */ 3124 bzero(&dummy, sizeof(dummy)); 3125 dummy.type = type; 3126 dummy.key = key; 3127 dummy.keybits = keybits; 3128 dummy.data_off = hammer2_getradix(bytes); 3129 3130 /* 3131 * Inherit methods from parent by default. Primarily used 3132 * for BREF_TYPE_DATA. Non-data types *must* be set to 3133 * a non-NONE check algorithm. 
3134 */ 3135 if (methods == HAMMER2_METH_DEFAULT) 3136 dummy.methods = parent->bref.methods; 3137 else 3138 dummy.methods = (uint8_t)methods; 3139 3140 if (type != HAMMER2_BREF_TYPE_DATA && 3141 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3142 dummy.methods |= 3143 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3144 } 3145 3146 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3147 3148 /* 3149 * Lock the chain manually, chain_lock will load the chain 3150 * which we do NOT want to do. (note: chain->refs is set 3151 * to 1 by chain_alloc() for us, but lockcnt is not). 3152 */ 3153 chain->lockcnt = 1; 3154 hammer2_mtx_ex(&chain->lock); 3155 allocated = 1; 3156 3157 /* 3158 * Set INITIAL to optimize I/O. The flag will generally be 3159 * processed when we call hammer2_chain_modify(). 3160 */ 3161 switch(type) { 3162 case HAMMER2_BREF_TYPE_VOLUME: 3163 case HAMMER2_BREF_TYPE_FREEMAP: 3164 panic("hammer2_chain_create: called with volume type"); 3165 break; 3166 case HAMMER2_BREF_TYPE_INDIRECT: 3167 panic("hammer2_chain_create: cannot be used to" 3168 "create indirect block"); 3169 break; 3170 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3171 panic("hammer2_chain_create: cannot be used to" 3172 "create freemap root or node"); 3173 break; 3174 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3175 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3176 /* fall through */ 3177 case HAMMER2_BREF_TYPE_DIRENT: 3178 case HAMMER2_BREF_TYPE_INODE: 3179 case HAMMER2_BREF_TYPE_DATA: 3180 default: 3181 /* 3182 * leave chain->data NULL, set INITIAL 3183 */ 3184 KKASSERT(chain->data == NULL); 3185 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3186 break; 3187 } 3188 } else { 3189 /* 3190 * We are reattaching a previously deleted chain, possibly 3191 * under a new parent and possibly with a new key/keybits. 3192 * The chain does not have to be in a modified state. The 3193 * UPDATE flag will be set later on in this routine. 3194 * 3195 * Do NOT mess with the current state of the INITIAL flag. 
3196 */ 3197 chain->bref.key = key; 3198 chain->bref.keybits = keybits; 3199 if (chain->flags & HAMMER2_CHAIN_DELETED) 3200 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3201 KKASSERT(chain->parent == NULL); 3202 } 3203 3204 /* 3205 * Set the appropriate bref flag if requested. 3206 * 3207 * NOTE! Callers can call this function to move chains without 3208 * knowing about special flags, so don't clear bref flags 3209 * here! 3210 */ 3211 if (flags & HAMMER2_INSERT_PFSROOT) 3212 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3213 3214 if (parent == NULL) 3215 goto skip; 3216 3217 /* 3218 * Calculate how many entries we have in the blockref array and 3219 * determine if an indirect block is required when inserting into 3220 * the parent. 3221 */ 3222 again: 3223 if (--maxloops == 0) 3224 panic("hammer2_chain_create: maxloops"); 3225 3226 switch(parent->bref.type) { 3227 case HAMMER2_BREF_TYPE_INODE: 3228 if ((parent->data->ipdata.meta.op_flags & 3229 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3230 kprintf("hammer2: parent set for direct-data! 
" 3231 "pkey=%016jx ckey=%016jx\n", 3232 parent->bref.key, 3233 chain->bref.key); 3234 } 3235 KKASSERT((parent->data->ipdata.meta.op_flags & 3236 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3237 KKASSERT(parent->data != NULL); 3238 base = &parent->data->ipdata.u.blockset.blockref[0]; 3239 count = HAMMER2_SET_COUNT; 3240 break; 3241 case HAMMER2_BREF_TYPE_INDIRECT: 3242 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3243 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3244 base = NULL; 3245 else 3246 base = &parent->data->npdata[0]; 3247 count = parent->bytes / sizeof(hammer2_blockref_t); 3248 break; 3249 case HAMMER2_BREF_TYPE_VOLUME: 3250 KKASSERT(parent->data != NULL); 3251 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3252 count = HAMMER2_SET_COUNT; 3253 break; 3254 case HAMMER2_BREF_TYPE_FREEMAP: 3255 KKASSERT(parent->data != NULL); 3256 base = &parent->data->blkset.blockref[0]; 3257 count = HAMMER2_SET_COUNT; 3258 break; 3259 default: 3260 panic("hammer2_chain_create: unrecognized blockref type: %d", 3261 parent->bref.type); 3262 base = NULL; 3263 count = 0; 3264 break; 3265 } 3266 3267 /* 3268 * Make sure we've counted the brefs 3269 */ 3270 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3271 hammer2_chain_countbrefs(parent, base, count); 3272 3273 KASSERT(parent->core.live_count >= 0 && 3274 parent->core.live_count <= count, 3275 ("bad live_count %d/%d (%02x, %d)", 3276 parent->core.live_count, count, 3277 parent->bref.type, parent->bytes)); 3278 3279 /* 3280 * If no free blockref could be found we must create an indirect 3281 * block and move a number of blockrefs into it. With the parent 3282 * locked we can safely lock each child in order to delete+duplicate 3283 * it without causing a deadlock. 3284 * 3285 * This may return the new indirect block or the old parent depending 3286 * on where the key falls. NULL is returned on error. 
3287 */ 3288 if (parent->core.live_count == count) { 3289 hammer2_chain_t *nparent; 3290 3291 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3292 3293 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3294 mtid, type, &error); 3295 if (nparent == NULL) { 3296 if (allocated) 3297 hammer2_chain_drop(chain); 3298 chain = NULL; 3299 goto done; 3300 } 3301 if (parent != nparent) { 3302 hammer2_chain_unlock(parent); 3303 hammer2_chain_drop(parent); 3304 parent = *parentp = nparent; 3305 } 3306 goto again; 3307 } 3308 3309 /* 3310 * fall through if parent, or skip to here if no parent. 3311 */ 3312 skip: 3313 if (chain->flags & HAMMER2_CHAIN_DELETED) 3314 kprintf("Inserting deleted chain @%016jx\n", 3315 chain->bref.key); 3316 3317 /* 3318 * Link the chain into its parent. 3319 */ 3320 if (chain->parent != NULL) 3321 panic("hammer2: hammer2_chain_create: chain already connected"); 3322 KKASSERT(chain->parent == NULL); 3323 if (parent) { 3324 KKASSERT(parent->core.live_count < count); 3325 hammer2_chain_insert(parent, chain, 3326 HAMMER2_CHAIN_INSERT_SPIN | 3327 HAMMER2_CHAIN_INSERT_LIVE, 3328 0); 3329 } 3330 3331 if (allocated) { 3332 /* 3333 * Mark the newly created chain modified. This will cause 3334 * UPDATE to be set and process the INITIAL flag. 3335 * 3336 * Device buffers are not instantiated for DATA elements 3337 * as these are handled by logical buffers. 3338 * 3339 * Indirect and freemap node indirect blocks are handled 3340 * by hammer2_chain_create_indirect() and not by this 3341 * function. 3342 * 3343 * Data for all other bref types is expected to be 3344 * instantiated (INODE, LEAF). 
3345 */ 3346 switch(chain->bref.type) { 3347 case HAMMER2_BREF_TYPE_DATA: 3348 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3349 case HAMMER2_BREF_TYPE_DIRENT: 3350 case HAMMER2_BREF_TYPE_INODE: 3351 error = hammer2_chain_modify(chain, mtid, dedup_off, 3352 HAMMER2_MODIFY_OPTDATA); 3353 break; 3354 default: 3355 /* 3356 * Remaining types are not supported by this function. 3357 * In particular, INDIRECT and LEAF_NODE types are 3358 * handled by create_indirect(). 3359 */ 3360 panic("hammer2_chain_create: bad type: %d", 3361 chain->bref.type); 3362 /* NOT REACHED */ 3363 break; 3364 } 3365 } else { 3366 /* 3367 * When reconnecting a chain we must set UPDATE and 3368 * setflush so the flush recognizes that it must update 3369 * the bref in the parent. 3370 */ 3371 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3372 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3373 } 3374 3375 /* 3376 * We must setflush(parent) to ensure that it recurses through to 3377 * chain. setflush(chain) might not work because ONFLUSH is possibly 3378 * already set in the chain (so it won't recurse up to set it in the 3379 * parent). 3380 */ 3381 if (parent) 3382 hammer2_chain_setflush(parent); 3383 3384 done: 3385 *chainp = chain; 3386 3387 return (error); 3388 } 3389 3390 /* 3391 * Move the chain from its old parent to a new parent. The chain must have 3392 * already been deleted or already disconnected (or never associated) with 3393 * a parent. The chain is reassociated with the new parent and the deleted 3394 * flag will be cleared (no longer deleted). The chain's modification state 3395 * is not altered. 3396 * 3397 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3398 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3399 * FULL. This typically means that the caller is creating the chain after 3400 * doing a hammer2_chain_lookup(). 3401 * 3402 * Neither (parent) or (chain) can be errored. 
3403 * 3404 * If (parent) is non-NULL then the chain is inserted under the parent. 3405 * 3406 * If (parent) is NULL then the newly duplicated chain is not inserted 3407 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 3408 * passing into hammer2_chain_create() after this function returns). 3409 * 3410 * WARNING! This function calls create which means it can insert indirect 3411 * blocks. This can cause other unrelated chains in the parent to 3412 * be moved to a newly inserted indirect block in addition to the 3413 * specific chain. 3414 */ 3415 void 3416 hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain, 3417 hammer2_tid_t mtid, int flags) 3418 { 3419 hammer2_blockref_t *bref; 3420 hammer2_chain_t *parent; 3421 3422 /* 3423 * WARNING! We should never resolve DATA to device buffers 3424 * (XXX allow it if the caller did?), and since 3425 * we currently do not have the logical buffer cache 3426 * buffer in-hand to fix its cached physical offset 3427 * we also force the modify code to not COW it. XXX 3428 * 3429 * NOTE! We allow error'd chains to be renamed. The bref itself 3430 * is good and can be renamed. The content, however, may 3431 * be inaccessible. 3432 */ 3433 KKASSERT(chain->parent == NULL); 3434 /*KKASSERT(chain->error == 0); allow */ 3435 bref = &chain->bref; 3436 3437 /* 3438 * If parent is not NULL the duplicated chain will be entered under 3439 * the parent and the UPDATE bit set to tell flush to update 3440 * the blockref. 3441 * 3442 * We must setflush(parent) to ensure that it recurses through to 3443 * chain. setflush(chain) might not work because ONFLUSH is possibly 3444 * already set in the chain (so it won't recurse up to set it in the 3445 * parent). 3446 * 3447 * Having both chains locked is extremely important for atomicy. 
3448 */ 3449 if (parentp && (parent = *parentp) != NULL) { 3450 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3451 KKASSERT(parent->refs > 0); 3452 KKASSERT(parent->error == 0); 3453 3454 hammer2_chain_create(parentp, &chain, NULL, chain->pmp, 3455 HAMMER2_METH_DEFAULT, 3456 bref->key, bref->keybits, bref->type, 3457 chain->bytes, mtid, 0, flags); 3458 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 3459 hammer2_chain_setflush(*parentp); 3460 } 3461 } 3462 3463 /* 3464 * This works in tandem with delete_obref() to install a blockref in 3465 * (typically) an indirect block that is associated with the chain being 3466 * moved to *parentp. 3467 * 3468 * The reason we need this function is that the caller needs to maintain 3469 * the blockref as it was, and not generate a new blockref for what might 3470 * be a modified chain. Otherwise stuff will leak into the flush that 3471 * the flush code's FLUSH_INODE_STOP flag is unable to catch. 3472 * 3473 * It is EXTREMELY important that we properly set CHAIN_BLKMAPUPD and 3474 * CHAIN_UPDATE. We must set BLKMAPUPD if the bref does not match, and 3475 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if 3476 * it does. Otherwise we can end up in a situation where H2 is unable to 3477 * clean up the in-memory chain topology. 3478 * 3479 * The reason for this is that flushes do not generally flush through 3480 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq 3481 * or sideq to properly flush and dispose of the related inode chain's flags. 3482 * Situations where the inode is not actually modified by the frontend, 3483 * but where we have to move the related chains around as we insert or cleanup 3484 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory 3485 * inode chain that does not have a hammer2_inode_t associated with it. 
 */
static void
hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	/* Reconnect the chain under *parentp first (sets CHAIN_UPDATE) */
	hammer2_chain_rename(parentp, chain, mtid, flags);

	if (obref->type != HAMMER2_BREF_TYPE_EMPTY) {
		hammer2_blockref_t *tbase;
		int tcount;

		/*
		 * Install the preserved (old) blockref directly into the
		 * new parent's blockmap rather than generating a new one.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0);
		hammer2_chain_modify(*parentp, mtid, 0, 0);
		tbase = hammer2_chain_base_and_count(*parentp, &tcount);
		hammer2_base_insert(*parentp, tbase, tcount, chain, obref);
		if (bcmp(obref, &chain->bref, sizeof(chain->bref))) {
			/*
			 * Installed bref does not match the chain's current
			 * bref; the blockmap entry must be updated on flush.
			 */
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD |
						      HAMMER2_CHAIN_UPDATE);
		} else {
			/*
			 * Exact match; clear UPDATE (likely set by the
			 * chain_rename above) so the chain remains
			 * disposable.
			 */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 */
static int
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags,
			     hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
	KKASSERT(chain->parent == parent);

	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		error = hammer2_chain_modify(parent, mtid, 0, 0);
		if (error)
			goto done;

		/*
		 * Calculate blockmap pointer.
		 *
		 * NOTE: spinlocks are acquired chain-then-parent; the
		 *	 removal from the RB tree and the blockmap delete
		 *	 occur under both locks.
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			panic("_hammer2_chain_delete_helper: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			break;
		}

		/*
		 * delete blockmapped chain from its parent.
		 */
		if (base) {
			hammer2_base_delete(parent, base, count, chain, obref);
		}
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
done:
	return error;
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow four rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.  This
 *     is accomplished by always recursing through matching keyspaces in
 *     the hammer2_chain_lookup() and hammer2_chain_next() API.
 *
 * The current implementation calculates the current worst-case keyspace by
 * iterating the current parent and then divides it into two halves, choosing
 * whichever half has the most elements (not necessarily the half containing
 * the requested key).
 *
 * We can also opt to use the half with the least number of elements.  This
 * causes lower-numbered keys (aka logical file offsets) to recurse through
 * fewer indirect blocks and higher-numbered keys to recurse through more.
 * This also has the risk of not moving enough elements to the new indirect
 * block and being forced to create several indirect blocks before the element
 * can be inserted.
 *
 * Must be called with an exclusively locked parent.
3680 * 3681 * NOTE: *errorp set to HAMMER_ERROR_* flags 3682 */ 3683 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3684 hammer2_key_t *keyp, int keybits, 3685 hammer2_blockref_t *base, int count); 3686 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3687 hammer2_key_t *keyp, int keybits, 3688 hammer2_blockref_t *base, int count, 3689 int ncount); 3690 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3691 hammer2_key_t *keyp, int keybits, 3692 hammer2_blockref_t *base, int count, 3693 int ncount); 3694 static 3695 hammer2_chain_t * 3696 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3697 hammer2_key_t create_key, int create_bits, 3698 hammer2_tid_t mtid, int for_type, int *errorp) 3699 { 3700 hammer2_dev_t *hmp; 3701 hammer2_blockref_t *base; 3702 hammer2_blockref_t *bref; 3703 hammer2_blockref_t bsave; 3704 hammer2_blockref_t dummy; 3705 hammer2_chain_t *chain; 3706 hammer2_chain_t *ichain; 3707 hammer2_key_t key = create_key; 3708 hammer2_key_t key_beg; 3709 hammer2_key_t key_end; 3710 hammer2_key_t key_next; 3711 int keybits = create_bits; 3712 int count; 3713 int ncount; 3714 int nbytes; 3715 int loops; 3716 int error; 3717 int reason; 3718 int generation; 3719 int maxloops = 300000; 3720 3721 /* 3722 * Calculate the base blockref pointer or NULL if the chain 3723 * is known to be empty. We need to calculate the array count 3724 * for RB lookups either way. 3725 */ 3726 hmp = parent->hmp; 3727 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3728 3729 /* 3730 * Pre-modify the parent now to avoid having to deal with error 3731 * processing if we tried to later (in the middle of our loop). 3732 * 3733 * We are going to be moving bref's around, the indirect blocks 3734 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 
3735 */ 3736 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3737 if (*errorp) { 3738 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3739 *errorp, hammer2_error_str(*errorp)); 3740 return NULL; 3741 } 3742 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3743 3744 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3745 base = hammer2_chain_base_and_count(parent, &count); 3746 3747 /* 3748 * How big should our new indirect block be? It has to be at least 3749 * as large as its parent for splits to work properly. 3750 * 3751 * The freemap uses a specific indirect block size. The number of 3752 * levels are built dynamically and ultimately depend on the size 3753 * volume. Because freemap blocks are taken from the reserved areas 3754 * of the volume our goal is efficiency (fewer levels) and not so 3755 * much to save disk space. 3756 * 3757 * The first indirect block level for a directory usually uses 3758 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 3759 * the hash mechanism, this typically gives us a nominal 3760 * 32 * 4 entries with one level of indirection. 3761 * 3762 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 3763 * indirect blocks. The initial 4 entries in the inode gives us 3764 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 3765 * of indirection gives us 137GB, and so forth. H2 can support 3766 * huge file sizes but they are not typical, so we try to stick 3767 * with compactness and do not use a larger indirect block size. 3768 * 3769 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 3770 * due to the way indirect blocks are created this usually winds 3771 * up being extremely inefficient for small files. Even though 3772 * 16KB requires more levels of indirection for very large files, 3773 * the 16KB records can be ganged together into 64KB DIOs. 
3774 */ 3775 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3776 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3777 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3778 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3779 if (parent->data->ipdata.meta.type == 3780 HAMMER2_OBJTYPE_DIRECTORY) 3781 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 3782 else 3783 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 3784 3785 } else { 3786 nbytes = HAMMER2_IND_BYTES_NOM; 3787 } 3788 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3789 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3790 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3791 nbytes = count * sizeof(hammer2_blockref_t); 3792 } 3793 ncount = nbytes / sizeof(hammer2_blockref_t); 3794 3795 /* 3796 * When creating an indirect block for a freemap node or leaf 3797 * the key/keybits must be fitted to static radix levels because 3798 * particular radix levels use particular reserved blocks in the 3799 * related zone. 3800 * 3801 * This routine calculates the key/radix of the indirect block 3802 * we need to create, and whether it is on the high-side or the 3803 * low-side. 3804 */ 3805 switch(for_type) { 3806 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3807 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3808 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3809 base, count); 3810 break; 3811 case HAMMER2_BREF_TYPE_DATA: 3812 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 3813 base, count, ncount); 3814 break; 3815 case HAMMER2_BREF_TYPE_DIRENT: 3816 case HAMMER2_BREF_TYPE_INODE: 3817 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 3818 base, count, ncount); 3819 break; 3820 default: 3821 panic("illegal indirect block for bref type %d", for_type); 3822 break; 3823 } 3824 3825 /* 3826 * Normalize the key for the radix being represented, keeping the 3827 * high bits and throwing away the low bits. 
3828 */ 3829 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3830 3831 /* 3832 * Ok, create our new indirect block 3833 */ 3834 bzero(&dummy, sizeof(dummy)); 3835 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3836 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3837 dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3838 } else { 3839 dummy.type = HAMMER2_BREF_TYPE_INDIRECT; 3840 } 3841 dummy.key = key; 3842 dummy.keybits = keybits; 3843 dummy.data_off = hammer2_getradix(nbytes); 3844 dummy.methods = 3845 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 3846 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 3847 3848 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy); 3849 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3850 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3851 /* ichain has one ref at this point */ 3852 3853 /* 3854 * We have to mark it modified to allocate its block, but use 3855 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3856 * it won't be acted upon by the flush code. 3857 * 3858 * XXX remove OPTDATA, we need a fully initialized indirect block to 3859 * be able to move the original blockref. 3860 */ 3861 *errorp = hammer2_chain_modify(ichain, mtid, 0, 0); 3862 if (*errorp) { 3863 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3864 *errorp, hammer2_error_str(*errorp)); 3865 hammer2_chain_unlock(ichain); 3866 hammer2_chain_drop(ichain); 3867 return NULL; 3868 } 3869 KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0); 3870 3871 /* 3872 * Iterate the original parent and move the matching brefs into 3873 * the new indirect block. 3874 * 3875 * XXX handle flushes. 3876 */ 3877 key_beg = 0; 3878 key_end = HAMMER2_KEY_MAX; 3879 key_next = 0; /* avoid gcc warnings */ 3880 hammer2_spin_ex(&parent->core.spin); 3881 loops = 0; 3882 reason = 0; 3883 3884 for (;;) { 3885 /* 3886 * Parent may have been modified, relocating its block array. 3887 * Reload the base pointer. 
3888 */ 3889 base = hammer2_chain_base_and_count(parent, &count); 3890 3891 if (++loops > 100000) { 3892 hammer2_spin_unex(&parent->core.spin); 3893 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3894 reason, parent, base, count, key_next); 3895 } 3896 3897 /* 3898 * NOTE: spinlock stays intact, returned chain (if not NULL) 3899 * is not referenced or locked which means that we 3900 * cannot safely check its flagged / deletion status 3901 * until we lock it. 3902 */ 3903 chain = hammer2_combined_find(parent, base, count, 3904 &key_next, 3905 key_beg, key_end, 3906 &bref); 3907 generation = parent->core.generation; 3908 if (bref == NULL) 3909 break; 3910 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3911 3912 /* 3913 * Skip keys that are not within the key/radix of the new 3914 * indirect block. They stay in the parent. 3915 */ 3916 if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) { 3917 goto next_key_spinlocked; 3918 } 3919 3920 /* 3921 * Load the new indirect block by acquiring the related 3922 * chains (potentially from media as it might not be 3923 * in-memory). Then move it to the new parent (ichain). 3924 * 3925 * chain is referenced but not locked. We must lock the 3926 * chain to obtain definitive state. 3927 */ 3928 bsave = *bref; 3929 if (chain) { 3930 /* 3931 * Use chain already present in the RBTREE 3932 */ 3933 hammer2_chain_ref(chain); 3934 hammer2_spin_unex(&parent->core.spin); 3935 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3936 } else { 3937 /* 3938 * Get chain for blockref element. _get returns NULL 3939 * on insertion race. 3940 */ 3941 hammer2_spin_unex(&parent->core.spin); 3942 chain = hammer2_chain_get(parent, generation, &bsave, 3943 HAMMER2_RESOLVE_NEVER); 3944 if (chain == NULL) { 3945 reason = 1; 3946 hammer2_spin_ex(&parent->core.spin); 3947 continue; 3948 } 3949 } 3950 3951 /* 3952 * This is always live so if the chain has been deleted 3953 * we raced someone and we have to retry. 
3954 * 3955 * NOTE: Lookups can race delete-duplicate because 3956 * delete-duplicate does not lock the parent's core 3957 * (they just use the spinlock on the core). 3958 * 3959 * (note reversed logic for this one) 3960 */ 3961 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 3962 chain->parent != parent || 3963 (chain->flags & HAMMER2_CHAIN_DELETED)) { 3964 hammer2_chain_unlock(chain); 3965 hammer2_chain_drop(chain); 3966 if (hammer2_debug & 0x0040) { 3967 kprintf("LOST PARENT RETRY " 3968 "RETRY (%p,%p)->%p %08x\n", 3969 parent, chain->parent, chain, chain->flags); 3970 } 3971 hammer2_spin_ex(&parent->core.spin); 3972 continue; 3973 } 3974 3975 /* 3976 * Shift the chain to the indirect block. 3977 * 3978 * WARNING! The (parent, chain) deletion may modify the parent 3979 * and invalidate the base pointer. 3980 * 3981 * WARNING! Parent must already be marked modified, so we 3982 * can assume that chain_delete always suceeds. 3983 * 3984 * WARNING! hammer2_chain_repchange() does not have to be 3985 * called (and doesn't work anyway because we are 3986 * only doing a partial shift). A recursion that is 3987 * in-progress can continue at the current parent 3988 * and will be able to properly find its next key. 3989 */ 3990 error = hammer2_chain_delete_obref(parent, chain, mtid, 0, 3991 &bsave); 3992 KKASSERT(error == 0); 3993 hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave); 3994 hammer2_chain_unlock(chain); 3995 hammer2_chain_drop(chain); 3996 KKASSERT(parent->refs > 0); 3997 chain = NULL; 3998 base = NULL; /* safety */ 3999 hammer2_spin_ex(&parent->core.spin); 4000 next_key_spinlocked: 4001 if (--maxloops == 0) 4002 panic("hammer2_chain_create_indirect: maxloops"); 4003 reason = 4; 4004 if (key_next == 0 || key_next > key_end) 4005 break; 4006 key_beg = key_next; 4007 /* loop */ 4008 } 4009 hammer2_spin_unex(&parent->core.spin); 4010 4011 /* 4012 * Insert the new indirect block into the parent now that we've 4013 * cleared out some entries in the parent. 
We calculated a good 4014 * insertion index in the loop above (ichain->index). 4015 * 4016 * We don't have to set UPDATE here because we mark ichain 4017 * modified down below (so the normal modified -> flush -> set-moved 4018 * sequence applies). 4019 * 4020 * The insertion shouldn't race as this is a completely new block 4021 * and the parent is locked. 4022 */ 4023 base = NULL; /* safety, parent modify may change address */ 4024 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4025 KKASSERT(parent->core.live_count < count); 4026 hammer2_chain_insert(parent, ichain, 4027 HAMMER2_CHAIN_INSERT_SPIN | 4028 HAMMER2_CHAIN_INSERT_LIVE, 4029 0); 4030 4031 /* 4032 * Make sure flushes propogate after our manual insertion. 4033 */ 4034 hammer2_chain_setflush(ichain); 4035 hammer2_chain_setflush(parent); 4036 4037 /* 4038 * Figure out what to return. 4039 */ 4040 if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits) != 0) { 4041 /* 4042 * Key being created is outside the key range, 4043 * return the original parent. 4044 */ 4045 hammer2_chain_unlock(ichain); 4046 hammer2_chain_drop(ichain); 4047 } else { 4048 /* 4049 * Otherwise its in the range, return the new parent. 4050 * (leave both the new and old parent locked). 4051 */ 4052 parent = ichain; 4053 } 4054 4055 return(parent); 4056 } 4057 4058 /* 4059 * Do maintenance on an indirect chain. Both parent and chain are locked. 4060 * 4061 * Returns non-zero if (chain) is deleted, either due to being empty or 4062 * because its children were safely moved into the parent. 
 */
int
hammer2_chain_indirect_maintenance(hammer2_chain_t *parent,
				   hammer2_chain_t *chain)
{
	hammer2_blockref_t *chain_base;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t key_next;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_chain_t *sub;
	int chain_count;
	int count;
	int error;
	int generation;

	/*
	 * Make sure we have an accurate live_count for (chain) before
	 * using it in the tests below.
	 */
	if ((chain->flags & (HAMMER2_CHAIN_INITIAL |
			     HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		hammer2_chain_countbrefs(chain, base, count);
	}

	/*
	 * If the indirect block is empty we can delete it.
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	/*
	 * The parent's live_count must also be accurate for the
	 * collapse calculation below.
	 */
	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.  The '- 1' accounts for the
	 * blockref slot in the parent which (chain) itself occupies and
	 * which the collapse frees up.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	/* zero stats to avoid double-counting the children moved below */
	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		/*
		 * Reload base/count on each pass; the delete_obref call
		 * below modifies (chain)'s block array.
		 */
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;		/* scan exhausted */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Acquire a ref+lock on the child.  combined_find returns
		 * (sub) unreferenced, or returns only a bref in which case
		 * the chain must be instantiated from the media blockref.
		 */
		bsave = *bref;
		if (sub) {
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bsave,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				/* lost an insertion race, retry this key */
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}

		/*
		 * Revalidate after locking; retry if we raced a deletion
		 * or relocation of the child.
		 */
		if (bcmp(&bsave, &sub->bref, sizeof(bsave)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}

		/*
		 * Move the child from (chain) into (parent).
		 */
		error = hammer2_chain_delete_obref(chain, sub,
						   sub->bref.modify_tid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&parent, sub,
				     sub->bref.modify_tid,
				     HAMMER2_INSERT_SAMEPARENT, &bsave);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;		/* key space wrapped, done */
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.
By doing so, eventually all 4241 * low-ordered sets will be moved one level down. 4242 * 4243 * We have to be careful here because the freemap reserves a limited 4244 * number of blocks for a limited number of levels. So we can't just 4245 * push indiscriminately. 4246 */ 4247 int 4248 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 4249 int keybits, hammer2_blockref_t *base, int count) 4250 { 4251 hammer2_chain_t *chain; 4252 hammer2_blockref_t *bref; 4253 hammer2_key_t key; 4254 hammer2_key_t key_beg; 4255 hammer2_key_t key_end; 4256 hammer2_key_t key_next; 4257 int maxloops = 300000; 4258 4259 key = *keyp; 4260 keybits = 64; 4261 4262 /* 4263 * Calculate the range of keys in the array being careful to skip 4264 * slots which are overridden with a deletion. 4265 */ 4266 key_beg = 0; 4267 key_end = HAMMER2_KEY_MAX; 4268 hammer2_spin_ex(&parent->core.spin); 4269 4270 for (;;) { 4271 if (--maxloops == 0) { 4272 panic("indkey_freemap shit %p %p:%d\n", 4273 parent, base, count); 4274 } 4275 chain = hammer2_combined_find(parent, base, count, 4276 &key_next, 4277 key_beg, key_end, 4278 &bref); 4279 4280 /* 4281 * Exhausted search 4282 */ 4283 if (bref == NULL) 4284 break; 4285 4286 /* 4287 * Skip deleted chains. 4288 */ 4289 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4290 if (key_next == 0 || key_next > key_end) 4291 break; 4292 key_beg = key_next; 4293 continue; 4294 } 4295 4296 /* 4297 * Use the full live (not deleted) element for the scan 4298 * iteration. HAMMER2 does not allow partial replacements. 4299 * 4300 * XXX should be built into hammer2_combined_find(). 
4301 */ 4302 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4303 4304 if (keybits > bref->keybits) { 4305 key = bref->key; 4306 keybits = bref->keybits; 4307 } else if (keybits == bref->keybits && bref->key < key) { 4308 key = bref->key; 4309 } 4310 if (key_next == 0) 4311 break; 4312 key_beg = key_next; 4313 } 4314 hammer2_spin_unex(&parent->core.spin); 4315 4316 /* 4317 * Return the keybits for a higher-level FREEMAP_NODE covering 4318 * this node. 4319 */ 4320 switch(keybits) { 4321 case HAMMER2_FREEMAP_LEVEL0_RADIX: 4322 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 4323 break; 4324 case HAMMER2_FREEMAP_LEVEL1_RADIX: 4325 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 4326 break; 4327 case HAMMER2_FREEMAP_LEVEL2_RADIX: 4328 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 4329 break; 4330 case HAMMER2_FREEMAP_LEVEL3_RADIX: 4331 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 4332 break; 4333 case HAMMER2_FREEMAP_LEVEL4_RADIX: 4334 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 4335 break; 4336 case HAMMER2_FREEMAP_LEVEL5_RADIX: 4337 panic("hammer2_chain_indkey_freemap: level too high"); 4338 break; 4339 default: 4340 panic("hammer2_chain_indkey_freemap: bad radix"); 4341 break; 4342 } 4343 *keyp = key; 4344 4345 return (keybits); 4346 } 4347 4348 /* 4349 * File indirect blocks 4350 * 4351 * Calculate the key/keybits for the indirect block to create by scanning 4352 * existing keys. The key being created is also passed in *keyp and can be 4353 * inside or outside the indirect block. Regardless, the indirect block 4354 * must hold at least two keys in order to guarantee sufficient space. 4355 * 4356 * We use a modified version of the freemap's fixed radix tree, but taylored 4357 * for file data. Basically we configure an indirect block encompassing the 4358 * smallest key. 
4359 */ 4360 static int 4361 hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp, 4362 int keybits, hammer2_blockref_t *base, int count, 4363 int ncount) 4364 { 4365 hammer2_chain_t *chain; 4366 hammer2_blockref_t *bref; 4367 hammer2_key_t key; 4368 hammer2_key_t key_beg; 4369 hammer2_key_t key_end; 4370 hammer2_key_t key_next; 4371 int nradix; 4372 int maxloops = 300000; 4373 4374 key = *keyp; 4375 keybits = 64; 4376 4377 /* 4378 * Calculate the range of keys in the array being careful to skip 4379 * slots which are overridden with a deletion. 4380 * 4381 * Locate the smallest key. 4382 */ 4383 key_beg = 0; 4384 key_end = HAMMER2_KEY_MAX; 4385 hammer2_spin_ex(&parent->core.spin); 4386 4387 for (;;) { 4388 if (--maxloops == 0) { 4389 panic("indkey_freemap shit %p %p:%d\n", 4390 parent, base, count); 4391 } 4392 chain = hammer2_combined_find(parent, base, count, 4393 &key_next, 4394 key_beg, key_end, 4395 &bref); 4396 4397 /* 4398 * Exhausted search 4399 */ 4400 if (bref == NULL) 4401 break; 4402 4403 /* 4404 * Skip deleted chains. 4405 */ 4406 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4407 if (key_next == 0 || key_next > key_end) 4408 break; 4409 key_beg = key_next; 4410 continue; 4411 } 4412 4413 /* 4414 * Use the full live (not deleted) element for the scan 4415 * iteration. HAMMER2 does not allow partial replacements. 4416 * 4417 * XXX should be built into hammer2_combined_find(). 4418 */ 4419 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4420 4421 if (keybits > bref->keybits) { 4422 key = bref->key; 4423 keybits = bref->keybits; 4424 } else if (keybits == bref->keybits && bref->key < key) { 4425 key = bref->key; 4426 } 4427 if (key_next == 0) 4428 break; 4429 key_beg = key_next; 4430 } 4431 hammer2_spin_unex(&parent->core.spin); 4432 4433 /* 4434 * Calculate the static keybits for a higher-level indirect block 4435 * that contains the key. 
4436 */ 4437 *keyp = key; 4438 4439 switch(ncount) { 4440 case HAMMER2_IND_COUNT_MIN: 4441 nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX; 4442 break; 4443 case HAMMER2_IND_COUNT_NOM: 4444 nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX; 4445 break; 4446 case HAMMER2_IND_COUNT_MAX: 4447 nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX; 4448 break; 4449 default: 4450 panic("bad ncount %d\n", ncount); 4451 nradix = 0; 4452 break; 4453 } 4454 4455 /* 4456 * The largest radix that can be returned for an indirect block is 4457 * 63 bits. (The largest practical indirect block radix is actually 4458 * 62 bits because the top-level inode or volume root contains four 4459 * entries, but allow 63 to be returned). 4460 */ 4461 if (nradix >= 64) 4462 nradix = 63; 4463 4464 return keybits + nradix; 4465 } 4466 4467 #if 1 4468 4469 /* 4470 * Directory indirect blocks. 4471 * 4472 * Covers both the inode index (directory of inodes), and directory contents 4473 * (filenames hardlinked to inodes). 4474 * 4475 * Because directory keys are hashed we generally try to cut the space in 4476 * half. We accomodate the inode index (which tends to have linearly 4477 * increasing inode numbers) by ensuring that the keyspace is at least large 4478 * enough to fill up the indirect block being created. 4479 */ 4480 static int 4481 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4482 int keybits, hammer2_blockref_t *base, int count, 4483 int ncount) 4484 { 4485 hammer2_blockref_t *bref; 4486 hammer2_chain_t *chain; 4487 hammer2_key_t key_beg; 4488 hammer2_key_t key_end; 4489 hammer2_key_t key_next; 4490 hammer2_key_t key; 4491 int nkeybits; 4492 int locount; 4493 int hicount; 4494 int maxloops = 300000; 4495 4496 /* 4497 * NOTE: We can't take a shortcut here anymore for inodes because 4498 * the root directory can contain a mix of inodes and directory 4499 * entries (we used to just return 63 if parent->bref.type was 4500 * HAMMER2_BREF_TYPE_INODE. 
4501 */ 4502 key = *keyp; 4503 locount = 0; 4504 hicount = 0; 4505 4506 /* 4507 * Calculate the range of keys in the array being careful to skip 4508 * slots which are overridden with a deletion. 4509 */ 4510 key_beg = 0; 4511 key_end = HAMMER2_KEY_MAX; 4512 hammer2_spin_ex(&parent->core.spin); 4513 4514 for (;;) { 4515 if (--maxloops == 0) { 4516 panic("indkey_freemap shit %p %p:%d\n", 4517 parent, base, count); 4518 } 4519 chain = hammer2_combined_find(parent, base, count, 4520 &key_next, 4521 key_beg, key_end, 4522 &bref); 4523 4524 /* 4525 * Exhausted search 4526 */ 4527 if (bref == NULL) 4528 break; 4529 4530 /* 4531 * Deleted object 4532 */ 4533 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4534 if (key_next == 0 || key_next > key_end) 4535 break; 4536 key_beg = key_next; 4537 continue; 4538 } 4539 4540 /* 4541 * Use the full live (not deleted) element for the scan 4542 * iteration. HAMMER2 does not allow partial replacements. 4543 * 4544 * XXX should be built into hammer2_combined_find(). 4545 */ 4546 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4547 4548 /* 4549 * Expand our calculated key range (key, keybits) to fit 4550 * the scanned key. nkeybits represents the full range 4551 * that we will later cut in half (two halves @ nkeybits - 1). 4552 */ 4553 nkeybits = keybits; 4554 if (nkeybits < bref->keybits) { 4555 if (bref->keybits > 64) { 4556 kprintf("bad bref chain %p bref %p\n", 4557 chain, bref); 4558 Debugger("fubar"); 4559 } 4560 nkeybits = bref->keybits; 4561 } 4562 while (nkeybits < 64 && 4563 rounddown2(key ^ bref->key, (hammer2_key_t)1 << nkeybits) != 0) { 4564 ++nkeybits; 4565 } 4566 4567 /* 4568 * If the new key range is larger we have to determine 4569 * which side of the new key range the existing keys fall 4570 * under by checking the high bit, then collapsing the 4571 * locount into the hicount or vise-versa. 
4572 */ 4573 if (keybits != nkeybits) { 4574 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4575 hicount += locount; 4576 locount = 0; 4577 } else { 4578 locount += hicount; 4579 hicount = 0; 4580 } 4581 keybits = nkeybits; 4582 } 4583 4584 /* 4585 * The newly scanned key will be in the lower half or the 4586 * upper half of the (new) key range. 4587 */ 4588 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4589 ++hicount; 4590 else 4591 ++locount; 4592 4593 if (key_next == 0) 4594 break; 4595 key_beg = key_next; 4596 } 4597 hammer2_spin_unex(&parent->core.spin); 4598 bref = NULL; /* now invalid (safety) */ 4599 4600 /* 4601 * Adjust keybits to represent half of the full range calculated 4602 * above (radix 63 max) for our new indirect block. 4603 */ 4604 --keybits; 4605 4606 /* 4607 * Expand keybits to hold at least ncount elements. ncount will be 4608 * a power of 2. This is to try to completely fill leaf nodes (at 4609 * least for keys which are not hashes). 4610 * 4611 * We aren't counting 'in' or 'out', we are counting 'high side' 4612 * and 'low side' based on the bit at (1LL << keybits). We want 4613 * everything to be inside in these cases so shift it all to 4614 * the low or high side depending on the new high bit. 4615 */ 4616 while (((hammer2_key_t)1 << keybits) < ncount) { 4617 ++keybits; 4618 if (key & ((hammer2_key_t)1 << keybits)) { 4619 hicount += locount; 4620 locount = 0; 4621 } else { 4622 locount += hicount; 4623 hicount = 0; 4624 } 4625 } 4626 4627 if (hicount > locount) 4628 key |= (hammer2_key_t)1 << keybits; 4629 else 4630 key &= ~(hammer2_key_t)1 << keybits; 4631 4632 *keyp = key; 4633 4634 return (keybits); 4635 } 4636 4637 #else 4638 4639 /* 4640 * Directory indirect blocks. 4641 * 4642 * Covers both the inode index (directory of inodes), and directory contents 4643 * (filenames hardlinked to inodes). 4644 * 4645 * Because directory keys are hashed we generally try to cut the space in 4646 * half. 
We accomodate the inode index (which tends to have linearly 4647 * increasing inode numbers) by ensuring that the keyspace is at least large 4648 * enough to fill up the indirect block being created. 4649 */ 4650 static int 4651 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4652 int keybits, hammer2_blockref_t *base, int count, 4653 int ncount) 4654 { 4655 hammer2_blockref_t *bref; 4656 hammer2_chain_t *chain; 4657 hammer2_key_t key_beg; 4658 hammer2_key_t key_end; 4659 hammer2_key_t key_next; 4660 hammer2_key_t key; 4661 int nkeybits; 4662 int locount; 4663 int hicount; 4664 int maxloops = 300000; 4665 4666 /* 4667 * Shortcut if the parent is the inode. In this situation the 4668 * parent has 4+1 directory entries and we are creating an indirect 4669 * block capable of holding many more. 4670 */ 4671 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4672 return 63; 4673 } 4674 4675 key = *keyp; 4676 locount = 0; 4677 hicount = 0; 4678 4679 /* 4680 * Calculate the range of keys in the array being careful to skip 4681 * slots which are overridden with a deletion. 4682 */ 4683 key_beg = 0; 4684 key_end = HAMMER2_KEY_MAX; 4685 hammer2_spin_ex(&parent->core.spin); 4686 4687 for (;;) { 4688 if (--maxloops == 0) { 4689 panic("indkey_freemap shit %p %p:%d\n", 4690 parent, base, count); 4691 } 4692 chain = hammer2_combined_find(parent, base, count, 4693 &key_next, 4694 key_beg, key_end, 4695 &bref); 4696 4697 /* 4698 * Exhausted search 4699 */ 4700 if (bref == NULL) 4701 break; 4702 4703 /* 4704 * Deleted object 4705 */ 4706 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4707 if (key_next == 0 || key_next > key_end) 4708 break; 4709 key_beg = key_next; 4710 continue; 4711 } 4712 4713 /* 4714 * Use the full live (not deleted) element for the scan 4715 * iteration. HAMMER2 does not allow partial replacements. 4716 * 4717 * XXX should be built into hammer2_combined_find(). 
4718 */ 4719 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4720 4721 /* 4722 * Expand our calculated key range (key, keybits) to fit 4723 * the scanned key. nkeybits represents the full range 4724 * that we will later cut in half (two halves @ nkeybits - 1). 4725 */ 4726 nkeybits = keybits; 4727 if (nkeybits < bref->keybits) { 4728 if (bref->keybits > 64) { 4729 kprintf("bad bref chain %p bref %p\n", 4730 chain, bref); 4731 Debugger("fubar"); 4732 } 4733 nkeybits = bref->keybits; 4734 } 4735 while (nkeybits < 64 && 4736 (~(((hammer2_key_t)1 << nkeybits) - 1) & 4737 (key ^ bref->key)) != 0) { 4738 ++nkeybits; 4739 } 4740 4741 /* 4742 * If the new key range is larger we have to determine 4743 * which side of the new key range the existing keys fall 4744 * under by checking the high bit, then collapsing the 4745 * locount into the hicount or vise-versa. 4746 */ 4747 if (keybits != nkeybits) { 4748 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4749 hicount += locount; 4750 locount = 0; 4751 } else { 4752 locount += hicount; 4753 hicount = 0; 4754 } 4755 keybits = nkeybits; 4756 } 4757 4758 /* 4759 * The newly scanned key will be in the lower half or the 4760 * upper half of the (new) key range. 4761 */ 4762 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4763 ++hicount; 4764 else 4765 ++locount; 4766 4767 if (key_next == 0) 4768 break; 4769 key_beg = key_next; 4770 } 4771 hammer2_spin_unex(&parent->core.spin); 4772 bref = NULL; /* now invalid (safety) */ 4773 4774 /* 4775 * Adjust keybits to represent half of the full range calculated 4776 * above (radix 63 max) for our new indirect block. 4777 */ 4778 --keybits; 4779 4780 /* 4781 * Expand keybits to hold at least ncount elements. ncount will be 4782 * a power of 2. This is to try to completely fill leaf nodes (at 4783 * least for keys which are not hashes). 4784 * 4785 * We aren't counting 'in' or 'out', we are counting 'high side' 4786 * and 'low side' based on the bit at (1LL << keybits). 
We want 4787 * everything to be inside in these cases so shift it all to 4788 * the low or high side depending on the new high bit. 4789 */ 4790 while (((hammer2_key_t)1 << keybits) < ncount) { 4791 ++keybits; 4792 if (key & ((hammer2_key_t)1 << keybits)) { 4793 hicount += locount; 4794 locount = 0; 4795 } else { 4796 locount += hicount; 4797 hicount = 0; 4798 } 4799 } 4800 4801 if (hicount > locount) 4802 key |= (hammer2_key_t)1 << keybits; 4803 else 4804 key &= ~(hammer2_key_t)1 << keybits; 4805 4806 *keyp = key; 4807 4808 return (keybits); 4809 } 4810 4811 #endif 4812 4813 /* 4814 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 4815 * it exists. 4816 * 4817 * Both parent and chain must be locked exclusively. 4818 * 4819 * This function will modify the parent if the blockref requires removal 4820 * from the parent's block table. 4821 * 4822 * This function is NOT recursive. Any entity already pushed into the 4823 * chain (such as an inode) may still need visibility into its contents, 4824 * as well as the ability to read and modify the contents. For example, 4825 * for an unlinked file which is still open. 4826 * 4827 * Also note that the flusher is responsible for cleaning up empty 4828 * indirect blocks. 4829 */ 4830 int 4831 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 4832 hammer2_tid_t mtid, int flags) 4833 { 4834 int error = 0; 4835 4836 KKASSERT(hammer2_mtx_owned(&chain->lock)); 4837 4838 /* 4839 * Nothing to do if already marked. 4840 * 4841 * We need the spinlock on the core whos RBTREE contains chain 4842 * to protect against races. 4843 */ 4844 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 4845 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 4846 chain->parent == parent); 4847 error = _hammer2_chain_delete_helper(parent, chain, 4848 mtid, flags, NULL); 4849 } 4850 4851 /* 4852 * Permanent deletions mark the chain as destroyed. 
4853 * 4854 * NOTE: We do not setflush the chain unless the deletion is 4855 * permanent, since the deletion of a chain does not actually 4856 * require it to be flushed. 4857 */ 4858 if (error == 0) { 4859 if (flags & HAMMER2_DELETE_PERMANENT) { 4860 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 4861 hammer2_chain_setflush(chain); 4862 } 4863 } 4864 4865 return error; 4866 } 4867 4868 static int 4869 hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain, 4870 hammer2_tid_t mtid, int flags, 4871 hammer2_blockref_t *obref) 4872 { 4873 int error = 0; 4874 4875 KKASSERT(hammer2_mtx_owned(&chain->lock)); 4876 4877 /* 4878 * Nothing to do if already marked. 4879 * 4880 * We need the spinlock on the core whos RBTREE contains chain 4881 * to protect against races. 4882 */ 4883 obref->type = HAMMER2_BREF_TYPE_EMPTY; 4884 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 4885 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 4886 chain->parent == parent); 4887 error = _hammer2_chain_delete_helper(parent, chain, 4888 mtid, flags, obref); 4889 } 4890 4891 /* 4892 * Permanent deletions mark the chain as destroyed. 4893 * 4894 * NOTE: We do not setflush the chain unless the deletion is 4895 * permanent, since the deletion of a chain does not actually 4896 * require it to be flushed. 4897 */ 4898 if (error == 0) { 4899 if (flags & HAMMER2_DELETE_PERMANENT) { 4900 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 4901 hammer2_chain_setflush(chain); 4902 } 4903 } 4904 4905 return error; 4906 } 4907 4908 /* 4909 * Returns the index of the nearest element in the blockref array >= elm. 4910 * Returns (count) if no element could be found. 4911 * 4912 * Sets *key_nextp to the next key for loop purposes but does not modify 4913 * it if the next key would be higher than the current value of *key_nextp. 4914 * Note that *key_nexp can overflow to 0, which should be tested by the 4915 * caller. 4916 * 4917 * WARNING! 
Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;

	/*
	 * Require that live chains already have their brefs counted
	 * so we can optimize operations below.
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using parent->cache_index.  This is
	 * the most likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = parent->cache_index;	/* SMP RACE OK */
	cpu_ccfence();
	limit = parent->core.live_zero;	/* all slots >= limit are empty */
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards from the cache hint until we are at or before
	 * the first element which might overlap key_beg.
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	       scan->key > key_beg)) {
		--scan;
		--i;
	}
	parent->cache_index = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != HAMMER2_BREF_TYPE_EMPTY) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			/* element starts past key_beg or overlaps it */
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		parent->cache_index = i;
		if (i >= limit) {
			i = count;
		} else {
			/*
			 * scan_end may wrap to 0 here; the caller tests
			 * *key_nextp for overflow (see header comment).
			 */
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *brefp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain matches, a non-NULL bref may still be returned
 * in *brefp; it then points into the parent's media blockref array.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING! Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **brefp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
	 */
	*key_nextp = key_end + 1;
	i = hammer2_base_find(parent, base, count, key_nextp,
			      key_beg, key_end);
	chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

	/*
	 * Neither matched
	 */
	if (i == count && chain == NULL) {
		*brefp = NULL;
		return(NULL);
	}

	/*
	 * Only chain matched.
	 */
	if (i == count) {
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Only blockref matched.
	 */
	if (chain == NULL) {
		bref = &base[i];
		goto found;
	}

	/*
	 * Both in-memory and blockref matched, select the nearer element.
	 *
	 * If both are flush with the left-hand side or both are the
	 * same distance away, select the chain.  In this situation the
	 * chain must have been loaded from the matching blockmap.
	 */
	if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
	    chain->bref.key == base[i].key) {
		KKASSERT(chain->bref.key == base[i].key);
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Select the nearer key
	 */
	if (chain->bref.key < base[i].key) {
		bref = &chain->bref;
	} else {
		bref = &base[i];
		chain = NULL;
	}

	/*
	 * If the bref is out of bounds we've exhausted our search.
	 */
found:
	if (bref->key > key_end) {
		*brefp = NULL;
		chain = NULL;
	} else {
		*brefp = bref;
	}
	return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain,
		    hammer2_blockref_t *obref)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_blockref_t *scan;
	hammer2_key_t key_next;
	int i;

	/*
	 * Delete element.  Expect the element to exist.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);
	scan = &base[i];
	if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key != elm->key ||
	    ((chain->flags & HAMMER2_CHAIN_BLKMAPUPD) == 0 &&
	     scan->keybits != elm->keybits)) {
		/* release spinlock before panicing so the dump can run */
		hammer2_spin_unex(&parent->core.spin);
		panic("delete base %p element not found at %d/%d elm %p\n",
		      base, i, count, elm);
		return;
	}

	/*
	 * Update stats and zero the entry.
	 *
	 * NOTE: Handle radix == 0 (0 bytes) case.
	 */
	if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count -= (hammer2_off_t)1 <<
			(int)(scan->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(scan->type) {
	case HAMMER2_BREF_TYPE_INODE:
		--parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			/* counter saturated; flag that it is only a hint */
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (scan->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count -=
				scan->embed.stats.data_count;
			parent->bref.embed.stats.inode_count -=
				scan->embed.stats.inode_count;
		}
		if (scan->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			/* clamp at zero; leaf_count is best-effort */
			if (parent->bref.leaf_count <= scan->leaf_count)
				parent->bref.leaf_count = 0;
			else
				parent->bref.leaf_count -= scan->leaf_count;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through into default (no-op) */
	default:
		break;
	}

	/* hand the deleted blockref back to the caller if requested */
	if (obref)
		*obref = *scan;
	bzero(scan, sizeof(*scan));

	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 */
	if (parent->core.live_zero == i + 1) {
		while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY)
			;
		parent->core.live_zero = i + 1;
	}

	/*
	 * Clear appropriate blockmap flags in chain.
	 */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED |
					HAMMER2_CHAIN_BLKMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain, hammer2_blockref_t *elm)
{
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;
	int k;
	int l;
	int u = 1;	/* which insertion path was taken (for diagnostics) */

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * Set appropriate blockmap flags in chain (if not NULL)
	 */
	if (chain)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED);

	/*
	 * Update stats and zero the entry
	 */
	if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count += (hammer2_off_t)1 <<
			(int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(elm->type) {
	case HAMMER2_BREF_TYPE_INODE:
		++parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		if (elm->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count +=
				elm->embed.stats.data_count;
			parent->bref.embed.stats.inode_count +=
				elm->embed.stats.inode_count;
		}
		if (elm->type == HAMMER2_BREF_TYPE_INODE)
			break;
		/* saturate leaf_count at BLOCKREF_LEAF_MAX */
		if (parent->bref.leaf_count + elm->leaf_count <
		    HAMMER2_BLOCKREF_LEAF_MAX) {
			parent->bref.leaf_count += elm->leaf_count;
		} else {
			parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		break;
	default:
		break;
	}


	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 */
	if (i == count && parent->core.live_zero < count) {
		/* append into the first never-used slot */
		i = parent->core.live_zero++;
		base[i] = *elm;
		return;
	}

	/* inclusive end key of the element being inserted */
	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("insert base %p overlapping elements at %d elm %p\n",
		      base, i, elm);
	}

	/*
	 * Try to find an empty slot before or after.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) {
			/*
			 * Empty slot found on the left; shift the elements
			 * between it and the insertion point down one and
			 * place elm just before slot i.
			 */
			if (j == i - 1) {
				base[j] = *elm;
			} else {
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			goto validate;
		}
		++k;
		if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) {
			/*
			 * Empty slot found on the right; shift elements up
			 * one and place elm at slot i.
			 */
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update parent->core.live_zero for live
			 * chains.
			 */
			if (parent->core.live_zero <= k)
				parent->core.live_zero = k + 1;
			u = 2;
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging: verify the array is still sorted with no overlapping
	 * key ranges after the insertion.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

/*
 * qsort comparator: empty elements sort last, everything else by key.
 */
static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) {
		if (bref2->type == HAMMER2_BREF_TYPE_EMPTY)
			return(0);
		return(1);
	} else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;
	int count;

	/* locate the embedded blockref array for this bref type */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			return;
		}
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_base_sort: unrecognized "
		      "blockref(A) type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
 */
void
hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata)
{
	/* check code now computed; clear the not-tested marker */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);

	switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
	case HAMMER2_CHECK_NONE:
		break;
	case HAMMER2_CHECK_DISABLED:
		break;
	case HAMMER2_CHECK_ISCSI32:
		chain->bref.check.iscsi32.value =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	case HAMMER2_CHECK_XXHASH64:
		chain->bref.check.xxhash64.value =
			XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
		break;
	case HAMMER2_CHECK_SHA192:
		{
			SHA256_CTX hash_ctx;
			union {
				uint8_t digest[SHA256_DIGEST_LENGTH];
				uint64_t digest64[SHA256_DIGEST_LENGTH/8];
			} u;

			/*
			 * Reduce a 256-bit SHA-256 digest to the 192-bit
			 * check field by folding the last quadword into
			 * the third.
			 */
			SHA256_Init(&hash_ctx);
			SHA256_Update(&hash_ctx, bdata, chain->bytes);
			SHA256_Final(u.digest, &hash_ctx);
			u.digest64[2] ^= u.digest64[3];
			bcopy(u.digest,
			      chain->bref.check.sha192.data,
			      sizeof(chain->bref.check.sha192.data));
		}
		break;
	case HAMMER2_CHECK_FREEMAP:
		chain->bref.check.freemap.icrc32 =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	default:
		kprintf("hammer2_chain_setcheck: unknown check type %02x\n",
			chain->bref.methods);
		break;
	}
}

/*
 * Characterize a failed check code and try to trace back to the inode.
 */
static void
hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check,
				  int bits)
{
	hammer2_chain_t *lchain;
	hammer2_chain_t *ochain;
	int did;

	/* rate-limited; bail out early if the message was suppressed */
	did = krateprintf(&krate_h2chk,
			  "chain %016jx.%02x (%s) meth=%02x CHECK FAIL "
			  "(flags=%08x, bref/data ",
			  chain->bref.data_off,
			  chain->bref.type,
			  hammer2_bref_type_str(chain->bref.type),
			  chain->bref.methods,
			  chain->flags);
	if (did == 0)
		return;

	if (bits == 32) {
		kprintf("%08x/%08x)\n",
			chain->bref.check.iscsi32.value,
			(uint32_t)check);
	} else {
		kprintf("%016jx/%016jx)\n",
			chain->bref.check.xxhash64.value,
			check);
	}

	/*
	 * Run up the chains to try to find the governing inode so we
	 * can report it.
	 *
	 * XXX This error reporting is not really MPSAFE
	 */
	ochain = chain;
	lchain = chain;
	while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
		lchain = chain;
		chain = chain->parent;
	}

	if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 ||
	     (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) {
		kprintf("   Resides at/in inode %ld\n",
			(long)chain->bref.key);
	} else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		kprintf("   Resides in inode index - CRITICAL!!!\n");
	} else {
		kprintf("   Resides in root index - CRITICAL!!!\n");
	}
	if (ochain->hmp) {
		const char *pfsname = "UNKNOWN";
		int i;

		/* map the hmp back to a PFS name via the cluster table */
		if (ochain->pmp) {
			for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
				if (ochain->pmp->pfs_hmps[i] == ochain->hmp &&
				    ochain->pmp->pfs_names[i]) {
					pfsname = ochain->pmp->pfs_names[i];
					break;
				}
			}
		}
		kprintf("   In pfs %s on device %s\n",
			pfsname, ochain->hmp->devrepname);
	}
}

/*
 * Returns non-zero on success, 0 on failure.
5590 */ 5591 int 5592 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5593 { 5594 uint32_t check32; 5595 uint64_t check64; 5596 int r; 5597 5598 if (chain->flags & HAMMER2_CHAIN_NOTTESTED) 5599 return 1; 5600 5601 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5602 case HAMMER2_CHECK_NONE: 5603 r = 1; 5604 break; 5605 case HAMMER2_CHECK_DISABLED: 5606 r = 1; 5607 break; 5608 case HAMMER2_CHECK_ISCSI32: 5609 check32 = hammer2_icrc32(bdata, chain->bytes); 5610 r = (chain->bref.check.iscsi32.value == check32); 5611 if (r == 0) { 5612 hammer2_characterize_failed_chain(chain, check32, 32); 5613 } 5614 hammer2_process_icrc32 += chain->bytes; 5615 break; 5616 case HAMMER2_CHECK_XXHASH64: 5617 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5618 r = (chain->bref.check.xxhash64.value == check64); 5619 if (r == 0) { 5620 hammer2_characterize_failed_chain(chain, check64, 64); 5621 } 5622 hammer2_process_xxhash64 += chain->bytes; 5623 break; 5624 case HAMMER2_CHECK_SHA192: 5625 { 5626 SHA256_CTX hash_ctx; 5627 union { 5628 uint8_t digest[SHA256_DIGEST_LENGTH]; 5629 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5630 } u; 5631 5632 SHA256_Init(&hash_ctx); 5633 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5634 SHA256_Final(u.digest, &hash_ctx); 5635 u.digest64[2] ^= u.digest64[3]; 5636 if (bcmp(u.digest, 5637 chain->bref.check.sha192.data, 5638 sizeof(chain->bref.check.sha192.data)) == 0) { 5639 r = 1; 5640 } else { 5641 r = 0; 5642 krateprintf(&krate_h2chk, 5643 "chain %016jx.%02x meth=%02x " 5644 "CHECK FAIL\n", 5645 chain->bref.data_off, 5646 chain->bref.type, 5647 chain->bref.methods); 5648 } 5649 } 5650 break; 5651 case HAMMER2_CHECK_FREEMAP: 5652 r = (chain->bref.check.freemap.icrc32 == 5653 hammer2_icrc32(bdata, chain->bytes)); 5654 if (r == 0) { 5655 int did; 5656 5657 did = krateprintf(&krate_h2chk, 5658 "chain %016jx.%02x meth=%02x " 5659 "CHECK FAIL\n", 5660 chain->bref.data_off, 5661 chain->bref.type, 5662 chain->bref.methods); 5663 if (did) { 
5664 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5665 chain->bref.check.freemap.icrc32, 5666 hammer2_icrc32(bdata, chain->bytes), 5667 chain->bytes); 5668 if (chain->dio) { 5669 kprintf("dio %p buf %016jx,%d " 5670 "bdata %p/%p\n", 5671 chain->dio, 5672 (intmax_t)chain->dio->bp->b_loffset, 5673 chain->dio->bp->b_bufsize, 5674 bdata, 5675 chain->dio->bp->b_data); 5676 } 5677 } 5678 } 5679 break; 5680 default: 5681 kprintf("hammer2_chain_testcheck: unknown check type %02x\n", 5682 chain->bref.methods); 5683 r = 1; 5684 break; 5685 } 5686 return r; 5687 } 5688 5689 /* 5690 * Acquire the chain and parent representing the specified inode for the 5691 * device at the specified cluster index. 5692 * 5693 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5694 * 5695 * If we are unable to locate the inode, HAMMER2_ERROR_EIO or HAMMER2_ERROR_CHECK 5696 * is returned. In case of error, *chainp and/or *parentp may still be returned 5697 * non-NULL. 5698 * 5699 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5700 * They will be unlocked and released by this function. The *parentp and 5701 * *chainp representing the located inode are returned locked. 5702 * 5703 * The returned error includes any error on the returned chain in addition to 5704 * errors incurred while trying to lookup the inode. However, a chain->error 5705 * might not be recognized if HAMMER2_LOOKUP_NODATA is passed. This flag may 5706 * not be passed to this function. 5707 */ 5708 int 5709 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5710 int clindex, int flags, 5711 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5712 { 5713 hammer2_chain_t *parent; 5714 hammer2_chain_t *rchain; 5715 hammer2_key_t key_dummy; 5716 hammer2_inode_t *ip; 5717 int resolve_flags; 5718 int error; 5719 5720 KKASSERT((flags & HAMMER2_LOOKUP_NODATA) == 0); 5721 5722 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 
5723 HAMMER2_RESOLVE_SHARED : 0; 5724 5725 /* 5726 * Caller expects us to replace these. 5727 */ 5728 if (*chainp) { 5729 hammer2_chain_unlock(*chainp); 5730 hammer2_chain_drop(*chainp); 5731 *chainp = NULL; 5732 } 5733 if (*parentp) { 5734 hammer2_chain_unlock(*parentp); 5735 hammer2_chain_drop(*parentp); 5736 *parentp = NULL; 5737 } 5738 5739 /* 5740 * Be very careful, this is a backend function and we CANNOT 5741 * lock any frontend inode structure we find. But we have to 5742 * look the inode up this way first in case it exists but is 5743 * detached from the radix tree. 5744 */ 5745 ip = hammer2_inode_lookup(pmp, inum); 5746 if (ip) { 5747 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 5748 parentp, 5749 resolve_flags); 5750 hammer2_inode_drop(ip); 5751 if (*chainp) 5752 return (*chainp)->error; 5753 hammer2_chain_unlock(*chainp); 5754 hammer2_chain_drop(*chainp); 5755 *chainp = NULL; 5756 if (*parentp) { 5757 hammer2_chain_unlock(*parentp); 5758 hammer2_chain_drop(*parentp); 5759 *parentp = NULL; 5760 } 5761 } 5762 5763 /* 5764 * Inodes hang off of the iroot (bit 63 is clear, differentiating 5765 * inodes from root directory entries in the key lookup). 5766 */ 5767 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 5768 rchain = NULL; 5769 if (parent) { 5770 /* 5771 * NOTE: rchain can be returned as NULL even if error == 0 5772 * (i.e. not found) 5773 */ 5774 rchain = hammer2_chain_lookup(&parent, &key_dummy, 5775 inum, inum, 5776 &error, flags); 5777 /* 5778 * Propagate a chain-specific error to caller. 5779 * 5780 * If the chain is not errored, we must still validate that the inode 5781 * number is correct, because all hell will break loose if it isn't 5782 * correct. It should always be correct so print to the console and 5783 * simulate a CHECK error if it is not. 
5784 */ 5785 if (error == 0 && rchain) { 5786 error = rchain->error; 5787 if (error == 0 && rchain->data) { 5788 if (inum != rchain->data->ipdata.meta.inum) { 5789 kprintf("hammer2_chain_inode_find: lookup inum %ld, " 5790 "got valid inode but with inum %ld\n", 5791 (long)inum, (long)rchain->data->ipdata.meta.inum); 5792 error = HAMMER2_ERROR_CHECK; 5793 rchain->error = error; 5794 } 5795 } 5796 } 5797 } else { 5798 error = HAMMER2_ERROR_EIO; 5799 } 5800 *parentp = parent; 5801 *chainp = rchain; 5802 5803 return error; 5804 } 5805 5806 /* 5807 * Used by the bulkscan code to snapshot the synchronized storage for 5808 * a volume, allowing it to be scanned concurrently against normal 5809 * operation. 5810 */ 5811 hammer2_chain_t * 5812 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 5813 { 5814 hammer2_chain_t *copy; 5815 5816 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 5817 copy->data = kmalloc(sizeof(copy->data->voldata), 5818 hmp->mmsg, M_WAITOK | M_ZERO); 5819 hammer2_voldata_lock(hmp); 5820 copy->data->voldata = hmp->volsync; 5821 hammer2_voldata_unlock(hmp); 5822 5823 return copy; 5824 } 5825 5826 void 5827 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 5828 { 5829 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 5830 KKASSERT(copy->data); 5831 kfree(copy->data, copy->hmp->mmsg); 5832 copy->data = NULL; 5833 hammer2_chain_drop(copy); 5834 } 5835 5836 /* 5837 * Returns non-zero if the chain (INODE or DIRENT) matches the 5838 * filename. 
 */
int
hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name,
			  size_t name_len)
{
	const hammer2_inode_data_t *ripdata;

	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		ripdata = &chain->data->ipdata;
		if (ripdata->meta.name_len == name_len &&
		    bcmp(ripdata->filename, name, name_len) == 0) {
			return 1;
		}
	}
	if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT &&
	    chain->bref.embed.dirent.namlen == name_len) {
		/*
		 * Short names are embedded in the bref's check field;
		 * longer names live in the dirent's data block.
		 */
		if (name_len > sizeof(chain->bref.check.buf) &&
		    bcmp(chain->data->buf, name, name_len) == 0) {
			return 1;
		}
		if (name_len <= sizeof(chain->bref.check.buf) &&
		    bcmp(chain->bref.check.buf, name, name_len) == 0) {
			return 1;
		}
	}
	return 0;
}

/*
 * Debugging: recursively dump a chain and its in-memory children.
 * *countp limits the total number of chains printed; pfx and flags
 * control the per-line tag and which children are descended into
 * (flags == (u_int)-1 dumps everything).
 *
 * NOTE(review): in-string alignment padding below reconstructed from
 * upstream; the source listing lost intra-string whitespace.
 */
void
hammer2_dump_chain(hammer2_chain_t *chain, int tab, int bi, int *countp,
		   char pfx, u_int flags)
{
	hammer2_chain_t *scan;
	hammer2_chain_t *parent;

	--*countp;
	if (*countp == 0) {
		kprintf("%*.*s...\n", tab, tab, "");
		return;
	}
	if (*countp < 0)
		return;
	kprintf("%*.*s%c-chain %p %s.%-3d %016jx %016jx/%-2d mir=%016jx\n",
		tab, tab, "", pfx, chain,
		hammer2_bref_type_str(chain->bref.type), bi,
		chain->bref.data_off, chain->bref.key, chain->bref.keybits,
		chain->bref.mirror_tid);

	kprintf("%*.*s      [%08x] (%s) refs=%d",
		tab, tab, "",
		chain->flags,
		((chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
		  chain->data) ?  (char *)chain->data->ipdata.filename : "?"),
		chain->refs);

	parent = chain->parent;
	if (parent)
		kprintf("\n%*.*s      p=%p [pflags %08x prefs %d]",
			tab, tab, "",
			parent, parent->flags, parent->refs);
	if (RB_EMPTY(&chain->core.rbtree)) {
		kprintf("\n");
	} else {
		/* NOTE: local bi intentionally shadows the parameter */
		int bi = 0;
		kprintf(" {\n");
		RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree) {
			if ((scan->flags & flags) || flags == (u_int)-1) {
				hammer2_dump_chain(scan, tab + 4, bi, countp,
						   'a', flags);
			}
			bi++;
		}
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data)
			kprintf("%*.*s}(%s)\n", tab, tab, "",
				chain->data->ipdata.filename);
		else
			kprintf("%*.*s}\n", tab, tab, "");
	}
}