1 /* 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org> 5 * Copyright (c) 2011-2022 The DragonFly Project. All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Matthew Dillon <dillon@dragonflybsd.org> 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 3. Neither the name of The DragonFly Project nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific, prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 */ 37 /* 38 * This subsystem implements most of the core support functions for 39 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version on media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * Chains are no-longer delete-duplicated.  Instead, the original in-memory
 * chain will be moved along with its block reference (e.g. for things like
 * renames, hardlink operations, modifications, etc), and will be indexed
 * on a secondary list for flush handling instead of propagating a flag
 * upward to the root.
 *
 * Concurrent front-end operations can still run against backend flushes
 * as long as they do not cross the current flush boundary.  An operation
 * running above the current flush (in areas not yet flushed) can become
 * part of the current flush while an operation running below the current
 * flush can become part of the next flush.
 */
/*
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/buf.h>

#include <crypto/sha2/sha2.h>
*/

#include "hammer2.h"

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_chain_t *parent,
		hammer2_key_t key, int keybits,
		hammer2_tid_t mtid, int for_type, int *errorp);
static int hammer2_chain_delete_obref(hammer2_chain_t *parent,
		hammer2_chain_t *chain,
		hammer2_tid_t mtid, int flags,
		hammer2_blockref_t *obref);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **brefp);
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
		int depth);

/*
 * There are many degenerate situations where an extreme rate of console
 * output can occur from warnings and errors.  Make sure this output does
 * not impede operations.
 */
/*
static struct krate krate_h2chk = { .freq = 5 };
static struct krate krate_h2me = { .freq = 1 };
static struct krate krate_h2em = { .freq = 1 };
*/

/*
 * Basic RBTree for chains (core.rbtree).
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * RB-tree comparator, ordering chains by the inclusive key range
 * [key, key + 2^keybits - 1] from each chain's blockref.
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen; to catch
	 * any software issues early we count overlaps as a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)	/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)	/* fully to the right */
		return(1);
	return(0);		/* overlap (must not cross edge boundary) */
}

/*
 * Assert that a chain has no media data associated with it.
 * Volume/freemap headers are exempt because their data is embedded.
 */
static __inline void
hammer2_chain_assert_no_data(hammer2_chain_t *chain)
{
	KKASSERT(chain->dio == NULL);
	if (chain->bref.type != HAMMER2_BREF_TYPE_VOLUME &&
	    chain->bref.type != HAMMER2_BREF_TYPE_FREEMAP &&
	    chain->data) {
		panic("hammer2_chain_assert_no_data: chain %p still has data",
		    chain);
	}
}

/*
 * Make a chain visible to the flusher.  The flusher operates using a top-down
 * recursion based on the ONFLUSH flag.  It locates MODIFIED and UPDATE chains,
 * flushes them, and updates blocks back to the volume root.
 *
 * This routine sets the ONFLUSH flag upward from the triggering chain until
 * it hits an inode root or the volume root.  Inode chains serve as inflection
 * points, requiring the flusher to bridge across trees.
 * Inodes include
 * regular inodes, PFS roots (pmp->iroot), and the media super root
 * (spmp->iroot).
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			/* stop at inode boundaries (flush inflection points) */
			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
				break;
			if ((parent = chain->parent) == NULL)
				break;
			/* hand-over-hand shared spinlocks going upward */
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes;

	/*
	 * Special case - radix of 0 indicates a chain that does not
	 * need a data reference (context is completely embedded in the
	 * bref).
	 */
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		bytes = 0;

	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_FREEMAP:
	case HAMMER2_BREF_TYPE_VOLUME:
		chain = kmalloc_obj(sizeof(*chain), hmp->mchain,
				    M_WAITOK | M_ZERO);
		atomic_add_long(&hammer2_chain_allocs, 1);
		break;
	case HAMMER2_BREF_TYPE_EMPTY:
	default:
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
		break;
	}

	/*
	 * Initialize the new chain structure.  pmp must be set to NULL for
	 * chains belonging to the super-root topology of a device mount.
	 */
	if (pmp == hmp->spmp)
		chain->pmp = NULL;
	else
		chain->pmp = pmp;

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;		/* returned referenced */
	chain->flags = HAMMER2_CHAIN_ALLOCATED;

	/*
	 * Set the PFS boundary flag if this chain represents a PFS root.
	 */
	if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY);
	hammer2_chain_init(chain);

	return (chain);
}

/*
 * A common function to initialize chains including fchain and vchain.
 * Sets up the (empty) live RB tree and the chain's lock primitives.
 */
void
hammer2_chain_init(hammer2_chain_t *chain)
{
	RB_INIT(&chain->core.rbtree);	/* live chains */
	hammer2_mtx_init(&chain->lock, "h2chain");
	hammer2_spin_init(&chain->core.spin, "h2chain");
	lockinit(&chain->diolk, "chdio", 0, 0);
}

/*
 * Add a reference to a chain element, preventing its destruction.
 * Undone via hammer2_chain_drop()
 *
 * (can be called with spinlock held)
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	/* 0->1 transition requires no special action here */
	if (atomic_fetchadd_int(&chain->refs, 1) == 0) {
		/* NOP */
	}
}

/*
 * Ref a locked chain and force the data to be held across an unlock.
 * Chain must be currently locked.  The user of the chain who desires
 * to release the hold must call hammer2_chain_lock_unhold() to relock
 * and unhold the chain, then unlock normally, or may simply call
 * hammer2_chain_drop_unhold() (which is safer against deadlocks).
 */
void
hammer2_chain_ref_hold(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->lockcnt, 1);	/* extra hold keeps data */
	hammer2_chain_ref(chain);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001	/* acquire parent core spinlock */
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002	/* bump parent live_count */
#define HAMMER2_CHAIN_INSERT_RACE	0x0004	/* fail on generation mismatch */

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain __debugvar;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race.  The caller's generation
	 * snapshot must still match for the insert to be valid.
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = HAMMER2_ERROR_EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain.  A collision means two chains claim overlapping
	 * key ranges, which must never happen.
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&parent->core.live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_unex(&parent->core.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 *
 * Nobody should own chain's mutex on the 1->0 transition, unless this drop
 * races an acquisition by another cpu.  Therefore we can loop if we are
 * unable to acquire the mutex, and refs is unlikely to be 1 unless we again
 * race against another drop.
 */
void
hammer2_chain_drop(hammer2_chain_t *chain)
{
	u_int refs;

	KKASSERT(chain->refs > 0);

	while (chain) {
		refs = chain->refs;
		cpu_ccfence();
		KKASSERT(refs > 0);

		if (refs == 1) {
			/*
			 * Possible last drop.  lastdrop handles the 1->0
			 * transition under the chain mutex and may return
			 * a parent chain that also needs dropping.
			 */
			if (hammer2_mtx_ex_try(&chain->lock) == 0)
				chain = hammer2_chain_lastdrop(chain, 0);
			/* retry the same chain, or chain from lastdrop */
		} else {
			if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
				break;
			/* retry the same chain */
		}
		cpu_pause();
	}
}

/*
 * Unhold a held and probably not-locked chain, ensure that the data is
 * dropped on the 1->0 transition of lockcnt by obtaining an exclusive
 * lock and then simply unlocking the chain.
 */
void
hammer2_chain_unhold(hammer2_chain_t *chain)
{
	u_int lockcnt;
	int iter = 0;

	for (;;) {
		lockcnt = chain->lockcnt;
		cpu_ccfence();
		if (lockcnt > 1) {
			/* not the last hold, just decrement */
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				break;
			}
		} else if (hammer2_mtx_ex_try(&chain->lock) == 0) {
			/* last hold, lock/unlock drops the data */
			hammer2_chain_unlock(chain);
			break;
		} else {
			/*
			 * This situation can easily occur on SMP due to
			 * the gap inbetween the 1->0 transition and the
			 * final unlock.  We cannot safely block on the
			 * mutex because lockcnt might go above 1.
			 *
			 * XXX Sleep for one tick if it takes too long.
			 */
			if (++iter > 1000) {
				if (iter > 1000 + hz) {
					kprintf("hammer2: h2race1 %p\n", chain);
					iter = 1000;
				}
				tsleep(&iter, 0, "h2race1", 1);
			}
			cpu_pause();
		}
	}
}

/*
 * Release a hold and then drop the caller's chain reference.
 */
void
hammer2_chain_drop_unhold(hammer2_chain_t *chain)
{
	hammer2_chain_unhold(chain);
	hammer2_chain_drop(chain);
}

/*
 * Re-establish a data hold on an unlocked chain by cycling a shared
 * lock around a lockcnt bump.
 */
void
hammer2_chain_rehold(hammer2_chain_t *chain)
{
	hammer2_chain_lock(chain, HAMMER2_RESOLVE_SHARED);
	atomic_add_int(&chain->lockcnt, 1);
	hammer2_chain_unlock(chain);
}

/*
 * Handles the (potential) last drop of chain->refs from 1->0.  Called with
 * the mutex exclusively locked, refs == 1, and lockcnt 0.  SMP races are
 * possible against refs and lockcnt.  We must dispose of the mutex on chain.
 *
 * This function returns an unlocked chain for recursive drop or NULL.  It
 * can return the same chain if it determines it has raced another ref.
 *
 * --
 *
 * When two chains need to be recursively dropped we use the chain we
 * would otherwise free to placehold the additional chain.  It's a bit
 * convoluted but we can't just recurse without potentially blowing out
 * the kernel stack.
 *
 * The chain cannot be freed if it has any children.
 * The chain cannot be freed if flagged MODIFIED unless we can dispose of it.
 * The chain cannot be freed if flagged UPDATE unless we can dispose of it.
 * Any dedup registration can remain intact.
 *
 * The core spinlock is allowed to nest child-to-parent (not parent-to-child).
 */
static
hammer2_chain_t *
hammer2_chain_lastdrop(hammer2_chain_t *chain, int depth)
{
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *rdrop;

	/*
	 * We need chain's spinlock to interlock the sub-tree test.
	 * We already have chain's mutex, protecting chain->parent.
	 *
	 * Remember that chain->refs can be in flux.
	 */
	hammer2_spin_ex(&chain->core.spin);

	if (chain->parent != NULL) {
		/*
		 * If the chain has a parent the UPDATE bit prevents scrapping
		 * as the chain is needed to properly flush the parent.  Try
		 * to complete the 1->0 transition and return NULL.  Retry
		 * (return chain) if we are unable to complete the 1->0
		 * transition, else return NULL (nothing more to do).
		 *
		 * If the chain has a parent the MODIFIED bit prevents
		 * scrapping.
		 *
		 * Chains with UPDATE/MODIFIED are *not* put on the LRU list!
		 */
		if (chain->flags & (HAMMER2_CHAIN_UPDATE |
				    HAMMER2_CHAIN_MODIFIED)) {
			if (atomic_cmpset_int(&chain->refs, 1, 0)) {
				hammer2_spin_unex(&chain->core.spin);
				hammer2_chain_assert_no_data(chain);
				hammer2_mtx_unlock(&chain->lock);
				chain = NULL;
			} else {
				hammer2_spin_unex(&chain->core.spin);
				hammer2_mtx_unlock(&chain->lock);
			}
			return (chain);
		}
		/* spinlock still held */
	} else if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME ||
		   chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP) {
		/*
		 * Retain the static vchain and fchain.  Clear bits that
		 * are not relevant.  Do not clear the MODIFIED bit,
		 * and certainly do not put it on the delayed-flush queue.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
	} else {
		/*
		 * The chain has no parent and can be flagged for destruction.
		 * Since it has no parent, UPDATE can also be cleared.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
		if (chain->flags & HAMMER2_CHAIN_UPDATE)
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);

		/*
		 * If the chain has children we must propagate the DESTROY
		 * flag downward and rip the disconnected topology apart.
		 * This is accomplished by calling hammer2_flush() on the
		 * chain.
		 *
		 * Any dedup is already handled by the underlying DIO, so
		 * we do not have to specifically flush it here.
		 */
		if (chain->core.chain_count) {
			hammer2_spin_unex(&chain->core.spin);
			hammer2_flush(chain, HAMMER2_FLUSH_TOP |
				      HAMMER2_FLUSH_ALL);
			hammer2_mtx_unlock(&chain->lock);

			return(chain);	/* retry drop */
		}

		/*
		 * Otherwise we can scrap the MODIFIED bit if it is set,
		 * and continue along the freeing path.
		 *
		 * Be sure to clean-out any dedup bits.  Without a parent
		 * this chain will no longer be visible to the flush code.
		 * Easy check data_off to avoid the volume root.
		 */
		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		/* spinlock still held */
	}

	/* spinlock still held */

	/*
	 * If any children exist we must leave the chain intact with refs == 0.
	 * They exist because chains are retained below us which have refs or
	 * may require flushing.
	 *
	 * Retry (return chain) if we fail to transition the refs to 0, else
	 * return NULL indication nothing more to do.
	 *
	 * Chains with children are NOT put on the LRU list.
	 */
	if (chain->core.chain_count) {
		if (atomic_cmpset_int(&chain->refs, 1, 0)) {
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_assert_no_data(chain);
			hammer2_mtx_unlock(&chain->lock);
			chain = NULL;
		} else {
			hammer2_spin_unex(&chain->core.spin);
			hammer2_mtx_unlock(&chain->lock);
		}
		return (chain);
	}
	/* spinlock still held */
	/* no chains left under us */

	/*
	 * chain->core has no children left so no accessors can get to our
	 * chain from there.  Now we have to lock the parent core to interlock
	 * remaining possible accessors that might bump chain's refs before
	 * we can safely drop chain's refs with intent to free the chain.
	 */
	hmp = chain->hmp;
	rdrop = NULL;

	parent = chain->parent;

	/*
	 * WARNING! chain's spin lock is still held here, and other spinlocks
	 *	    will be acquired and released in the code below.  We
	 *	    cannot be making fancy procedure calls!
	 */

	/*
	 * Spinlock the parent and try to drop the last ref on chain.
	 * On success determine if we should dispose of the chain
	 * (remove the chain from its parent, etc).
	 *
	 * (normal core locks are top-down recursive but we define
	 * core spinlocks as bottom-up recursive, so this is safe).
	 */
	if (parent) {
		hammer2_spin_ex(&parent->core.spin);
		if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
			/*
			 * 1->0 transition failed, retry.
			 */
			hammer2_spin_unex(&parent->core.spin);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_mtx_unlock(&chain->lock);

			return(chain);
		}

		/*
		 * 1->0 transition successful, parent spin held to prevent
		 * new lookups, chain spinlock held to protect parent field.
		 * Remove chain from the parent.
		 *
		 * If the chain is being removed from the parent's rbtree but
		 * is not blkmapped, we have to adjust live_count downward.  If
		 * it is blkmapped then the blockref is retained in the parent
		 * as is its associated live_count.  This case can occur when
		 * a chain added to the topology is unable to flush and is
		 * then later deleted.
627 */ 628 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 629 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) && 630 (chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0) { 631 atomic_add_int(&parent->core.live_count, -1); 632 } 633 RB_REMOVE(hammer2_chain_tree, 634 &parent->core.rbtree, chain); 635 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 636 --parent->core.chain_count; 637 chain->parent = NULL; 638 } 639 640 /* 641 * If our chain was the last chain in the parent's core the 642 * core is now empty and its parent might have to be 643 * re-dropped if it has 0 refs. 644 */ 645 if (parent->core.chain_count == 0) { 646 rdrop = parent; 647 atomic_add_int(&rdrop->refs, 1); 648 /* 649 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 650 rdrop = NULL; 651 */ 652 } 653 hammer2_spin_unex(&parent->core.spin); 654 parent = NULL; /* safety */ 655 /* FALL THROUGH */ 656 } else { 657 /* 658 * No-parent case. 659 */ 660 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 661 /* 662 * 1->0 transition failed, retry. 663 */ 664 hammer2_spin_unex(&parent->core.spin); 665 hammer2_spin_unex(&chain->core.spin); 666 hammer2_mtx_unlock(&chain->lock); 667 668 return(chain); 669 } 670 } 671 672 /* 673 * Successful 1->0 transition, no parent, no children... no way for 674 * anyone to ref this chain any more. We can clean-up and free it. 675 * 676 * We still have the core spinlock, and core's chain_count is 0. 677 * Any parent spinlock is gone. 678 */ 679 hammer2_spin_unex(&chain->core.spin); 680 hammer2_chain_assert_no_data(chain); 681 hammer2_mtx_unlock(&chain->lock); 682 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 683 chain->core.chain_count == 0); 684 685 /* 686 * All locks are gone, no pointers remain to the chain, finish 687 * freeing it. 
	 */
	KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE |
				  HAMMER2_CHAIN_MODIFIED)) == 0);

	/*
	 * Once chain resources are gone we can use the now dead chain
	 * structure to placehold what might otherwise require a recursive
	 * drop, because we have potentially two things to drop and can only
	 * return one directly.
	 */
	if (chain->flags & HAMMER2_CHAIN_ALLOCATED) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED);
		chain->hmp = NULL;
		kfree_obj(chain, hmp->mchain);
		atomic_add_long(&hammer2_chain_allocs, -1);
	}

	/*
	 * Possible chaining loop when parent re-drop needed.
	 */
	return(rdrop);
}

/*
 * On last lock release.  Detach chain->dio/chain->data and return the
 * dio (NULL if none) for the caller to release.  Chains other than the
 * volume/freemap headers must not have a data pointer without a dio.
 */
static hammer2_io_t *
hammer2_chain_drop_data(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;

	if ((dio = chain->dio) != NULL) {
		chain->dio = NULL;
		chain->data = NULL;
	} else {
		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			/* embedded data, retained */
			break;
		default:
			if (chain->data != NULL) {
				hammer2_spin_unex(&chain->core.spin);
				panic("chain data not null: "
				      "chain %p bref %016jx.%02x "
				      "refs %d parent %p dio %p data %p",
				      chain, chain->bref.data_off,
				      chain->bref.type, chain->refs,
				      chain->parent,
				      chain->dio, chain->data);
			}
			KKASSERT(chain->data == NULL);
			break;
		}
	}
	return dio;
}

/*
 * Lock a referenced chain element, acquiring its data with I/O if necessary,
 * and specify how you would like the data to be resolved.
 *
 * If an I/O or other fatal error occurs, chain->error will be set to non-zero.
 *
 * The lock is allowed to recurse, multiple locking ops will aggregate
 * the requested resolve types.  Once data is assigned it will not be
 * removed until the last unlock.
 *
 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element.
756 * (typically used to avoid device/logical buffer 757 * aliasing for data) 758 * 759 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 760 * the INITIAL-create state (indirect blocks only). 761 * 762 * Do not resolve data elements for DATA chains. 763 * (typically used to avoid device/logical buffer 764 * aliasing for data) 765 * 766 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 767 * 768 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 769 * it will be locked exclusive. 770 * 771 * HAMMER2_RESOLVE_NONBLOCK- (flag) The chain is locked non-blocking. If 772 * the lock fails, EAGAIN is returned. 773 * 774 * NOTE: Embedded elements (volume header, inodes) are always resolved 775 * regardless. 776 * 777 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 778 * element will instantiate and zero its buffer, and flush it on 779 * release. 780 * 781 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 782 * so as not to instantiate a device buffer, which could alias against 783 * a logical file buffer. However, if ALWAYS is specified the 784 * device buffer will be instantiated anyway. 785 * 786 * NOTE: The return value is always 0 unless NONBLOCK is specified, in which 787 * case it can be either 0 or EAGAIN. 788 * 789 * WARNING! This function blocks on I/O if data needs to be fetched. This 790 * blocking can run concurrent with other compatible lock holders 791 * who do not need data returning. The lock is not upgraded to 792 * exclusive during a data fetch, a separate bit is used to 793 * interlock I/O. However, an exclusive lock holder can still count 794 * on being interlocked against an I/O fetch managed by a shared 795 * lock holder. 
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
	KKASSERT(chain->refs > 0);

	if (how & HAMMER2_RESOLVE_NONBLOCK) {
		/*
		 * We still have to bump lockcnt before acquiring the lock,
		 * even for non-blocking operation, because the unlock code
		 * live-loops on lockcnt == 1 when dropping the last lock.
		 *
		 * If the non-blocking operation fails we have to use an
		 * unhold sequence to undo the mess.
		 *
		 * NOTE: LOCKAGAIN must always succeed without blocking,
		 *	 even if NONBLOCK is specified.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				if (hammer2_mtx_sh_try(&chain->lock) != 0) {
					hammer2_chain_unhold(chain);
					return EAGAIN;
				}
			}
		} else {
			if (hammer2_mtx_ex_try(&chain->lock) != 0) {
				hammer2_chain_unhold(chain);
				return EAGAIN;
			}
		}
	} else {
		/*
		 * Get the appropriate lock.  If LOCKAGAIN is flagged with
		 * SHARED the caller expects a shared lock to already be
		 * present and we are giving it another ref.  This case must
		 * importantly not block if there is a pending exclusive lock
		 * request.
		 */
		atomic_add_int(&chain->lockcnt, 1);
		if (how & HAMMER2_RESOLVE_SHARED) {
			if (how & HAMMER2_RESOLVE_LOCKAGAIN) {
				hammer2_mtx_sh_again(&chain->lock);
			} else {
				hammer2_mtx_sh(&chain->lock);
			}
		} else {
			hammer2_mtx_ex(&chain->lock);
		}
	}

	/*
	 * If we already have a valid data pointer make sure the data is
	 * synchronized to the current cpu, and then no further action is
	 * necessary.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return 0;
	}

	/*
	 * Do we have to resolve the data?  This is generally only
	 * applicable to HAMMER2_BREF_TYPE_DATA which is special-cased.
	 * Other BREF types expects the data to be there.
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return 0;
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return 0;
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return 0;
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return 0;
#endif
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
	default:
		break;
	}

	/*
	 * Caller requires data
	 */
	hammer2_chain_load_data(chain);

	return 0;
}

#if 0
/*
 * Lock the chain, retain the hold, and drop the data persistence count.
 * The data should remain valid because we never transitioned lockcnt
 * through 0.
 */
void
hammer2_chain_lock_unhold(hammer2_chain_t *chain, int how)
{
	hammer2_chain_lock(chain, how);
	atomic_add_int(&chain->lockcnt, -1);
}

/*
 * Downgrade an exclusive chain lock to a shared chain lock.
 *
 * NOTE: There is no upgrade equivalent due to the ease of
 *	 deadlocks in that direction.
 */
void
hammer2_chain_lock_downgrade(hammer2_chain_t *chain)
{
	hammer2_mtx_downgrade(&chain->lock);
}
#endif

/*
 * Issue I/O and install chain->data.  Caller must hold a chain lock, lock
 * may be of any type.
 *
 * Once chain->data is set it cannot be disposed of until all locks are
 * released.
 *
 * Make sure the data is synchronized to the current cpu.
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	char *bdata;
	int error;

	/*
	 * Degenerate case, data already present, or chain has no media
	 * reference to load.
	 */
	KKASSERT(chain->lock.mtx_lock & MTX_MASK);
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		return;
	}
	if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Gain the IOINPROG bit, interlocked block.  Only one thread may
	 * perform the I/O; others sleep on IOSIGNAL until it completes.
	 */
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_CHAIN_IOINPROG) {
			nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
			tsleep_interlock(&chain->flags, 0);
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				tsleep(&chain->flags, PINTERLOCKED,
				       "h2iocw", 0);
			}
			/* retry */
		} else {
			nflags = oflags | HAMMER2_CHAIN_IOINPROG;
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				break;
			}
			/* retry */
		}
	}

	/*
	 * We own CHAIN_IOINPROG
	 *
	 * Degenerate case if we raced another load.
	 */
	if (chain->data) {
		if (chain->dio)
			hammer2_io_bkvasync(chain->dio);
		goto done;
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		error = hammer2_io_new(hmp, bref->type,
				       bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->type,
					 bref->data_off, chain->bytes,
					 &chain->dio);
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);
	}
	if (error) {
		chain->error = HAMMER2_ERROR_EIO;
		kprintf("hammer2_chain_load_data: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		goto done;
	}
	chain->error = 0;

	/*
	 * This isn't perfect and can be ignored on OSs which do not have
	 * an indication as to whether a buffer is coming from cache or
	 * if I/O was actually issued for the read.  TESTEDGOOD will work
	 * pretty well without the B_IOISSUED logic because chains are
	 * cached, but in that situation (without B_IOISSUED) it will not
	 * detect whether a re-read via I/O is corrupted verses the original
	 * read.
	 *
	 * We can't re-run the CRC on every fresh lock.  That would be
	 * insanely expensive.
	 *
	 * If the underlying kernel buffer covers the entire chain we can
	 * use the B_IOISSUED indication to determine if we have to re-run
	 * the CRC on chain data for chains that managed to stay cached
	 * across the kernel disposal of the original buffer.
	 */
	if ((dio = chain->dio) != NULL && dio->bp) {
		//struct m_buf *bp = dio->bp;

		if (dio->psize == chain->bytes //&&
		    /*(bp->b_flags & B_IOISSUED)*/) {
			atomic_clear_int(&chain->flags,
					 HAMMER2_CHAIN_TESTEDGOOD);
			//bp->b_flags &= ~B_IOISSUED;
		}
	}

	/*
	 * NOTE: A locked chain's data cannot be modified without first
	 *	 calling hammer2_chain_modify().
	 */

	/*
	 * NOTE: hammer2_io_data() call issues bkvasync()
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);

	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		/*
		 * Clear INITIAL.  In this case we used io_new() and the
		 * buffer has been zero'd and marked dirty.
		 *
		 * CHAIN_MODIFIED has not been set yet, and we leave it
		 * that way for now.  Set a temporary CHAIN_NOTTESTED flag
		 * to prevent hammer2_chain_testcheck() from trying to match
		 * a check code that has not yet been generated.  This bit
		 * should NOT end up on the actual media.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);
	} else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
		/*
		 * check data not currently synchronized due to
		 * modification.  XXX assumes data stays in the buffer
		 * cache, which might not be true (need biodep on flush
		 * to calculate crc?  or simple crc?).
		 */
	} else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
		if (hammer2_chain_testcheck(chain, bdata) == 0) {
			chain->error = HAMMER2_ERROR_CHECK;
		} else {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		}
	}

	/*
	 * Setup the data pointer by pointing it into the buffer.
	 * WARNING! Other threads can start using the data the instant we
	 *	    set chain->data non-NULL.
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		panic("hammer2_chain_load_data: unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		KKASSERT(chain->bytes != 0);
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_io_t *dio;
	u_int lockcnt;
	int iter = 0;

	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
1160 */ 1161 for (;;) { 1162 lockcnt = chain->lockcnt; 1163 KKASSERT(lockcnt > 0); 1164 cpu_ccfence(); 1165 if (lockcnt > 1) { 1166 if (atomic_cmpset_int(&chain->lockcnt, 1167 lockcnt, lockcnt - 1)) { 1168 hammer2_mtx_unlock(&chain->lock); 1169 return; 1170 } 1171 } else if (hammer2_mtx_upgrade_try(&chain->lock) == 0) { 1172 /* while holding the mutex exclusively */ 1173 if (atomic_cmpset_int(&chain->lockcnt, 1, 0)) 1174 break; 1175 } else { 1176 /* 1177 * This situation can easily occur on SMP due to 1178 * the gap inbetween the 1->0 transition and the 1179 * final unlock. We cannot safely block on the 1180 * mutex because lockcnt might go above 1. 1181 * 1182 * XXX Sleep for one tick if it takes too long. 1183 */ 1184 if (++iter > 1000) { 1185 if (iter > 1000 + hz) { 1186 kprintf("hammer2: h2race2 %p\n", chain); 1187 iter = 1000; 1188 } 1189 tsleep(&iter, 0, "h2race2", 1); 1190 } 1191 cpu_pause(); 1192 } 1193 /* retry */ 1194 } 1195 1196 /* 1197 * Last unlock / mutex upgraded to exclusive. Drop the data 1198 * reference. 1199 */ 1200 dio = hammer2_chain_drop_data(chain); 1201 if (dio) 1202 hammer2_io_bqrelse(&dio); 1203 hammer2_mtx_unlock(&chain->lock); 1204 } 1205 1206 #if 0 1207 /* 1208 * Unlock and hold chain data intact 1209 */ 1210 void 1211 hammer2_chain_unlock_hold(hammer2_chain_t *chain) 1212 { 1213 atomic_add_int(&chain->lockcnt, 1); 1214 hammer2_chain_unlock(chain); 1215 } 1216 #endif 1217 1218 /* 1219 * Helper to obtain the blockref[] array base and count for a chain. 1220 * 1221 * XXX Not widely used yet, various use cases need to be validated and 1222 * converted to use this function. 
1223 */ 1224 static 1225 hammer2_blockref_t * 1226 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1227 { 1228 hammer2_blockref_t *base; 1229 int count; 1230 1231 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1232 base = NULL; 1233 1234 switch(parent->bref.type) { 1235 case HAMMER2_BREF_TYPE_INODE: 1236 count = HAMMER2_SET_COUNT; 1237 break; 1238 case HAMMER2_BREF_TYPE_INDIRECT: 1239 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1240 count = parent->bytes / sizeof(hammer2_blockref_t); 1241 break; 1242 case HAMMER2_BREF_TYPE_VOLUME: 1243 count = HAMMER2_SET_COUNT; 1244 break; 1245 case HAMMER2_BREF_TYPE_FREEMAP: 1246 count = HAMMER2_SET_COUNT; 1247 break; 1248 default: 1249 panic("hammer2_chain_base_and_count: " 1250 "unrecognized blockref type: %d", 1251 parent->bref.type); 1252 count = 0; 1253 break; 1254 } 1255 } else { 1256 switch(parent->bref.type) { 1257 case HAMMER2_BREF_TYPE_INODE: 1258 base = &parent->data->ipdata.u.blockset.blockref[0]; 1259 count = HAMMER2_SET_COUNT; 1260 break; 1261 case HAMMER2_BREF_TYPE_INDIRECT: 1262 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1263 base = &parent->data->npdata[0]; 1264 count = parent->bytes / sizeof(hammer2_blockref_t); 1265 break; 1266 case HAMMER2_BREF_TYPE_VOLUME: 1267 base = &parent->data->voldata. 1268 sroot_blockset.blockref[0]; 1269 count = HAMMER2_SET_COUNT; 1270 break; 1271 case HAMMER2_BREF_TYPE_FREEMAP: 1272 base = &parent->data->blkset.blockref[0]; 1273 count = HAMMER2_SET_COUNT; 1274 break; 1275 default: 1276 panic("hammer2_chain_base_and_count: " 1277 "unrecognized blockref type: %d", 1278 parent->bref.type); 1279 base = NULL; 1280 count = 0; 1281 break; 1282 } 1283 } 1284 *countp = count; 1285 1286 return base; 1287 } 1288 1289 /* 1290 * This counts the number of live blockrefs in a block array and 1291 * also calculates the point at which all remaining blockrefs are empty. 1292 * This routine can only be called on a live chain. 
 *
 * Caller holds the chain locked, but possibly with a shared lock.  We
 * must use an exclusive spinlock to prevent corruption.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_spin_ex(&chain->core.spin);
	if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) {
		if (base) {
			/*
			 * Scan backwards from the end of the array to
			 * locate the last non-empty blockref.  live_zero
			 * is the index one past it (all entries at or
			 * beyond live_zero are empty).
			 */
			while (--count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					break;
			}
			chain->core.live_zero = count + 1;
			/*
			 * Accumulate the number of live (non-empty)
			 * blockrefs below live_zero into live_count.
			 */
			while (count >= 0) {
				if (base[count].type != HAMMER2_BREF_TYPE_EMPTY)
					atomic_add_int(&chain->core.live_count,
						       1);
				--count;
			}
		} else {
			/* INITIAL chain, no media blockrefs to count */
			chain->core.live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS);
	}
	hammer2_spin_unex(&chain->core.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.  This function does
 * not usually adjust the data pointer and must be followed by (typically) a
 * hammer2_chain_modify() call to copy any old data over and adjust the
 * data pointer.
 *
 * Chains can be resized smaller without reallocating the storage.  Resizing
 * larger will reallocate the storage.  Excess or prior storage is reclaimed
 * asynchronously at a later time.
 *
 * An nradix value of 0 is special-cased to mean that the storage should
 * be disassociated, that is the chain is being resized to 0 bytes (not 1
 * byte).
 *
 * Must be passed an exclusively locked parent and chain.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode data
 * buffer.  However, because H2 can compress or encrypt data, the chain may
 * have a dio assigned to it in those situations, and they do not conflict.
 *
 * XXX return error if cannot resize.
 */
int
hammer2_chain_resize(hammer2_chain_t *chain,
		     hammer2_tid_t mtid, hammer2_off_t dedup_off,
		     int nradix, int flags)
{
	hammer2_dev_t *hmp;
	size_t obytes;
	size_t nbytes;
	int error;

	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volume root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);

	/*
	 * Nothing to do if the element is already the proper size.
	 * Note that nradix == 0 maps to nbytes == 0 (disassociation),
	 * not to a 1-byte allocation.
	 */
	obytes = chain->bytes;
	nbytes = (nradix) ? (1U << nradix) : 0;
	if (obytes == nbytes)
		return (chain->error);

	/*
	 * Make sure the old data is instantiated so we can copy it.  If this
	 * is a data block, the device data may be superfluous since the data
	 * might be in a logical block, but compressed or encrypted data is
	 * another matter.
	 *
	 * NOTE: The modify will set BLKMAPUPD for us if BLKMAPPED is set.
	 */
	error = hammer2_chain_modify(chain, mtid, dedup_off, 0);
	if (error)
		return error;

	/*
	 * Reallocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * NOTE: Operation does not copy the data and may only be used
	 *	 to resize data blocks in-place, or directory entry blocks
	 *	 which are about to be modified in some manner.
	 */
	error = hammer2_freemap_alloc(chain, nbytes);
	if (error)
		return error;

	chain->bytes = nbytes;

	/*
	 * We don't want the followup chain_modify() to try to copy data
	 * from the old (wrong-sized) buffer.  It won't know how much to
	 * copy.  This case should only occur during writes when the
	 * originator already has the data to write in-hand.
	 */
	if (chain->dio) {
		KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
			 chain->bref.type == HAMMER2_BREF_TYPE_DIRENT);
		hammer2_io_brelse(&chain->dio);
		chain->data = NULL;
	}
	return (chain->error);
}

/*
 * Set the chain modified so its data can be changed by the caller, or
 * install deduplicated data.  The caller must call this routine for each
 * set of modifications it makes, even if the chain is already flagged
 * MODIFIED.
 *
 * Sets bref.modify_tid to mtid only if mtid != 0.  Note that bref.modify_tid
 * is a CLC (cluster level change) field and is not updated by parent
 * propagation during a flush.
 *
 * Returns an appropriate HAMMER2_ERROR_* code, which will generally reflect
 * chain->error except for HAMMER2_ERROR_ENOSPC.  If the allocation fails
 * due to no space available, HAMMER2_ERROR_ENOSPC is returned and the chain
 * remains unmodified with its old data ref intact and chain->error
 * unchanged.
 *
 *				Dedup Handling
 *
 * If the DEDUPABLE flag is set in the chain the storage must be reallocated
 * even if the chain is still flagged MODIFIED.  In this case the chain's
 * DEDUPABLE flag will be cleared once the new storage has been assigned.
 *
 * If the caller passes a non-zero dedup_off we will use it to assign the
 * new storage.  The MODIFIED flag will be *CLEARED* in this case, and
 * DEDUPABLE will be set (NOTE: the UPDATE flag is always set).  The caller
 * must not modify the data content upon return.
 */
int
hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid,
		     hammer2_off_t dedup_off, int flags)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	int setmodified;
	int setupdate;
	int newmod;
	char *bdata;

	hmp = chain->hmp;
	KKASSERT(chain->lock.mtx_lock & MTX_EXCLUSIVE);

	/*
	 * Data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Data must be resolved if already assigned, unless explicitly
	 * flagged otherwise.  If we cannot safely load the data the
	 * modification fails and we return early.
	 */
	if (chain->data == NULL && chain->bytes != 0 &&
	    (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_load_data(chain);
		if (chain->error)
			return (chain->error);
	}
	error = 0;

	/*
	 * Set MODIFIED to indicate that the chain has been modified.  A new
	 * allocation is required when modifying a chain.
	 *
	 * Set UPDATE to ensure that the blockref is updated in the parent.
	 *
	 * If MODIFIED is already set determine if we can reuse the assigned
	 * data block or if we need a new data block.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		/*
		 * Must set modified bit.
		 */
		atomic_add_long(&hammer2_count_modified_chains, 1);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_pfs_memory_inc(chain->pmp);	/* can be NULL */
		setmodified = 1;

		/*
		 * We may be able to avoid a copy-on-write if the chain's
		 * check mode is set to NONE and the chain's current
		 * modify_tid is beyond the last explicit snapshot tid.
		 *
		 * This implements HAMMER2's overwrite-in-place feature.
		 *
		 * NOTE! This data-block cannot be used as a de-duplication
		 *	 source when the check mode is set to NONE.
		 */
		if ((chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		     chain->bref.type == HAMMER2_BREF_TYPE_DIRENT) &&
		    (chain->flags & HAMMER2_CHAIN_INITIAL) == 0 &&
		    (chain->flags & HAMMER2_CHAIN_DEDUPABLE) == 0 &&
		    HAMMER2_DEC_CHECK(chain->bref.methods) ==
		    HAMMER2_CHECK_NONE &&
		    chain->pmp &&
		    chain->bref.modify_tid >
		     chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * Sector overwrite allowed.
			 */
			newmod = 0;
		} else if ((hmp->hflags & HMNT2_EMERG) &&
			   chain->pmp &&
			   chain->bref.modify_tid >
			    chain->pmp->iroot->meta.pfs_lsnap_tid) {
			/*
			 * If in emergency delete mode then do a modify-in-
			 * place on any chain type belonging to the PFS as
			 * long as it doesn't mess up a snapshot.  We might
			 * be forced to do this anyway a little further down
			 * in the code if the allocation fails.
			 *
			 * Also note that in emergency mode, these modify-in-
			 * place operations are NOT SAFE.  A storage failure,
			 * power failure, or panic can corrupt the filesystem.
			 */
			newmod = 0;
		} else {
			/*
			 * Sector overwrite not allowed, must copy-on-write.
			 */
			newmod = 1;
		}
	} else if (chain->flags & HAMMER2_CHAIN_DEDUPABLE) {
		/*
		 * If the modified chain was registered for dedup we need
		 * a new allocation.  This only happens for delayed-flush
		 * chains (i.e. which run through the front-end buffer
		 * cache).
		 */
		newmod = 1;
		setmodified = 0;
	} else {
		/*
		 * Already flagged modified, no new allocation is needed.
		 */
		newmod = 0;
		setmodified = 0;
	}

	/*
	 * Flag parent update required.
	 */
	if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		setupdate = 1;
	} else {
		setupdate = 0;
	}

	/*
	 * The XOP code returns held but unlocked focus chains.  This
	 * prevents the chain from being destroyed but does not prevent
	 * it from being modified.  diolk is used to interlock modifications
	 * against XOP frontend accesses to the focus.
	 *
	 * This allows us to theoretically avoid deadlocking the frontend
	 * if one of the backends lock up by not formally locking the
	 * focused chain in the frontend.  In addition, the synchronization
	 * code relies on this mechanism to avoid deadlocking concurrent
	 * synchronization threads.
	 */
	lockmgr(&chain->diolk, LK_EXCLUSIVE);

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.  If an error occurs, the previous content and data
	 * reference is retained and the modification fails.
	 *
	 * If dedup_off is non-zero, the caller is requesting a deduplication
	 * rather than a modification.  The MODIFIED bit is not set and the
	 * data offset is set to the deduplication offset.  The data cannot
	 * be modified.
	 *
	 * NOTE: The dedup offset is allowed to be in a partially free state
	 *	 and we must be sure to reset it to a fully allocated state
	 *	 to force two bulkfree passes to free it again.
	 *
	 * NOTE: Only applicable when chain->bytes != 0.
	 *
	 * XXX can a chain already be marked MODIFIED without a data
	 * assignment?  If not, assert here instead of testing the case.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain &&
	    chain->bytes) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		    newmod
		) {
			/*
			 * NOTE: We do not have to remove the dedup
			 *	 registration because the area is still
			 *	 allocated and the underlying DIO will
			 *	 still be flushed.
			 */
			if (dedup_off) {
				chain->bref.data_off = dedup_off;
				if ((int)(dedup_off & HAMMER2_OFF_MASK_RADIX))
					chain->bytes = 1 <<
					     (int)(dedup_off &
					     HAMMER2_OFF_MASK_RADIX);
				else
					chain->bytes = 0;
				chain->error = 0;
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_MODIFIED);
				atomic_add_long(&hammer2_count_modified_chains,
						-1);
				if (chain->pmp) {
					hammer2_pfs_memory_wakeup(
						chain->pmp, -1);
				}
				hammer2_freemap_adjust(hmp, &chain->bref,
						HAMMER2_FREEMAP_DORECOVER);
				atomic_set_int(&chain->flags,
						HAMMER2_CHAIN_DEDUPABLE);
			} else {
				error = hammer2_freemap_alloc(chain,
							      chain->bytes);
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_DEDUPABLE);

				/*
				 * If we are unable to allocate a new block
				 * but we are in emergency mode, issue a
				 * warning to the console and reuse the same
				 * block.
				 *
				 * We behave as if the allocation were
				 * successful.
				 *
				 * THIS IS IMPORTANT: These modifications
				 * are virtually guaranteed to corrupt any
				 * snapshots related to this filesystem.
				 */
				if (error && (hmp->hflags & HMNT2_EMERG)) {
					error = 0;
					chain->bref.flags |=
						HAMMER2_BREF_FLAG_EMERG_MIP;

					krateprintf(&krate_h2em,
					    "hammer2: Emergency Mode WARNING: "
					    "Operation will likely corrupt "
					    "related snapshot: "
					    "%016jx.%02x key=%016jx\n",
					    chain->bref.data_off,
					    chain->bref.type,
					    chain->bref.key);
				} else if (error == 0) {
					chain->bref.flags &=
						~HAMMER2_BREF_FLAG_EMERG_MIP;
				}
			}
		}
	}

	/*
	 * Stop here if error.  We have to undo any flag bits we might
	 * have set above.
	 */
	if (error) {
		if (setmodified) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
			if (chain->pmp)
				hammer2_pfs_memory_wakeup(chain->pmp, -1);
		}
		if (setupdate) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
		lockmgr(&chain->diolk, LK_RELEASE);

		return error;
	}

	/*
	 * Update mirror_tid and modify_tid.  modify_tid is only updated
	 * if not passed as zero (during flushes, parent propagation passes
	 * the value 0).
	 *
	 * NOTE: chain->pmp could be the device spmp.
	 */
	chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1;
	if (mtid)
		chain->bref.modify_tid = mtid;

	/*
	 * Set BLKMAPUPD to tell the flush code that an existing blockmap entry
	 * requires updating as well as to tell the delete code that the
	 * chain's blockref might not exactly match (in terms of physical size
	 * or block offset) the one in the parent's blocktable.  The base key
	 * of course will still match.
	 */
	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD);

	/*
	 * Short-cut data block handling when the caller does not need an
	 * actual data reference to (aka OPTDATA), as long as the chain does
	 * not already have a data pointer to the data and no de-duplication
	 * occurred.
	 *
	 * This generally means that the modifications are being done via the
	 * logical buffer cache.
	 *
	 * NOTE: If deduplication occurred we have to run through the data
	 *	 stuff to clear INITIAL, and the caller will likely want to
	 *	 assign the check code anyway.  Leaving INITIAL set on a
	 *	 dedup can be deadly (it can cause the block to be zero'd!).
	 *
	 * This code also handles bytes == 0 (most dirents).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA) &&
	    chain->data == NULL) {
		if (dedup_off == 0) {
			KKASSERT(chain->dio == NULL);
			goto skip2;
		}
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 *
	 * Clearing of NOTTESTED is allowed if the MODIFIED bit is set.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		/*
		 * The data might be fully embedded.
		 */
		if (chain->bytes == 0) {
			KKASSERT(chain->dio == NULL);
			break;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 *
		 * If a dedup_off was supplied this is an existing block
		 * and no COW, copy, or further modification is required.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial && dedup_off == 0) {
			error = hammer2_io_new(hmp, chain->bref.type,
					       chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.type,
						 chain->bref.data_off,
						 chain->bytes, &dio);
		}
		hammer2_adjreadcounter(chain->bref.type, chain->bytes);

		/*
		 * If an I/O error occurs make sure callers cannot accidently
		 * modify the old buffer's contents and corrupt the filesystem.
		 *
		 * NOTE: hammer2_io_data() call issues bkvasync()
		 */
		if (error) {
			kprintf("hammer2_chain_modify: hmp=%p I/O error\n",
				hmp);
			chain->error = HAMMER2_ERROR_EIO;
			hammer2_io_brelse(&dio);
			hammer2_io_brelse(&chain->dio);
			chain->data = NULL;
			break;
		}
		chain->error = 0;
		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/*
			 * COW (unless a dedup).
			 */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata && dedup_off == 0) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0 && dedup_off == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new.  Dirty or
		 * redirty the new buffer.
		 *
		 * WARNING! The system buffer cache may have already flushed
		 *	    the buffer, so we must be sure to [re]dirty it
		 *	    for further modification.
		 *
		 *	    If dedup_off was supplied, the caller is not
		 *	    expected to make any further modification to the
		 *	    buffer.
		 *
		 * WARNING! hammer2_get_gdata() assumes dio never transitions
		 *	    through NULL in order to optimize away unnecessary
		 *	    diolk operations.
		 */
		{
			hammer2_io_t *tio;

			if ((tio = chain->dio) != NULL)
				hammer2_io_bqrelse(&tio);
			chain->data = (void *)bdata;
			chain->dio = dio;
			if (dedup_off == 0)
				hammer2_io_setdirty(dio);
		}
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/*
	 * setflush on parent indicating that the parent must recurse down
	 * to us.  Do not call on chain itself which might already have it
	 * set.
	 */
	if (chain->parent)
		hammer2_chain_setflush(chain->parent);
	lockmgr(&chain->diolk, LK_RELEASE);

	return (chain->error);
}

/*
 * Modify the chain associated with an inode.
1903 */ 1904 int 1905 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 1906 hammer2_tid_t mtid, int flags) 1907 { 1908 int error; 1909 1910 hammer2_inode_modify(ip); 1911 error = hammer2_chain_modify(chain, mtid, 0, flags); 1912 1913 return error; 1914 } 1915 1916 /* 1917 * This function returns the chain at the nearest key within the specified 1918 * range. The returned chain will be referenced but not locked. 1919 * 1920 * This function will recurse through chain->rbtree as necessary and will 1921 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1922 * the iteration value is less than the current value of *key_nextp. 1923 * 1924 * The caller should use (*key_nextp) to calculate the actual range of 1925 * the returned element, which will be (key_beg to *key_nextp - 1), because 1926 * there might be another element which is superior to the returned element 1927 * and overlaps it. 1928 * 1929 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1930 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1931 * it will wind up being (key_end + 1). 1932 * 1933 * WARNING! Must be called with child's spinlock held. Spinlock remains 1934 * held through the operation. 
 */
struct hammer2_chain_find_info {
	hammer2_chain_t		*best;		/* best candidate so far */
	hammer2_key_t		key_beg;	/* inclusive search start */
	hammer2_key_t		key_end;	/* inclusive search end */
	hammer2_key_t		key_next;	/* iteration limit, shrunk as we scan */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	/*
	 * Scan the parent's in-memory RB tree; the cmp function prunes
	 * subtrees outside [key_beg, key_end] and the callback tracks the
	 * best (nearest-key) candidate while shrinking key_next.
	 */
	RB_SCAN(hammer2_chain_tree, &parent->core.rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * RB_SCAN range comparator: -1 if the child's key range lies entirely
 * below the search range, +1 if entirely above, 0 on overlap (visit).
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	/* child covers [key, key + 2^keybits - 1] */
	child_beg = child->bref.key;
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

/*
 * RB_SCAN callback: select the in-range child with the nearest (lowest)
 * key and truncate key_next so the caller can compute the effective
 * range of the returned element.  Equal keys between two children are
 * illegal overlaps and assert.
 */
static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * Illegal overlap.
		 */
		KKASSERT(0);
		/*info->best = child;*/
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Set best to child.  Truncate key_next to the old best key.
		 */
		info->best = child;
		if (info->key_next > best->bref.key || info->key_next == 0)
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then this
		 * is an illegal overlap.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		KKASSERT(0);
		info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (info->key_next > child->bref.key || info->key_next == 0)
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  The returned chain is
 * held and locked according to (how) (HAMMER2_RESOLVE_*).  The caller must
 * handle crc-checks and so forth, and should check chain->error before
 * assuming that the data is good.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.
Insert 2059 * races can occur because the parent might be held shared. 2060 * 2061 * Caller must hold the parent locked shared or exclusive since we may 2062 * need the parent's bref array to find our block. 2063 * 2064 * WARNING! chain->pmp is always set to NULL for any chain representing 2065 * part of the super-root topology. 2066 */ 2067 hammer2_chain_t * 2068 hammer2_chain_get(hammer2_chain_t *parent, int generation, 2069 hammer2_blockref_t *bref, int how) 2070 { 2071 hammer2_dev_t *hmp = parent->hmp; 2072 hammer2_chain_t *chain; 2073 int error; 2074 2075 /* 2076 * Allocate a chain structure representing the existing media 2077 * entry. Resulting chain has one ref and is not locked. 2078 */ 2079 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 2080 chain = hammer2_chain_alloc(hmp, NULL, bref); 2081 else 2082 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 2083 /* ref'd chain returned */ 2084 2085 /* 2086 * Flag that the chain is in the parent's blockmap so delete/flush 2087 * knows what to do with it. 2088 */ 2089 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED); 2090 2091 /* 2092 * chain must be locked to avoid unexpected ripouts 2093 */ 2094 hammer2_chain_lock(chain, how); 2095 2096 /* 2097 * Link the chain into its parent. A spinlock is required to safely 2098 * access the RBTREE, and it is possible to collide with another 2099 * hammer2_chain_get() operation because the caller might only hold 2100 * a shared lock on the parent. 2101 * 2102 * NOTE: Get races can occur quite often when we distribute 2103 * asynchronous read-aheads across multiple threads. 
2104 */ 2105 KKASSERT(parent->refs > 0); 2106 error = hammer2_chain_insert(parent, chain, 2107 HAMMER2_CHAIN_INSERT_SPIN | 2108 HAMMER2_CHAIN_INSERT_RACE, 2109 generation); 2110 if (error) { 2111 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 2112 /*kprintf("chain %p get race\n", chain);*/ 2113 hammer2_chain_unlock(chain); 2114 hammer2_chain_drop(chain); 2115 chain = NULL; 2116 } else { 2117 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2118 } 2119 2120 /* 2121 * Return our new chain referenced but not locked, or NULL if 2122 * a race occurred. 2123 */ 2124 return (chain); 2125 } 2126 2127 /* 2128 * Lookup initialization/completion API 2129 */ 2130 hammer2_chain_t * 2131 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 2132 { 2133 hammer2_chain_ref(parent); 2134 if (flags & HAMMER2_LOOKUP_SHARED) { 2135 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 2136 HAMMER2_RESOLVE_SHARED); 2137 } else { 2138 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 2139 } 2140 return (parent); 2141 } 2142 2143 void 2144 hammer2_chain_lookup_done(hammer2_chain_t *parent) 2145 { 2146 if (parent) { 2147 hammer2_chain_unlock(parent); 2148 hammer2_chain_drop(parent); 2149 } 2150 } 2151 2152 /* 2153 * Take the locked chain and return a locked parent. The chain remains 2154 * locked on return, but may have to be temporarily unlocked to acquire 2155 * the parent. Because of this, (chain) must be stable and cannot be 2156 * deleted while it was temporarily unlocked (typically means that (chain) 2157 * is an inode). 2158 * 2159 * Pass HAMMER2_RESOLVE_* flags in flags. 2160 * 2161 * This will work even if the chain is errored, and the caller can check 2162 * parent->error on return if desired since the parent will be locked. 2163 * 2164 * This function handles the lock order reversal. 
2165 */ 2166 hammer2_chain_t * 2167 hammer2_chain_getparent(hammer2_chain_t *chain, int flags) 2168 { 2169 hammer2_chain_t *parent; 2170 2171 /* 2172 * Be careful of order, chain must be unlocked before parent 2173 * is locked below to avoid a deadlock. Try it trivially first. 2174 */ 2175 parent = chain->parent; 2176 if (parent == NULL) 2177 panic("hammer2_chain_getparent: no parent"); 2178 hammer2_chain_ref(parent); 2179 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) 2180 return parent; 2181 2182 for (;;) { 2183 hammer2_chain_unlock(chain); 2184 hammer2_chain_lock(parent, flags); 2185 hammer2_chain_lock(chain, flags); 2186 2187 /* 2188 * Parent relinking races are quite common. We have to get 2189 * it right or we will blow up the block table. 2190 */ 2191 if (chain->parent == parent) 2192 break; 2193 hammer2_chain_unlock(parent); 2194 hammer2_chain_drop(parent); 2195 cpu_ccfence(); 2196 parent = chain->parent; 2197 if (parent == NULL) 2198 panic("hammer2_chain_getparent: no parent"); 2199 hammer2_chain_ref(parent); 2200 } 2201 return parent; 2202 } 2203 2204 /* 2205 * Take the locked chain and return a locked parent. The chain is unlocked 2206 * and dropped. *chainp is set to the returned parent as a convenience. 2207 * Pass HAMMER2_RESOLVE_* flags in flags. 2208 * 2209 * This will work even if the chain is errored, and the caller can check 2210 * parent->error on return if desired since the parent will be locked. 2211 * 2212 * The chain does NOT need to be stable. We use a tracking structure 2213 * to track the expected parent if the chain is deleted out from under us. 2214 * 2215 * This function handles the lock order reversal. 
2216 */ 2217 hammer2_chain_t * 2218 hammer2_chain_repparent(hammer2_chain_t **chainp, int flags) 2219 { 2220 hammer2_chain_t *chain; 2221 hammer2_chain_t *parent; 2222 struct hammer2_reptrack reptrack; 2223 struct hammer2_reptrack **repp; 2224 2225 /* 2226 * Be careful of order, chain must be unlocked before parent 2227 * is locked below to avoid a deadlock. Try it trivially first. 2228 */ 2229 chain = *chainp; 2230 parent = chain->parent; 2231 if (parent == NULL) { 2232 hammer2_spin_unex(&chain->core.spin); 2233 panic("hammer2_chain_repparent: no parent"); 2234 } 2235 hammer2_chain_ref(parent); 2236 if (hammer2_chain_lock(parent, flags|HAMMER2_RESOLVE_NONBLOCK) == 0) { 2237 hammer2_chain_unlock(chain); 2238 hammer2_chain_drop(chain); 2239 *chainp = parent; 2240 2241 return parent; 2242 } 2243 2244 /* 2245 * Ok, now it gets a bit nasty. There are multiple situations where 2246 * the parent might be in the middle of a deletion, or where the child 2247 * (chain) might be deleted the instant we let go of its lock. 2248 * We can potentially end up in a no-win situation! 2249 * 2250 * In particular, the indirect_maintenance() case can cause these 2251 * situations. 2252 * 2253 * To deal with this we install a reptrack structure in the parent 2254 * This reptrack structure 'owns' the parent ref and will automatically 2255 * migrate to the parent's parent if the parent is deleted permanently. 2256 */ 2257 hammer2_spin_init(&reptrack.spin, "h2reptrk"); 2258 reptrack.chain = parent; 2259 hammer2_chain_ref(parent); /* for the reptrack */ 2260 2261 hammer2_spin_ex(&parent->core.spin); 2262 reptrack.next = parent->core.reptrack; 2263 parent->core.reptrack = &reptrack; 2264 hammer2_spin_unex(&parent->core.spin); 2265 2266 hammer2_chain_unlock(chain); 2267 hammer2_chain_drop(chain); 2268 chain = NULL; /* gone */ 2269 2270 /* 2271 * At the top of this loop, chain is gone and parent is refd both 2272 * by us explicitly AND via our reptrack. We are attempting to 2273 * lock parent. 
2274 */ 2275 for (;;) { 2276 hammer2_chain_lock(parent, flags); 2277 2278 if (reptrack.chain == parent) 2279 break; 2280 hammer2_chain_unlock(parent); 2281 hammer2_chain_drop(parent); 2282 2283 kprintf("hammer2: debug REPTRACK %p->%p\n", 2284 parent, reptrack.chain); 2285 hammer2_spin_ex(&reptrack.spin); 2286 parent = reptrack.chain; 2287 hammer2_chain_ref(parent); 2288 hammer2_spin_unex(&reptrack.spin); 2289 } 2290 2291 /* 2292 * Once parent is locked and matches our reptrack, our reptrack 2293 * will be stable and we have our parent. We can unlink our 2294 * reptrack. 2295 * 2296 * WARNING! Remember that the chain lock might be shared. Chains 2297 * locked shared have stable parent linkages. 2298 */ 2299 hammer2_spin_ex(&parent->core.spin); 2300 repp = &parent->core.reptrack; 2301 while (*repp != &reptrack) 2302 repp = &(*repp)->next; 2303 *repp = reptrack.next; 2304 hammer2_spin_unex(&parent->core.spin); 2305 2306 hammer2_chain_drop(parent); /* reptrack ref */ 2307 *chainp = parent; /* return parent lock+ref */ 2308 2309 return parent; 2310 } 2311 2312 /* 2313 * Dispose of any linked reptrack structures in (chain) by shifting them to 2314 * (parent). Both (chain) and (parent) must be exclusively locked. 2315 * 2316 * This is interlocked against any children of (chain) on the other side. 2317 * No children so remain as-of when this is called so we can test 2318 * core.reptrack without holding the spin-lock. 2319 * 2320 * Used whenever the caller intends to permanently delete chains related 2321 * to topological recursions (BREF_TYPE_INDIRECT, BREF_TYPE_FREEMAP_NODE), 2322 * where the chains underneath the node being deleted are given a new parent 2323 * above the node being deleted. 
2324 */ 2325 static 2326 void 2327 hammer2_chain_repchange(hammer2_chain_t *parent, hammer2_chain_t *chain) 2328 { 2329 struct hammer2_reptrack *reptrack; 2330 2331 KKASSERT(chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)); 2332 while (chain->core.reptrack) { 2333 hammer2_spin_ex(&parent->core.spin); 2334 hammer2_spin_ex(&chain->core.spin); 2335 reptrack = chain->core.reptrack; 2336 if (reptrack == NULL) { 2337 hammer2_spin_unex(&chain->core.spin); 2338 hammer2_spin_unex(&parent->core.spin); 2339 break; 2340 } 2341 hammer2_spin_ex(&reptrack->spin); 2342 chain->core.reptrack = reptrack->next; 2343 reptrack->chain = parent; 2344 reptrack->next = parent->core.reptrack; 2345 parent->core.reptrack = reptrack; 2346 hammer2_chain_ref(parent); /* reptrack */ 2347 2348 hammer2_spin_unex(&chain->core.spin); 2349 hammer2_spin_unex(&parent->core.spin); 2350 kprintf("hammer2: debug repchange %p %p->%p\n", 2351 reptrack, chain, parent); 2352 hammer2_chain_drop(chain); /* reptrack */ 2353 } 2354 } 2355 2356 /* 2357 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 2358 * (*parentp) typically points to an inode but can also point to a related 2359 * indirect block and this function will recurse upwards and find the inode 2360 * or the nearest undeleted indirect block covering the key range. 2361 * 2362 * This function unconditionally sets *errorp, replacing any previous value. 2363 * 2364 * (*parentp) must be exclusive or shared locked (depending on flags) and 2365 * referenced and can be an inode or an existing indirect block within the 2366 * inode. 2367 * 2368 * If (*parent) is errored out, this function will not attempt to recurse 2369 * the radix tree and will return NULL along with an appropriate *errorp. 2370 * If NULL is returned and *errorp is 0, the requested lookup could not be 2371 * located. 
2372 * 2373 * On return (*parentp) will be modified to point at the deepest parent chain 2374 * element encountered during the search, as a helper for an insertion or 2375 * deletion. 2376 * 2377 * The new (*parentp) will be locked shared or exclusive (depending on flags), 2378 * and referenced, and the old will be unlocked and dereferenced (no change 2379 * if they are both the same). This is particularly important if the caller 2380 * wishes to insert a new chain, (*parentp) will be set properly even if NULL 2381 * is returned, as long as no error occurred. 2382 * 2383 * The matching chain will be returned locked according to flags. 2384 * 2385 * -- 2386 * 2387 * NULL is returned if no match was found, but (*parentp) will still 2388 * potentially be adjusted. 2389 * 2390 * On return (*key_nextp) will point to an iterative value for key_beg. 2391 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 2392 * 2393 * This function will also recurse up the chain if the key is not within the 2394 * current parent's range. (*parentp) can never be set to NULL. An iteration 2395 * can simply allow (*parentp) to float inside the loop. 2396 * 2397 * NOTE! chain->data is not always resolved. By default it will not be 2398 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 2399 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 2400 * BREF_TYPE_DATA as the device buffer can alias the logical file 2401 * buffer). 
2402 */ 2403 hammer2_chain_t * 2404 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 2405 hammer2_key_t key_beg, hammer2_key_t key_end, 2406 int *errorp, int flags) 2407 { 2408 hammer2_chain_t *parent; 2409 hammer2_chain_t *chain; 2410 hammer2_blockref_t *base; 2411 hammer2_blockref_t *bref; 2412 hammer2_blockref_t bsave; 2413 hammer2_key_t scan_beg; 2414 hammer2_key_t scan_end; 2415 int count = 0; 2416 int how_always = HAMMER2_RESOLVE_ALWAYS; 2417 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2418 int how; 2419 int generation; 2420 int maxloops = 300000; 2421 2422 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2423 how_maybe = how_always; 2424 how = HAMMER2_RESOLVE_ALWAYS; 2425 } else if (flags & HAMMER2_LOOKUP_NODATA) { 2426 how = HAMMER2_RESOLVE_NEVER; 2427 } else { 2428 how = HAMMER2_RESOLVE_MAYBE; 2429 } 2430 if (flags & HAMMER2_LOOKUP_SHARED) { 2431 how_maybe |= HAMMER2_RESOLVE_SHARED; 2432 how_always |= HAMMER2_RESOLVE_SHARED; 2433 how |= HAMMER2_RESOLVE_SHARED; 2434 } 2435 2436 /* 2437 * Recurse (*parentp) upward if necessary until the parent completely 2438 * encloses the key range or we hit the inode. 2439 * 2440 * Handle races against the flusher deleting indirect nodes on its 2441 * way back up by continuing to recurse upward past the deletion. 2442 */ 2443 parent = *parentp; 2444 *errorp = 0; 2445 2446 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2447 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2448 scan_beg = parent->bref.key; 2449 scan_end = scan_beg + 2450 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2451 if ((parent->flags & HAMMER2_CHAIN_DELETED) == 0) { 2452 if (key_beg >= scan_beg && key_end <= scan_end) 2453 break; 2454 } 2455 parent = hammer2_chain_repparent(parentp, how_maybe); 2456 } 2457 again: 2458 if (--maxloops == 0) 2459 panic("hammer2_chain_lookup: maxloops"); 2460 2461 /* 2462 * MATCHIND case that does not require parent->data (do prior to 2463 * parent->error check). 
2464 */ 2465 switch(parent->bref.type) { 2466 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2467 case HAMMER2_BREF_TYPE_INDIRECT: 2468 if (flags & HAMMER2_LOOKUP_MATCHIND) { 2469 scan_beg = parent->bref.key; 2470 scan_end = scan_beg + 2471 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2472 if (key_beg == scan_beg && key_end == scan_end) { 2473 chain = parent; 2474 hammer2_chain_ref(chain); 2475 hammer2_chain_lock(chain, how_maybe); 2476 *key_nextp = scan_end + 1; 2477 goto done; 2478 } 2479 } 2480 break; 2481 default: 2482 break; 2483 } 2484 2485 /* 2486 * No lookup is possible if the parent is errored. We delayed 2487 * this check as long as we could to ensure that the parent backup, 2488 * embedded data, and MATCHIND code could still execute. 2489 */ 2490 if (parent->error) { 2491 *errorp = parent->error; 2492 return NULL; 2493 } 2494 2495 /* 2496 * Locate the blockref array. Currently we do a fully associative 2497 * search through the array. 2498 */ 2499 switch(parent->bref.type) { 2500 case HAMMER2_BREF_TYPE_INODE: 2501 /* 2502 * Special shortcut for embedded data returns the inode 2503 * itself. Callers must detect this condition and access 2504 * the embedded data (the strategy code does this for us). 2505 * 2506 * This is only applicable to regular files and softlinks. 2507 * 2508 * We need a second lock on parent. 
Since we already have 2509 * a lock we must pass LOCKAGAIN to prevent unexpected 2510 * blocking (we don't want to block on a second shared 2511 * ref if an exclusive lock is pending) 2512 */ 2513 if (parent->data->ipdata.meta.op_flags & 2514 HAMMER2_OPFLAG_DIRECTDATA) { 2515 if (flags & HAMMER2_LOOKUP_NODIRECT) { 2516 chain = NULL; 2517 *key_nextp = key_end + 1; 2518 goto done; 2519 } 2520 hammer2_chain_ref(parent); 2521 hammer2_chain_lock(parent, how_always | 2522 HAMMER2_RESOLVE_LOCKAGAIN); 2523 *key_nextp = key_end + 1; 2524 return (parent); 2525 } 2526 base = &parent->data->ipdata.u.blockset.blockref[0]; 2527 count = HAMMER2_SET_COUNT; 2528 break; 2529 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2530 case HAMMER2_BREF_TYPE_INDIRECT: 2531 /* 2532 * Optimize indirect blocks in the INITIAL state to avoid 2533 * I/O. 2534 * 2535 * Debugging: Enter permanent wait state instead of 2536 * panicing on unexpectedly NULL data for the moment. 2537 */ 2538 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2539 base = NULL; 2540 } else { 2541 if (parent->data == NULL) { 2542 kprintf("hammer2: unexpected NULL data " 2543 "on %p\n", parent); 2544 while (1) 2545 tsleep(parent, 0, "xxx", 0); 2546 } 2547 base = &parent->data->npdata[0]; 2548 } 2549 count = parent->bytes / sizeof(hammer2_blockref_t); 2550 break; 2551 case HAMMER2_BREF_TYPE_VOLUME: 2552 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2553 count = HAMMER2_SET_COUNT; 2554 break; 2555 case HAMMER2_BREF_TYPE_FREEMAP: 2556 base = &parent->data->blkset.blockref[0]; 2557 count = HAMMER2_SET_COUNT; 2558 break; 2559 default: 2560 panic("hammer2_chain_lookup: unrecognized " 2561 "blockref(B) type: %d", 2562 parent->bref.type); 2563 base = NULL; /* safety */ 2564 count = 0; /* safety */ 2565 break; 2566 } 2567 2568 /* 2569 * Merged scan to find next candidate. 2570 * 2571 * hammer2_base_*() functions require the parent->core.live_* fields 2572 * to be synchronized. 
2573 * 2574 * We need to hold the spinlock to access the block array and RB tree 2575 * and to interlock chain creation. 2576 */ 2577 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2578 hammer2_chain_countbrefs(parent, base, count); 2579 2580 /* 2581 * Combined search 2582 */ 2583 hammer2_spin_ex(&parent->core.spin); 2584 chain = hammer2_combined_find(parent, base, count, 2585 key_nextp, 2586 key_beg, key_end, 2587 &bref); 2588 generation = parent->core.generation; 2589 2590 /* 2591 * Exhausted parent chain, iterate. 2592 */ 2593 if (bref == NULL) { 2594 KKASSERT(chain == NULL); 2595 hammer2_spin_unex(&parent->core.spin); 2596 if (key_beg == key_end) /* short cut single-key case */ 2597 return (NULL); 2598 2599 /* 2600 * Stop if we reached the end of the iteration. 2601 */ 2602 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2603 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2604 return (NULL); 2605 } 2606 2607 /* 2608 * Calculate next key, stop if we reached the end of the 2609 * iteration, otherwise go up one level and loop. 2610 */ 2611 key_beg = parent->bref.key + 2612 ((hammer2_key_t)1 << parent->bref.keybits); 2613 if (key_beg == 0 || key_beg > key_end) 2614 return (NULL); 2615 parent = hammer2_chain_repparent(parentp, how_maybe); 2616 goto again; 2617 } 2618 2619 /* 2620 * Selected from blockref or in-memory chain. 2621 */ 2622 bsave = *bref; 2623 if (chain == NULL) { 2624 hammer2_spin_unex(&parent->core.spin); 2625 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2626 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2627 chain = hammer2_chain_get(parent, generation, 2628 &bsave, how_maybe); 2629 } else { 2630 chain = hammer2_chain_get(parent, generation, 2631 &bsave, how); 2632 } 2633 if (chain == NULL) 2634 goto again; 2635 } else { 2636 hammer2_chain_ref(chain); 2637 hammer2_spin_unex(&parent->core.spin); 2638 2639 /* 2640 * chain is referenced but not locked. We must lock the 2641 * chain to obtain definitive state. 
2642 */ 2643 if (bsave.type == HAMMER2_BREF_TYPE_INDIRECT || 2644 bsave.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2645 hammer2_chain_lock(chain, how_maybe); 2646 } else { 2647 hammer2_chain_lock(chain, how); 2648 } 2649 KKASSERT(chain->parent == parent); 2650 } 2651 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 2652 chain->parent != parent) { 2653 hammer2_chain_unlock(chain); 2654 hammer2_chain_drop(chain); 2655 chain = NULL; /* SAFETY */ 2656 goto again; 2657 } 2658 2659 2660 /* 2661 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2662 * 2663 * NOTE: Chain's key range is not relevant as there might be 2664 * one-offs within the range that are not deleted. 2665 * 2666 * NOTE: Lookups can race delete-duplicate because 2667 * delete-duplicate does not lock the parent's core 2668 * (they just use the spinlock on the core). 2669 */ 2670 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2671 kprintf("skip deleted chain %016jx.%02x key=%016jx\n", 2672 chain->bref.data_off, chain->bref.type, 2673 chain->bref.key); 2674 hammer2_chain_unlock(chain); 2675 hammer2_chain_drop(chain); 2676 chain = NULL; /* SAFETY */ 2677 key_beg = *key_nextp; 2678 if (key_beg == 0 || key_beg > key_end) 2679 return(NULL); 2680 goto again; 2681 } 2682 2683 /* 2684 * If the chain element is an indirect block it becomes the new 2685 * parent and we loop on it. We must maintain our top-down locks 2686 * to prevent the flusher from interfering (i.e. doing a 2687 * delete-duplicate and leaving us recursing down a deleted chain). 2688 * 2689 * The parent always has to be locked with at least RESOLVE_MAYBE 2690 * so we can access its data. It might need a fixup if the caller 2691 * passed incompatible flags. Be careful not to cause a deadlock 2692 * as a data-load requires an exclusive lock. 2693 * 2694 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2695 * range is within the requested key range we return the indirect 2696 * block and do NOT loop. 
This is usually only used to acquire 2697 * freemap nodes. 2698 */ 2699 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2700 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2701 hammer2_chain_unlock(parent); 2702 hammer2_chain_drop(parent); 2703 *parentp = parent = chain; 2704 chain = NULL; /* SAFETY */ 2705 goto again; 2706 } 2707 done: 2708 /* 2709 * All done, return the locked chain. 2710 * 2711 * If the caller does not want a locked chain, replace the lock with 2712 * a ref. Perhaps this can eventually be optimized to not obtain the 2713 * lock in the first place for situations where the data does not 2714 * need to be resolved. 2715 * 2716 * NOTE! A chain->error must be tested by the caller upon return. 2717 * *errorp is only set based on issues which occur while 2718 * trying to reach the chain. 2719 */ 2720 return (chain); 2721 } 2722 2723 /* 2724 * After having issued a lookup we can iterate all matching keys. 2725 * 2726 * If chain is non-NULL we continue the iteration from just after it's index. 2727 * 2728 * If chain is NULL we assume the parent was exhausted and continue the 2729 * iteration at the next parent. 2730 * 2731 * If a fatal error occurs (typically an I/O error), a dummy chain is 2732 * returned with chain->error and error-identifying information set. This 2733 * chain will assert if you try to do anything fancy with it. 2734 * 2735 * XXX Depending on where the error occurs we should allow continued iteration. 2736 * 2737 * parent must be locked on entry and remains locked throughout. chain's 2738 * lock status must match flags. Chain is always at least referenced. 2739 * 2740 * WARNING! The MATCHIND flag does not apply to this function. 
2741 */ 2742 hammer2_chain_t * 2743 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2744 hammer2_key_t *key_nextp, 2745 hammer2_key_t key_beg, hammer2_key_t key_end, 2746 int *errorp, int flags) 2747 { 2748 hammer2_chain_t *parent; 2749 int how_maybe; 2750 2751 /* 2752 * Calculate locking flags for upward recursion. 2753 */ 2754 how_maybe = HAMMER2_RESOLVE_MAYBE; 2755 if (flags & HAMMER2_LOOKUP_SHARED) 2756 how_maybe |= HAMMER2_RESOLVE_SHARED; 2757 2758 parent = *parentp; 2759 *errorp = 0; 2760 2761 /* 2762 * Calculate the next index and recalculate the parent if necessary. 2763 */ 2764 if (chain) { 2765 key_beg = chain->bref.key + 2766 ((hammer2_key_t)1 << chain->bref.keybits); 2767 hammer2_chain_unlock(chain); 2768 hammer2_chain_drop(chain); 2769 2770 /* 2771 * chain invalid past this point, but we can still do a 2772 * pointer comparison w/parent. 2773 * 2774 * Any scan where the lookup returned degenerate data embedded 2775 * in the inode has an invalid index and must terminate. 2776 */ 2777 if (chain == parent) 2778 return(NULL); 2779 if (key_beg == 0 || key_beg > key_end) 2780 return(NULL); 2781 chain = NULL; 2782 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2783 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2784 /* 2785 * We reached the end of the iteration. 2786 */ 2787 return (NULL); 2788 } else { 2789 /* 2790 * Continue iteration with next parent unless the current 2791 * parent covers the range. 2792 * 2793 * (This also handles the case of a deleted, empty indirect 2794 * node). 
2795 */ 2796 key_beg = parent->bref.key + 2797 ((hammer2_key_t)1 << parent->bref.keybits); 2798 if (key_beg == 0 || key_beg > key_end) 2799 return (NULL); 2800 parent = hammer2_chain_repparent(parentp, how_maybe); 2801 } 2802 2803 /* 2804 * And execute 2805 */ 2806 return (hammer2_chain_lookup(parentp, key_nextp, 2807 key_beg, key_end, 2808 errorp, flags)); 2809 } 2810 2811 /* 2812 * Caller wishes to iterate chains under parent, loading new chains into 2813 * chainp. Caller must initialize *chainp to NULL and *firstp to 1, and 2814 * then call hammer2_chain_scan() repeatedly until a non-zero return. 2815 * During the scan, *firstp will be set to 0 and (*chainp) will be replaced 2816 * with the returned chain for the scan. The returned *chainp will be 2817 * locked and referenced. Any prior contents will be unlocked and dropped. 2818 * 2819 * Caller should check the return value. A normal scan EOF will return 2820 * exactly HAMMER2_ERROR_EOF. Any other non-zero value indicates an 2821 * error trying to access parent data. Any error in the returned chain 2822 * must be tested separately by the caller. 2823 * 2824 * (*chainp) is dropped on each scan, but will only be set if the returned 2825 * element itself can recurse. Leaf elements are NOT resolved, loaded, or 2826 * returned via *chainp. The caller will get their bref only. 2827 * 2828 * The raw scan function is similar to lookup/next but does not seek to a key. 2829 * Blockrefs are iterated via first_bref = (parent, NULL) and 2830 * next_chain = (parent, bref). 2831 * 2832 * The passed-in parent must be locked and its data resolved. The function 2833 * nominally returns a locked and referenced *chainp != NULL for chains 2834 * the caller might need to recurse on (and will dipose of any *chainp passed 2835 * in). The caller must check the chain->bref.type either way. 
2836 */ 2837 int 2838 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 2839 hammer2_blockref_t *bref, int *firstp, 2840 int flags) 2841 { 2842 hammer2_blockref_t *base; 2843 hammer2_blockref_t *bref_ptr; 2844 hammer2_key_t key; 2845 hammer2_key_t next_key; 2846 hammer2_chain_t *chain = NULL; 2847 int count = 0; 2848 int how; 2849 int generation; 2850 int maxloops = 300000; 2851 int error; 2852 2853 error = 0; 2854 2855 /* 2856 * Scan flags borrowed from lookup. 2857 */ 2858 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2859 how = HAMMER2_RESOLVE_ALWAYS; 2860 } else if (flags & HAMMER2_LOOKUP_NODATA) { 2861 how = HAMMER2_RESOLVE_NEVER; 2862 } else { 2863 how = HAMMER2_RESOLVE_MAYBE; 2864 } 2865 if (flags & HAMMER2_LOOKUP_SHARED) { 2866 how |= HAMMER2_RESOLVE_SHARED; 2867 } 2868 2869 /* 2870 * Calculate key to locate first/next element, unlocking the previous 2871 * element as we go. Be careful, the key calculation can overflow. 2872 * 2873 * (also reset bref to NULL) 2874 */ 2875 if (*firstp) { 2876 key = 0; 2877 *firstp = 0; 2878 } else { 2879 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 2880 if ((chain = *chainp) != NULL) { 2881 *chainp = NULL; 2882 hammer2_chain_unlock(chain); 2883 hammer2_chain_drop(chain); 2884 chain = NULL; 2885 } 2886 if (key == 0) { 2887 error |= HAMMER2_ERROR_EOF; 2888 goto done; 2889 } 2890 } 2891 2892 again: 2893 if (parent->error) { 2894 error = parent->error; 2895 goto done; 2896 } 2897 if (--maxloops == 0) 2898 panic("hammer2_chain_scan: maxloops"); 2899 2900 /* 2901 * Locate the blockref array. Currently we do a fully associative 2902 * search through the array. 2903 */ 2904 switch(parent->bref.type) { 2905 case HAMMER2_BREF_TYPE_INODE: 2906 /* 2907 * An inode with embedded data has no sub-chains. 2908 * 2909 * WARNING! Bulk scan code may pass a static chain marked 2910 * as BREF_TYPE_INODE with a copy of the volume 2911 * root blockset to snapshot the volume. 
2912 */ 2913 if (parent->data->ipdata.meta.op_flags & 2914 HAMMER2_OPFLAG_DIRECTDATA) { 2915 error |= HAMMER2_ERROR_EOF; 2916 goto done; 2917 } 2918 base = &parent->data->ipdata.u.blockset.blockref[0]; 2919 count = HAMMER2_SET_COUNT; 2920 break; 2921 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2922 case HAMMER2_BREF_TYPE_INDIRECT: 2923 /* 2924 * Optimize indirect blocks in the INITIAL state to avoid 2925 * I/O. 2926 */ 2927 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2928 base = NULL; 2929 } else { 2930 if (parent->data == NULL) 2931 panic("parent->data is NULL"); 2932 base = &parent->data->npdata[0]; 2933 } 2934 count = parent->bytes / sizeof(hammer2_blockref_t); 2935 break; 2936 case HAMMER2_BREF_TYPE_VOLUME: 2937 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2938 count = HAMMER2_SET_COUNT; 2939 break; 2940 case HAMMER2_BREF_TYPE_FREEMAP: 2941 base = &parent->data->blkset.blockref[0]; 2942 count = HAMMER2_SET_COUNT; 2943 break; 2944 default: 2945 panic("hammer2_chain_scan: unrecognized blockref type: %d", 2946 parent->bref.type); 2947 base = NULL; /* safety */ 2948 count = 0; /* safety */ 2949 break; 2950 } 2951 2952 /* 2953 * Merged scan to find next candidate. 2954 * 2955 * hammer2_base_*() functions require the parent->core.live_* fields 2956 * to be synchronized. 2957 * 2958 * We need to hold the spinlock to access the block array and RB tree 2959 * and to interlock chain creation. 2960 */ 2961 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2962 hammer2_chain_countbrefs(parent, base, count); 2963 2964 next_key = 0; 2965 bref_ptr = NULL; 2966 hammer2_spin_ex(&parent->core.spin); 2967 chain = hammer2_combined_find(parent, base, count, 2968 &next_key, 2969 key, HAMMER2_KEY_MAX, 2970 &bref_ptr); 2971 generation = parent->core.generation; 2972 2973 /* 2974 * Exhausted parent chain, we're done. 
2975 */ 2976 if (bref_ptr == NULL) { 2977 hammer2_spin_unex(&parent->core.spin); 2978 KKASSERT(chain == NULL); 2979 error |= HAMMER2_ERROR_EOF; 2980 goto done; 2981 } 2982 2983 /* 2984 * Copy into the supplied stack-based blockref. 2985 */ 2986 *bref = *bref_ptr; 2987 2988 /* 2989 * Selected from blockref or in-memory chain. 2990 */ 2991 if (chain == NULL) { 2992 switch(bref->type) { 2993 case HAMMER2_BREF_TYPE_INODE: 2994 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2995 case HAMMER2_BREF_TYPE_INDIRECT: 2996 case HAMMER2_BREF_TYPE_VOLUME: 2997 case HAMMER2_BREF_TYPE_FREEMAP: 2998 /* 2999 * Recursion, always get the chain 3000 */ 3001 hammer2_spin_unex(&parent->core.spin); 3002 chain = hammer2_chain_get(parent, generation, 3003 bref, how); 3004 if (chain == NULL) 3005 goto again; 3006 break; 3007 default: 3008 /* 3009 * No recursion, do not waste time instantiating 3010 * a chain, just iterate using the bref. 3011 */ 3012 hammer2_spin_unex(&parent->core.spin); 3013 break; 3014 } 3015 } else { 3016 /* 3017 * Recursion or not we need the chain in order to supply 3018 * the bref. 3019 */ 3020 hammer2_chain_ref(chain); 3021 hammer2_spin_unex(&parent->core.spin); 3022 hammer2_chain_lock(chain, how); 3023 } 3024 if (chain && 3025 (bcmp(bref, &chain->bref, sizeof(*bref)) || 3026 chain->parent != parent)) { 3027 hammer2_chain_unlock(chain); 3028 hammer2_chain_drop(chain); 3029 chain = NULL; 3030 goto again; 3031 } 3032 3033 /* 3034 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 3035 * 3036 * NOTE: chain's key range is not relevant as there might be 3037 * one-offs within the range that are not deleted. 3038 * 3039 * NOTE: XXX this could create problems with scans used in 3040 * situations other than mount-time recovery. 3041 * 3042 * NOTE: Lookups can race delete-duplicate because 3043 * delete-duplicate does not lock the parent's core 3044 * (they just use the spinlock on the core). 
3045 */ 3046 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3047 hammer2_chain_unlock(chain); 3048 hammer2_chain_drop(chain); 3049 chain = NULL; 3050 3051 key = next_key; 3052 if (key == 0) { 3053 error |= HAMMER2_ERROR_EOF; 3054 goto done; 3055 } 3056 goto again; 3057 } 3058 3059 done: 3060 /* 3061 * All done, return the bref or NULL, supply chain if necessary. 3062 */ 3063 if (chain) 3064 *chainp = chain; 3065 return (error); 3066 } 3067 3068 /* 3069 * Create and return a new hammer2 system memory structure of the specified 3070 * key, type and size and insert it under (*parentp). This is a full 3071 * insertion, based on the supplied key/keybits, and may involve creating 3072 * indirect blocks and moving other chains around via delete/duplicate. 3073 * 3074 * This call can be made with parent == NULL as long as a non -1 methods 3075 * is supplied. hmp must also be supplied in this situation (otherwise 3076 * hmp is extracted from the supplied parent). The chain will be detached 3077 * from the topology. A later call with both parent and chain can be made 3078 * to attach it. 3079 * 3080 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 3081 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3082 * FULL. This typically means that the caller is creating the chain after 3083 * doing a hammer2_chain_lookup(). 3084 * 3085 * (*parentp) must be exclusive locked and may be replaced on return 3086 * depending on how much work the function had to do. 3087 * 3088 * (*parentp) must not be errored or this function will assert. 3089 * 3090 * (*chainp) usually starts out NULL and returns the newly created chain, 3091 * but if the caller desires the caller may allocate a disconnected chain 3092 * and pass it in instead. 3093 * 3094 * This function should NOT be used to insert INDIRECT blocks. It is 3095 * typically used to create/insert inodes and data blocks. 
3096 * 3097 * Caller must pass-in an exclusively locked parent the new chain is to 3098 * be inserted under, and optionally pass-in a disconnected, exclusively 3099 * locked chain to insert (else we create a new chain). The function will 3100 * adjust (*parentp) as necessary, create or connect the chain, and 3101 * return an exclusively locked chain in *chainp. 3102 * 3103 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 3104 * and will be reassigned. 3105 * 3106 * NOTE: returns HAMMER_ERROR_* flags 3107 */ 3108 int 3109 hammer2_chain_create(hammer2_chain_t **parentp, hammer2_chain_t **chainp, 3110 hammer2_dev_t *hmp, hammer2_pfs_t *pmp, int methods, 3111 hammer2_key_t key, int keybits, int type, size_t bytes, 3112 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 3113 { 3114 hammer2_chain_t *chain; 3115 hammer2_chain_t *parent; 3116 hammer2_blockref_t *base; 3117 hammer2_blockref_t dummy; 3118 int allocated = 0; 3119 int error = 0; 3120 int count; 3121 int maxloops = 300000; 3122 3123 /* 3124 * Topology may be crossing a PFS boundary. 3125 */ 3126 parent = *parentp; 3127 if (parent) { 3128 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3129 KKASSERT(parent->error == 0); 3130 hmp = parent->hmp; 3131 } 3132 chain = *chainp; 3133 3134 if (chain == NULL) { 3135 /* 3136 * First allocate media space and construct the dummy bref, 3137 * then allocate the in-memory chain structure. Set the 3138 * INITIAL flag for fresh chains which do not have embedded 3139 * data. 3140 */ 3141 bzero(&dummy, sizeof(dummy)); 3142 dummy.type = type; 3143 dummy.key = key; 3144 dummy.keybits = keybits; 3145 dummy.data_off = hammer2_getradix(bytes); 3146 3147 /* 3148 * Inherit methods from parent by default. Primarily used 3149 * for BREF_TYPE_DATA. Non-data types *must* be set to 3150 * a non-NONE check algorithm. 
3151 */ 3152 if (methods == HAMMER2_METH_DEFAULT) 3153 dummy.methods = parent->bref.methods; 3154 else 3155 dummy.methods = (uint8_t)methods; 3156 3157 if (type != HAMMER2_BREF_TYPE_DATA && 3158 HAMMER2_DEC_CHECK(dummy.methods) == HAMMER2_CHECK_NONE) { 3159 dummy.methods |= 3160 HAMMER2_ENC_CHECK(HAMMER2_CHECK_DEFAULT); 3161 } 3162 3163 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 3164 3165 /* 3166 * Lock the chain manually, chain_lock will load the chain 3167 * which we do NOT want to do. (note: chain->refs is set 3168 * to 1 by chain_alloc() for us, but lockcnt is not). 3169 */ 3170 chain->lockcnt = 1; 3171 hammer2_mtx_ex(&chain->lock); 3172 allocated = 1; 3173 3174 /* 3175 * Set INITIAL to optimize I/O. The flag will generally be 3176 * processed when we call hammer2_chain_modify(). 3177 */ 3178 switch(type) { 3179 case HAMMER2_BREF_TYPE_VOLUME: 3180 case HAMMER2_BREF_TYPE_FREEMAP: 3181 panic("hammer2_chain_create: called with volume type"); 3182 break; 3183 case HAMMER2_BREF_TYPE_INDIRECT: 3184 panic("hammer2_chain_create: cannot be used to" 3185 "create indirect block"); 3186 break; 3187 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3188 panic("hammer2_chain_create: cannot be used to" 3189 "create freemap root or node"); 3190 break; 3191 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3192 KKASSERT(bytes == sizeof(chain->data->bmdata)); 3193 /* fall through */ 3194 case HAMMER2_BREF_TYPE_DIRENT: 3195 case HAMMER2_BREF_TYPE_INODE: 3196 case HAMMER2_BREF_TYPE_DATA: 3197 default: 3198 /* 3199 * leave chain->data NULL, set INITIAL 3200 */ 3201 KKASSERT(chain->data == NULL); 3202 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 3203 break; 3204 } 3205 } else { 3206 /* 3207 * We are reattaching a previously deleted chain, possibly 3208 * under a new parent and possibly with a new key/keybits. 3209 * The chain does not have to be in a modified state. The 3210 * UPDATE flag will be set later on in this routine. 3211 * 3212 * Do NOT mess with the current state of the INITIAL flag. 
3213 */ 3214 chain->bref.key = key; 3215 chain->bref.keybits = keybits; 3216 if (chain->flags & HAMMER2_CHAIN_DELETED) 3217 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 3218 KKASSERT(chain->parent == NULL); 3219 } 3220 3221 /* 3222 * Set the appropriate bref flag if requested. 3223 * 3224 * NOTE! Callers can call this function to move chains without 3225 * knowing about special flags, so don't clear bref flags 3226 * here! 3227 */ 3228 if (flags & HAMMER2_INSERT_PFSROOT) 3229 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 3230 3231 if (parent == NULL) 3232 goto skip; 3233 3234 /* 3235 * Calculate how many entries we have in the blockref array and 3236 * determine if an indirect block is required when inserting into 3237 * the parent. 3238 */ 3239 again: 3240 if (--maxloops == 0) 3241 panic("hammer2_chain_create: maxloops"); 3242 3243 switch(parent->bref.type) { 3244 case HAMMER2_BREF_TYPE_INODE: 3245 if ((parent->data->ipdata.meta.op_flags & 3246 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 3247 kprintf("hammer2: parent set for direct-data! 
" 3248 "pkey=%016jx ckey=%016jx\n", 3249 parent->bref.key, 3250 chain->bref.key); 3251 } 3252 KKASSERT((parent->data->ipdata.meta.op_flags & 3253 HAMMER2_OPFLAG_DIRECTDATA) == 0); 3254 KKASSERT(parent->data != NULL); 3255 base = &parent->data->ipdata.u.blockset.blockref[0]; 3256 count = HAMMER2_SET_COUNT; 3257 break; 3258 case HAMMER2_BREF_TYPE_INDIRECT: 3259 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3260 if (parent->flags & HAMMER2_CHAIN_INITIAL) 3261 base = NULL; 3262 else 3263 base = &parent->data->npdata[0]; 3264 count = parent->bytes / sizeof(hammer2_blockref_t); 3265 break; 3266 case HAMMER2_BREF_TYPE_VOLUME: 3267 KKASSERT(parent->data != NULL); 3268 base = &parent->data->voldata.sroot_blockset.blockref[0]; 3269 count = HAMMER2_SET_COUNT; 3270 break; 3271 case HAMMER2_BREF_TYPE_FREEMAP: 3272 KKASSERT(parent->data != NULL); 3273 base = &parent->data->blkset.blockref[0]; 3274 count = HAMMER2_SET_COUNT; 3275 break; 3276 default: 3277 panic("hammer2_chain_create: unrecognized blockref type: %d", 3278 parent->bref.type); 3279 base = NULL; 3280 count = 0; 3281 break; 3282 } 3283 3284 /* 3285 * Make sure we've counted the brefs 3286 */ 3287 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 3288 hammer2_chain_countbrefs(parent, base, count); 3289 3290 KASSERT(parent->core.live_count >= 0 && 3291 parent->core.live_count <= count, 3292 ("bad live_count %d/%d (%02x, %d)", 3293 parent->core.live_count, count, 3294 parent->bref.type, parent->bytes)); 3295 3296 /* 3297 * If no free blockref could be found we must create an indirect 3298 * block and move a number of blockrefs into it. With the parent 3299 * locked we can safely lock each child in order to delete+duplicate 3300 * it without causing a deadlock. 3301 * 3302 * This may return the new indirect block or the old parent depending 3303 * on where the key falls. NULL is returned on error. 
3304 */ 3305 if (parent->core.live_count == count) { 3306 hammer2_chain_t *nparent; 3307 3308 KKASSERT((flags & HAMMER2_INSERT_SAMEPARENT) == 0); 3309 3310 nparent = hammer2_chain_create_indirect(parent, key, keybits, 3311 mtid, type, &error); 3312 if (nparent == NULL) { 3313 if (allocated) 3314 hammer2_chain_drop(chain); 3315 chain = NULL; 3316 goto done; 3317 } 3318 if (parent != nparent) { 3319 hammer2_chain_unlock(parent); 3320 hammer2_chain_drop(parent); 3321 parent = *parentp = nparent; 3322 } 3323 goto again; 3324 } 3325 3326 /* 3327 * fall through if parent, or skip to here if no parent. 3328 */ 3329 skip: 3330 if (chain->flags & HAMMER2_CHAIN_DELETED) 3331 kprintf("Inserting deleted chain @%016jx\n", 3332 chain->bref.key); 3333 3334 /* 3335 * Link the chain into its parent. 3336 */ 3337 if (chain->parent != NULL) 3338 panic("hammer2: hammer2_chain_create: chain already connected"); 3339 KKASSERT(chain->parent == NULL); 3340 if (parent) { 3341 KKASSERT(parent->core.live_count < count); 3342 hammer2_chain_insert(parent, chain, 3343 HAMMER2_CHAIN_INSERT_SPIN | 3344 HAMMER2_CHAIN_INSERT_LIVE, 3345 0); 3346 } 3347 3348 if (allocated) { 3349 /* 3350 * Mark the newly created chain modified. This will cause 3351 * UPDATE to be set and process the INITIAL flag. 3352 * 3353 * Device buffers are not instantiated for DATA elements 3354 * as these are handled by logical buffers. 3355 * 3356 * Indirect and freemap node indirect blocks are handled 3357 * by hammer2_chain_create_indirect() and not by this 3358 * function. 3359 * 3360 * Data for all other bref types is expected to be 3361 * instantiated (INODE, LEAF). 
3362 */ 3363 switch(chain->bref.type) { 3364 case HAMMER2_BREF_TYPE_DATA: 3365 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3366 case HAMMER2_BREF_TYPE_DIRENT: 3367 case HAMMER2_BREF_TYPE_INODE: 3368 error = hammer2_chain_modify(chain, mtid, dedup_off, 3369 HAMMER2_MODIFY_OPTDATA); 3370 break; 3371 default: 3372 /* 3373 * Remaining types are not supported by this function. 3374 * In particular, INDIRECT and LEAF_NODE types are 3375 * handled by create_indirect(). 3376 */ 3377 panic("hammer2_chain_create: bad type: %d", 3378 chain->bref.type); 3379 /* NOT REACHED */ 3380 break; 3381 } 3382 } else { 3383 /* 3384 * When reconnecting a chain we must set UPDATE and 3385 * setflush so the flush recognizes that it must update 3386 * the bref in the parent. 3387 */ 3388 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 3389 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 3390 } 3391 3392 /* 3393 * We must setflush(parent) to ensure that it recurses through to 3394 * chain. setflush(chain) might not work because ONFLUSH is possibly 3395 * already set in the chain (so it won't recurse up to set it in the 3396 * parent). 3397 */ 3398 if (parent) 3399 hammer2_chain_setflush(parent); 3400 3401 done: 3402 *chainp = chain; 3403 3404 return (error); 3405 } 3406 3407 /* 3408 * Move the chain from its old parent to a new parent. The chain must have 3409 * already been deleted or already disconnected (or never associated) with 3410 * a parent. The chain is reassociated with the new parent and the deleted 3411 * flag will be cleared (no longer deleted). The chain's modification state 3412 * is not altered. 3413 * 3414 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 3415 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 3416 * FULL. This typically means that the caller is creating the chain after 3417 * doing a hammer2_chain_lookup(). 3418 * 3419 * Neither (parent) or (chain) can be errored. 
 *
 * If (parent) is non-NULL then the chain is inserted under the parent.
 *
 * If (parent) is NULL then the newly duplicated chain is not inserted
 * anywhere, similar to if it had just been chain_alloc()'d (suitable for
 * passing into hammer2_chain_create() after this function returns).
 *
 * WARNING! This function calls create which means it can insert indirect
 *	    blocks.  This can cause other unrelated chains in the parent to
 *	    be moved to a newly inserted indirect block in addition to the
 *	    specific chain.
 */
void
hammer2_chain_rename(hammer2_chain_t **parentp, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *parent;

	/*
	 * WARNING!  We should never resolve DATA to device buffers
	 *	     (XXX allow it if the caller did?), and since
	 *	     we currently do not have the logical buffer cache
	 *	     buffer in-hand to fix its cached physical offset
	 *	     we also force the modify code to not COW it. XXX
	 *
	 * NOTE!  We allow error'd chains to be renamed.  The bref itself
	 *	  is good and can be renamed.  The content, however, may
	 *	  be inaccessible.
	 */
	KKASSERT(chain->parent == NULL);
	/*KKASSERT(chain->error == 0); allow */
	bref = &chain->bref;

	/*
	 * If parent is not NULL the duplicated chain will be entered under
	 * the parent and the UPDATE bit set to tell flush to update
	 * the blockref.
	 *
	 * We must setflush(parent) to ensure that it recurses through to
	 * chain.  setflush(chain) might not work because ONFLUSH is possibly
	 * already set in the chain (so it won't recurse up to set it in the
	 * parent).
	 *
	 * Having both chains locked is extremely important for atomicity.
	 *
	 * NOTE: When parentp is NULL (or *parentp is NULL) this function
	 *	 intentionally does nothing beyond the asserts above; the
	 *	 chain is left disconnected for a later chain_create call.
	 */
	if (parentp && (parent = *parentp) != NULL) {
		KKASSERT(hammer2_mtx_owned(&parent->lock));
		KKASSERT(parent->refs > 0);
		KKASSERT(parent->error == 0);

		/*
		 * Re-insert the chain under the (possibly new) parent,
		 * reusing the key/keybits/type captured from its bref.
		 */
		hammer2_chain_create(parentp, &chain, NULL, chain->pmp,
				     HAMMER2_METH_DEFAULT,
				     bref->key, bref->keybits, bref->type,
				     chain->bytes, mtid, 0, flags);
		KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE);
		hammer2_chain_setflush(*parentp);
	}
}

/*
 * This works in tandem with delete_obref() to install a blockref in
 * (typically) an indirect block that is associated with the chain being
 * moved to *parentp.
 *
 * The reason we need this function is that the caller needs to maintain
 * the blockref as it was, and not generate a new blockref for what might
 * be a modified chain.  Otherwise stuff will leak into the flush that
 * the flush code's FLUSH_INODE_STOP flag is unable to catch.
 *
 * It is EXTREMELY important that we properly set CHAIN_BLKMAPUPD and
 * CHAIN_UPDATE.  We must set BLKMAPUPD if the bref does not match, and
 * we must clear CHAIN_UPDATE (that was likely set by the chain_rename) if
 * it does.  Otherwise we can end up in a situation where H2 is unable to
 * clean up the in-memory chain topology.
 *
 * The reason for this is that flushes do not generally flush through
 * BREF_TYPE_INODE chains and depend on a hammer2_inode_t queued to syncq
 * or sideq to properly flush and dispose of the related inode chain's flags.
 * Situations where the inode is not actually modified by the frontend,
 * but where we have to move the related chains around as we insert or cleanup
 * indirect blocks, can leave us with a 'dirty' (non-disposable) in-memory
 * inode chain that does not have a hammer2_inode_t associated with it.
 */
static void
hammer2_chain_rename_obref(hammer2_chain_t **parentp, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	hammer2_chain_rename(parentp, chain, mtid, flags);

	if (obref->type != HAMMER2_BREF_TYPE_EMPTY) {
		hammer2_blockref_t *tbase;
		int tcount;

		KKASSERT((chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0);
		/*
		 * NOTE(review): the hammer2_chain_modify() return value is
		 * not checked here -- presumably the parent was already
		 * successfully modified along the rename path; confirm.
		 */
		hammer2_chain_modify(*parentp, mtid, 0, 0);
		tbase = hammer2_chain_base_and_count(*parentp, &tcount);
		/*
		 * Install the preserved blockref (obref) rather than
		 * generating a fresh one, then fix up BLKMAPUPD/UPDATE
		 * depending on whether the chain's live bref still matches
		 * the installed one (see function header for why this is
		 * critical).
		 */
		hammer2_base_insert(*parentp, tbase, tcount, chain, obref);
		if (bcmp(obref, &chain->bref, sizeof(chain->bref))) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPUPD |
						      HAMMER2_CHAIN_UPDATE);
		} else {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
		}
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the parent's
 * blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 */
static int
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags,
			     hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0);
	KKASSERT(chain->parent == parent);

	if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		error = hammer2_chain_modify(parent, mtid, 0, 0);
		if (error)
			goto done;

		/*
		 * Calculate blockmap pointer
		 *
		 * NOTE: both spinlocks are held (chain first, then parent)
		 *	 across the RBTREE removal and the base_delete below.
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			panic("_hammer2_chain_delete_helper: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			break;
		}

		/*
		 * delete blockmapped chain from its parent.
		 *
		 * The parent is not affected by any statistics in chain
		 * which are pending synchronization.  That is, there is
		 * nothing to undo in the parent since they have not yet
		 * been incorporated into the parent.
		 *
		 * The parent is affected by statistics stored in inodes.
		 * Those have already been synchronized, so they must be
		 * undone.  XXX split update possible w/delete in middle?
		 */
		if (base) {
			hammer2_base_delete(parent, base, count, chain, obref);
		}
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 *
		 * Because chain was associated with a parent but not
		 * synchronized, the chain's *_count_up fields contain
		 * inode adjustment statistics which must be undone.
		 */
		hammer2_spin_ex(&chain->core.spin);
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
		hammer2_spin_unex(&chain->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
done:
	return error;
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the H2 error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect block only needs to follow these rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.
This 3694 * is accomplished by always recursing through matching keyspaces in 3695 * the hammer2_chain_lookup() and hammer2_chain_next() API. 3696 * 3697 * The current implementation calculates the current worst-case keyspace by 3698 * iterating the current parent and then divides it into two halves, choosing 3699 * whichever half has the most elements (not necessarily the half containing 3700 * the requested key). 3701 * 3702 * We can also opt to use the half with the least number of elements. This 3703 * causes lower-numbered keys (aka logical file offsets) to recurse through 3704 * fewer indirect blocks and higher-numbered keys to recurse through more. 3705 * This also has the risk of not moving enough elements to the new indirect 3706 * block and being forced to create several indirect blocks before the element 3707 * can be inserted. 3708 * 3709 * Must be called with an exclusively locked parent. 3710 * 3711 * NOTE: *errorp set to HAMMER_ERROR_* flags 3712 */ 3713 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3714 hammer2_key_t *keyp, int keybits, 3715 hammer2_blockref_t *base, int count); 3716 static int hammer2_chain_indkey_file(hammer2_chain_t *parent, 3717 hammer2_key_t *keyp, int keybits, 3718 hammer2_blockref_t *base, int count, 3719 int ncount); 3720 static int hammer2_chain_indkey_dir(hammer2_chain_t *parent, 3721 hammer2_key_t *keyp, int keybits, 3722 hammer2_blockref_t *base, int count, 3723 int ncount); 3724 static 3725 hammer2_chain_t * 3726 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3727 hammer2_key_t create_key, int create_bits, 3728 hammer2_tid_t mtid, int for_type, int *errorp) 3729 { 3730 hammer2_dev_t *hmp; 3731 hammer2_blockref_t *base; 3732 hammer2_blockref_t *bref; 3733 hammer2_blockref_t bsave; 3734 hammer2_blockref_t dummy; 3735 hammer2_chain_t *chain; 3736 hammer2_chain_t *ichain; 3737 hammer2_key_t key = create_key; 3738 hammer2_key_t key_beg; 3739 hammer2_key_t key_end; 3740 hammer2_key_t key_next; 3741 
int keybits = create_bits; 3742 int count; 3743 int ncount; 3744 int nbytes; 3745 int loops; 3746 int error; 3747 int reason; 3748 int generation; 3749 int maxloops = 300000; 3750 3751 /* 3752 * Calculate the base blockref pointer or NULL if the chain 3753 * is known to be empty. We need to calculate the array count 3754 * for RB lookups either way. 3755 */ 3756 hmp = parent->hmp; 3757 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3758 3759 /* 3760 * Pre-modify the parent now to avoid having to deal with error 3761 * processing if we tried to later (in the middle of our loop). 3762 * 3763 * We are going to be moving bref's around, the indirect blocks 3764 * cannot be in an initial state. Do not pass MODIFY_OPTDATA. 3765 */ 3766 *errorp = hammer2_chain_modify(parent, mtid, 0, 0); 3767 if (*errorp) { 3768 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3769 *errorp, hammer2_error_str(*errorp)); 3770 return NULL; 3771 } 3772 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 3773 3774 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3775 base = hammer2_chain_base_and_count(parent, &count); 3776 3777 /* 3778 * How big should our new indirect block be? It has to be at least 3779 * as large as its parent for splits to work properly. 3780 * 3781 * The freemap uses a specific indirect block size. The number of 3782 * levels are built dynamically and ultimately depend on the size 3783 * volume. Because freemap blocks are taken from the reserved areas 3784 * of the volume our goal is efficiency (fewer levels) and not so 3785 * much to save disk space. 3786 * 3787 * The first indirect block level for a directory usually uses 3788 * HAMMER2_IND_BYTES_MIN (4KB = 32 directory entries). Due to 3789 * the hash mechanism, this typically gives us a nominal 3790 * 32 * 4 entries with one level of indirection. 3791 * 3792 * We use HAMMER2_IND_BYTES_NOM (16KB = 128 blockrefs) for FILE 3793 * indirect blocks. 
The initial 4 entries in the inode gives us 3794 * 256KB. Up to 4 indirect blocks gives us 32MB. Three levels 3795 * of indirection gives us 137GB, and so forth. H2 can support 3796 * huge file sizes but they are not typical, so we try to stick 3797 * with compactness and do not use a larger indirect block size. 3798 * 3799 * We could use 64KB (PBUFSIZE), giving us 512 blockrefs, but 3800 * due to the way indirect blocks are created this usually winds 3801 * up being extremely inefficient for small files. Even though 3802 * 16KB requires more levels of indirection for very large files, 3803 * the 16KB records can be ganged together into 64KB DIOs. 3804 */ 3805 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3806 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3807 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3808 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3809 if (parent->data->ipdata.meta.type == 3810 HAMMER2_OBJTYPE_DIRECTORY) 3811 nbytes = HAMMER2_IND_BYTES_MIN; /* 4KB = 32 entries */ 3812 else 3813 nbytes = HAMMER2_IND_BYTES_NOM; /* 16KB = ~8MB file */ 3814 3815 } else { 3816 nbytes = HAMMER2_IND_BYTES_NOM; 3817 } 3818 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3819 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3820 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3821 nbytes = count * sizeof(hammer2_blockref_t); 3822 } 3823 ncount = nbytes / sizeof(hammer2_blockref_t); 3824 3825 /* 3826 * When creating an indirect block for a freemap node or leaf 3827 * the key/keybits must be fitted to static radix levels because 3828 * particular radix levels use particular reserved blocks in the 3829 * related zone. 3830 * 3831 * This routine calculates the key/radix of the indirect block 3832 * we need to create, and whether it is on the high-side or the 3833 * low-side. 
3834 */ 3835 switch(for_type) { 3836 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3837 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 3838 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3839 base, count); 3840 break; 3841 case HAMMER2_BREF_TYPE_DATA: 3842 keybits = hammer2_chain_indkey_file(parent, &key, keybits, 3843 base, count, ncount); 3844 break; 3845 case HAMMER2_BREF_TYPE_DIRENT: 3846 case HAMMER2_BREF_TYPE_INODE: 3847 keybits = hammer2_chain_indkey_dir(parent, &key, keybits, 3848 base, count, ncount); 3849 break; 3850 default: 3851 panic("illegal indirect block for bref type %d", for_type); 3852 break; 3853 } 3854 3855 /* 3856 * Normalize the key for the radix being represented, keeping the 3857 * high bits and throwing away the low bits. 3858 */ 3859 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3860 3861 /* 3862 * Ok, create our new indirect block 3863 */ 3864 bzero(&dummy, sizeof(dummy)); 3865 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3866 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3867 dummy.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3868 } else { 3869 dummy.type = HAMMER2_BREF_TYPE_INDIRECT; 3870 } 3871 dummy.key = key; 3872 dummy.keybits = keybits; 3873 dummy.data_off = hammer2_getradix(nbytes); 3874 dummy.methods = 3875 HAMMER2_ENC_CHECK(HAMMER2_DEC_CHECK(parent->bref.methods)) | 3876 HAMMER2_ENC_COMP(HAMMER2_COMP_NONE); 3877 3878 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy); 3879 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3880 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3881 /* ichain has one ref at this point */ 3882 3883 /* 3884 * We have to mark it modified to allocate its block, but use 3885 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3886 * it won't be acted upon by the flush code. 3887 * 3888 * XXX remove OPTDATA, we need a fully initialized indirect block to 3889 * be able to move the original blockref. 
3890 */ 3891 *errorp = hammer2_chain_modify(ichain, mtid, 0, 0); 3892 if (*errorp) { 3893 kprintf("hammer2_chain_create_indirect: error %08x %s\n", 3894 *errorp, hammer2_error_str(*errorp)); 3895 hammer2_chain_unlock(ichain); 3896 hammer2_chain_drop(ichain); 3897 return NULL; 3898 } 3899 KKASSERT((ichain->flags & HAMMER2_CHAIN_INITIAL) == 0); 3900 3901 /* 3902 * Iterate the original parent and move the matching brefs into 3903 * the new indirect block. 3904 * 3905 * XXX handle flushes. 3906 */ 3907 key_beg = 0; 3908 key_end = HAMMER2_KEY_MAX; 3909 key_next = 0; /* avoid gcc warnings */ 3910 hammer2_spin_ex(&parent->core.spin); 3911 loops = 0; 3912 reason = 0; 3913 3914 for (;;) { 3915 /* 3916 * Parent may have been modified, relocating its block array. 3917 * Reload the base pointer. 3918 */ 3919 base = hammer2_chain_base_and_count(parent, &count); 3920 3921 if (++loops > 100000) { 3922 hammer2_spin_unex(&parent->core.spin); 3923 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3924 reason, parent, base, count, key_next); 3925 } 3926 3927 /* 3928 * NOTE: spinlock stays intact, returned chain (if not NULL) 3929 * is not referenced or locked which means that we 3930 * cannot safely check its flagged / deletion status 3931 * until we lock it. 3932 */ 3933 chain = hammer2_combined_find(parent, base, count, 3934 &key_next, 3935 key_beg, key_end, 3936 &bref); 3937 generation = parent->core.generation; 3938 if (bref == NULL) 3939 break; 3940 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3941 3942 /* 3943 * Skip keys that are not within the key/radix of the new 3944 * indirect block. They stay in the parent. 3945 */ 3946 if (rounddown2(key ^ bref->key, (hammer2_key_t)1 << keybits) != 0) { 3947 goto next_key_spinlocked; 3948 } 3949 3950 /* 3951 * Load the new indirect block by acquiring the related 3952 * chains (potentially from media as it might not be 3953 * in-memory). Then move it to the new parent (ichain). 
3954 * 3955 * chain is referenced but not locked. We must lock the 3956 * chain to obtain definitive state. 3957 */ 3958 bsave = *bref; 3959 if (chain) { 3960 /* 3961 * Use chain already present in the RBTREE 3962 */ 3963 hammer2_chain_ref(chain); 3964 hammer2_spin_unex(&parent->core.spin); 3965 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3966 } else { 3967 /* 3968 * Get chain for blockref element. _get returns NULL 3969 * on insertion race. 3970 */ 3971 hammer2_spin_unex(&parent->core.spin); 3972 chain = hammer2_chain_get(parent, generation, &bsave, 3973 HAMMER2_RESOLVE_NEVER); 3974 if (chain == NULL) { 3975 reason = 1; 3976 hammer2_spin_ex(&parent->core.spin); 3977 continue; 3978 } 3979 } 3980 3981 /* 3982 * This is always live so if the chain has been deleted 3983 * we raced someone and we have to retry. 3984 * 3985 * NOTE: Lookups can race delete-duplicate because 3986 * delete-duplicate does not lock the parent's core 3987 * (they just use the spinlock on the core). 3988 * 3989 * (note reversed logic for this one) 3990 */ 3991 if (bcmp(&bsave, &chain->bref, sizeof(bsave)) || 3992 chain->parent != parent || 3993 (chain->flags & HAMMER2_CHAIN_DELETED)) { 3994 hammer2_chain_unlock(chain); 3995 hammer2_chain_drop(chain); 3996 if (hammer2_debug & 0x0040) { 3997 kprintf("LOST PARENT RETRY " 3998 "RETRY (%p,%p)->%p %08x\n", 3999 parent, chain->parent, chain, chain->flags); 4000 } 4001 hammer2_spin_ex(&parent->core.spin); 4002 continue; 4003 } 4004 4005 /* 4006 * Shift the chain to the indirect block. 4007 * 4008 * WARNING! No reason for us to load chain data, pass NOSTATS 4009 * to prevent delete/insert from trying to access 4010 * inode stats (and thus asserting if there is no 4011 * chain->data loaded). 4012 * 4013 * WARNING! The (parent, chain) deletion may modify the parent 4014 * and invalidate the base pointer. 4015 * 4016 * WARNING! Parent must already be marked modified, so we 4017 * can assume that chain_delete always suceeds. 4018 * 4019 * WARNING! 
hammer2_chain_repchange() does not have to be 4020 * called (and doesn't work anyway because we are 4021 * only doing a partial shift). A recursion that is 4022 * in-progress can continue at the current parent 4023 * and will be able to properly find its next key. 4024 */ 4025 error = hammer2_chain_delete_obref(parent, chain, mtid, 0, 4026 &bsave); 4027 KKASSERT(error == 0); 4028 hammer2_chain_rename_obref(&ichain, chain, mtid, 0, &bsave); 4029 hammer2_chain_unlock(chain); 4030 hammer2_chain_drop(chain); 4031 KKASSERT(parent->refs > 0); 4032 chain = NULL; 4033 base = NULL; /* safety */ 4034 hammer2_spin_ex(&parent->core.spin); 4035 next_key_spinlocked: 4036 if (--maxloops == 0) 4037 panic("hammer2_chain_create_indirect: maxloops"); 4038 reason = 4; 4039 if (key_next == 0 || key_next > key_end) 4040 break; 4041 key_beg = key_next; 4042 /* loop */ 4043 } 4044 hammer2_spin_unex(&parent->core.spin); 4045 4046 /* 4047 * Insert the new indirect block into the parent now that we've 4048 * cleared out some entries in the parent. We calculated a good 4049 * insertion index in the loop above (ichain->index). 4050 * 4051 * We don't have to set UPDATE here because we mark ichain 4052 * modified down below (so the normal modified -> flush -> set-moved 4053 * sequence applies). 4054 * 4055 * The insertion shouldn't race as this is a completely new block 4056 * and the parent is locked. 4057 */ 4058 base = NULL; /* safety, parent modify may change address */ 4059 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 4060 KKASSERT(parent->core.live_count < count); 4061 hammer2_chain_insert(parent, ichain, 4062 HAMMER2_CHAIN_INSERT_SPIN | 4063 HAMMER2_CHAIN_INSERT_LIVE, 4064 0); 4065 4066 /* 4067 * Make sure flushes propogate after our manual insertion. 4068 */ 4069 hammer2_chain_setflush(ichain); 4070 hammer2_chain_setflush(parent); 4071 4072 /* 4073 * Figure out what to return. 
 */
	if (rounddown2(create_key ^ key, (hammer2_key_t)1 << keybits) != 0) {
		/*
		 * Key being created is outside the key range,
		 * return the original parent.
		 */
		hammer2_chain_unlock(ichain);
		hammer2_chain_drop(ichain);
	} else {
		/*
		 * Otherwise it's in the range, return the new parent.
		 * (leave both the new and old parent locked).
		 */
		parent = ichain;
	}

	return(parent);
}

/*
 * Do maintenance on an indirect chain.  Both parent and chain are locked.
 *
 * Returns non-zero if (chain) is deleted, either due to being empty or
 * because its children were safely moved into the parent.
 */
int
hammer2_chain_indirect_maintenance(hammer2_chain_t *parent,
				   hammer2_chain_t *chain)
{
	hammer2_blockref_t *chain_base;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bsave;
	hammer2_key_t key_next;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_chain_t *sub;
	int chain_count;
	int count;
	int error;
	int generation;

	/*
	 * Make sure we have an accurate live_count
	 */
	if ((chain->flags & (HAMMER2_CHAIN_INITIAL |
			     HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		hammer2_chain_countbrefs(chain, base, count);
	}

	/*
	 * If the indirect block is empty we can delete it.
	 * (ignore deletion error)
	 */
	if (chain->core.live_count == 0 && RB_EMPTY(&chain->core.rbtree)) {
		hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
		hammer2_chain_repchange(parent, chain);
		return 1;
	}

	base = hammer2_chain_base_and_count(parent, &count);

	if ((parent->flags & (HAMMER2_CHAIN_INITIAL |
			      HAMMER2_CHAIN_COUNTEDBREFS)) == 0) {
		hammer2_chain_countbrefs(parent, base, count);
	}

	/*
	 * Determine if we can collapse chain into parent, calculate
	 * hysteresis for chain emptiness.
	 */
	if (parent->core.live_count + chain->core.live_count - 1 > count)
		return 0;
	chain_count = chain->bytes / sizeof(hammer2_blockref_t);
	if (chain->core.live_count > chain_count * 3 / 4)
		return 0;

	/*
	 * Ok, theoretically we can collapse chain's contents into
	 * parent.  chain is locked, but any in-memory children of chain
	 * are not.  For this to work, we must be able to dispose of any
	 * in-memory children of chain.
	 *
	 * For now require that there are no in-memory children of chain.
	 *
	 * WARNING! Both chain and parent must remain locked across this
	 *	    entire operation.
	 */

	/*
	 * Parent must be marked modified.  Don't try to collapse it if we
	 * can't mark it modified.  Once modified, destroy chain to make room
	 * and to get rid of what will be a conflicting key (this is included
	 * in the calculation above).  Finally, move the children of chain
	 * into chain's parent.
	 *
	 * This order creates an accounting problem for bref.embed.stats
	 * because we destroy chain before we remove its children.  Any
	 * elements whose blockref is already synchronized will be counted
	 * twice.  To deal with the problem we clean out chain's stats prior
	 * to deleting it.
	 */
	error = hammer2_chain_modify(parent, 0, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}
	error = hammer2_chain_modify(chain, chain->bref.modify_tid, 0, 0);
	if (error) {
		krateprintf(&krate_h2me, "hammer2: indirect_maint: %s\n",
			    hammer2_error_str(error));
		return 0;
	}

	/* zero stats first to avoid double-counting (see comment above) */
	chain->bref.embed.stats.inode_count = 0;
	chain->bref.embed.stats.data_count = 0;
	error = hammer2_chain_delete(parent, chain,
				     chain->bref.modify_tid,
				     HAMMER2_DELETE_PERMANENT);
	KKASSERT(error == 0);

	/*
	 * The combined_find call requires core.spin to be held.  One would
	 * think there wouldn't be any conflicts since we hold chain
	 * exclusively locked, but the caching mechanism for 0-ref children
	 * does not require a chain lock.
	 */
	hammer2_spin_ex(&chain->core.spin);

	key_next = 0;
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	for (;;) {
		/*
		 * Reload base/count each pass; chain may have been
		 * modified while the spinlock was temporarily dropped.
		 */
		chain_base = &chain->data->npdata[0];
		chain_count = chain->bytes / sizeof(hammer2_blockref_t);
		sub = hammer2_combined_find(chain, chain_base, chain_count,
					    &key_next,
					    key_beg, key_end,
					    &bref);
		generation = chain->core.generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * The spinlock must be dropped to lock sub (lock ordering),
		 * then re-validated below against races.
		 */
		bsave = *bref;
		if (sub) {
			hammer2_chain_ref(sub);
			hammer2_spin_unex(&chain->core.spin);
			hammer2_chain_lock(sub, HAMMER2_RESOLVE_NEVER);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			sub = hammer2_chain_get(chain, generation, &bsave,
						HAMMER2_RESOLVE_NEVER);
			if (sub == NULL) {
				/* insertion race, retry this key */
				hammer2_spin_ex(&chain->core.spin);
				continue;
			}
		}
		/*
		 * Re-validate sub after relocking; retry on any race
		 * (bref changed, reparented, or deleted).
		 */
		if (bcmp(&bsave, &sub->bref, sizeof(bsave)) ||
		    sub->parent != chain ||
		    (sub->flags & HAMMER2_CHAIN_DELETED)) {
			hammer2_chain_unlock(sub);
			hammer2_chain_drop(sub);
			hammer2_spin_ex(&chain->core.spin);
			sub = NULL;	/* safety */
			continue;
		}
		error = hammer2_chain_delete_obref(chain, sub,
						   sub->bref.modify_tid, 0,
						   &bsave);
		KKASSERT(error == 0);
		hammer2_chain_rename_obref(&parent, sub,
					   sub->bref.modify_tid,
					   HAMMER2_INSERT_SAMEPARENT, &bsave);
		hammer2_chain_unlock(sub);
		hammer2_chain_drop(sub);
		hammer2_spin_ex(&chain->core.spin);

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&chain->core.spin);

	hammer2_chain_repchange(parent, chain);

	return 1;
}

/*
 * Freemap indirect blocks
 *
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int maxloops = 300000;

	key = *keyp;
	keybits = 64;		/* start wide, narrow to the smallest radix */

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Skip deleted chains.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Track the lowest-ordered (smallest radix, then lowest
		 * key) element seen so far.
		 */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.
	 */
	switch(keybits) {
	case HAMMER2_FREEMAP_LEVEL0_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL1_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL2_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL3_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL4_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL5_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL5_RADIX:
		panic("hammer2_chain_indkey_freemap: level too high");
		break;
	default:
		panic("hammer2_chain_indkey_freemap: bad radix");
		break;
	}
	*keyp = key;

	return (keybits);
}

/*
 * File indirect blocks
 *
 * Calculate the key/keybits for the indirect block to create by scanning
 * existing keys.  The key being created is also passed in *keyp and can be
 * inside or outside the indirect block.  Regardless, the indirect block
 * must hold at least two keys in order to guarantee sufficient space.
 *
 * We use a modified version of the freemap's fixed radix tree, but tailored
 * for file data.  Basically we configure an indirect block encompassing the
 * smallest key.
 */
static int
hammer2_chain_indkey_file(hammer2_chain_t *parent, hammer2_key_t *keyp,
			  int keybits, hammer2_blockref_t *base, int count,
			  int ncount)
{
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int nradix;
	int maxloops = 300000;

	key = *keyp;
	keybits = 64;		/* start wide, narrow to the smallest radix */

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 *
	 * Locate the smallest key.
4417 */ 4418 key_beg = 0; 4419 key_end = HAMMER2_KEY_MAX; 4420 hammer2_spin_ex(&parent->core.spin); 4421 4422 for (;;) { 4423 if (--maxloops == 0) { 4424 panic("indkey_freemap shit %p %p:%d\n", 4425 parent, base, count); 4426 } 4427 chain = hammer2_combined_find(parent, base, count, 4428 &key_next, 4429 key_beg, key_end, 4430 &bref); 4431 4432 /* 4433 * Exhausted search 4434 */ 4435 if (bref == NULL) 4436 break; 4437 4438 /* 4439 * Skip deleted chains. 4440 */ 4441 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4442 if (key_next == 0 || key_next > key_end) 4443 break; 4444 key_beg = key_next; 4445 continue; 4446 } 4447 4448 /* 4449 * Use the full live (not deleted) element for the scan 4450 * iteration. HAMMER2 does not allow partial replacements. 4451 * 4452 * XXX should be built into hammer2_combined_find(). 4453 */ 4454 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4455 4456 if (keybits > bref->keybits) { 4457 key = bref->key; 4458 keybits = bref->keybits; 4459 } else if (keybits == bref->keybits && bref->key < key) { 4460 key = bref->key; 4461 } 4462 if (key_next == 0) 4463 break; 4464 key_beg = key_next; 4465 } 4466 hammer2_spin_unex(&parent->core.spin); 4467 4468 /* 4469 * Calculate the static keybits for a higher-level indirect block 4470 * that contains the key. 4471 */ 4472 *keyp = key; 4473 4474 switch(ncount) { 4475 case HAMMER2_IND_COUNT_MIN: 4476 nradix = HAMMER2_IND_RADIX_MIN - HAMMER2_BLOCKREF_RADIX; 4477 break; 4478 case HAMMER2_IND_COUNT_NOM: 4479 nradix = HAMMER2_IND_RADIX_NOM - HAMMER2_BLOCKREF_RADIX; 4480 break; 4481 case HAMMER2_IND_COUNT_MAX: 4482 nradix = HAMMER2_IND_RADIX_MAX - HAMMER2_BLOCKREF_RADIX; 4483 break; 4484 default: 4485 panic("bad ncount %d\n", ncount); 4486 nradix = 0; 4487 break; 4488 } 4489 4490 /* 4491 * The largest radix that can be returned for an indirect block is 4492 * 63 bits. 
(The largest practical indirect block radix is actually 4493 * 62 bits because the top-level inode or volume root contains four 4494 * entries, but allow 63 to be returned). 4495 */ 4496 if (nradix >= 64) 4497 nradix = 63; 4498 4499 return keybits + nradix; 4500 } 4501 4502 #if 1 4503 4504 /* 4505 * Directory indirect blocks. 4506 * 4507 * Covers both the inode index (directory of inodes), and directory contents 4508 * (filenames hardlinked to inodes). 4509 * 4510 * Because directory keys are hashed we generally try to cut the space in 4511 * half. We accomodate the inode index (which tends to have linearly 4512 * increasing inode numbers) by ensuring that the keyspace is at least large 4513 * enough to fill up the indirect block being created. 4514 */ 4515 static int 4516 hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp, 4517 int keybits, hammer2_blockref_t *base, int count, 4518 int ncount) 4519 { 4520 hammer2_blockref_t *bref; 4521 hammer2_chain_t *chain; 4522 hammer2_key_t key_beg; 4523 hammer2_key_t key_end; 4524 hammer2_key_t key_next; 4525 hammer2_key_t key; 4526 int nkeybits; 4527 int locount; 4528 int hicount; 4529 int maxloops = 300000; 4530 4531 /* 4532 * NOTE: We can't take a shortcut here anymore for inodes because 4533 * the root directory can contain a mix of inodes and directory 4534 * entries (we used to just return 63 if parent->bref.type was 4535 * HAMMER2_BREF_TYPE_INODE. 4536 */ 4537 key = *keyp; 4538 locount = 0; 4539 hicount = 0; 4540 4541 /* 4542 * Calculate the range of keys in the array being careful to skip 4543 * slots which are overridden with a deletion. 
4544 */ 4545 key_beg = 0; 4546 key_end = HAMMER2_KEY_MAX; 4547 hammer2_spin_ex(&parent->core.spin); 4548 4549 for (;;) { 4550 if (--maxloops == 0) { 4551 panic("indkey_freemap shit %p %p:%d\n", 4552 parent, base, count); 4553 } 4554 chain = hammer2_combined_find(parent, base, count, 4555 &key_next, 4556 key_beg, key_end, 4557 &bref); 4558 4559 /* 4560 * Exhausted search 4561 */ 4562 if (bref == NULL) 4563 break; 4564 4565 /* 4566 * Deleted object 4567 */ 4568 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 4569 if (key_next == 0 || key_next > key_end) 4570 break; 4571 key_beg = key_next; 4572 continue; 4573 } 4574 4575 /* 4576 * Use the full live (not deleted) element for the scan 4577 * iteration. HAMMER2 does not allow partial replacements. 4578 * 4579 * XXX should be built into hammer2_combined_find(). 4580 */ 4581 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 4582 4583 /* 4584 * Expand our calculated key range (key, keybits) to fit 4585 * the scanned key. nkeybits represents the full range 4586 * that we will later cut in half (two halves @ nkeybits - 1). 4587 */ 4588 nkeybits = keybits; 4589 if (nkeybits < bref->keybits) { 4590 if (bref->keybits > 64) { 4591 kprintf("bad bref chain %p bref %p\n", 4592 chain, bref); 4593 Debugger("fubar"); 4594 } 4595 nkeybits = bref->keybits; 4596 } 4597 while (nkeybits < 64 && 4598 rounddown2(key ^ bref->key, (hammer2_key_t)1 << nkeybits) != 0) { 4599 ++nkeybits; 4600 } 4601 4602 /* 4603 * If the new key range is larger we have to determine 4604 * which side of the new key range the existing keys fall 4605 * under by checking the high bit, then collapsing the 4606 * locount into the hicount or vise-versa. 
4607 */ 4608 if (keybits != nkeybits) { 4609 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 4610 hicount += locount; 4611 locount = 0; 4612 } else { 4613 locount += hicount; 4614 hicount = 0; 4615 } 4616 keybits = nkeybits; 4617 } 4618 4619 /* 4620 * The newly scanned key will be in the lower half or the 4621 * upper half of the (new) key range. 4622 */ 4623 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 4624 ++hicount; 4625 else 4626 ++locount; 4627 4628 if (key_next == 0) 4629 break; 4630 key_beg = key_next; 4631 } 4632 hammer2_spin_unex(&parent->core.spin); 4633 bref = NULL; /* now invalid (safety) */ 4634 4635 /* 4636 * Adjust keybits to represent half of the full range calculated 4637 * above (radix 63 max) for our new indirect block. 4638 */ 4639 --keybits; 4640 4641 /* 4642 * Expand keybits to hold at least ncount elements. ncount will be 4643 * a power of 2. This is to try to completely fill leaf nodes (at 4644 * least for keys which are not hashes). 4645 * 4646 * We aren't counting 'in' or 'out', we are counting 'high side' 4647 * and 'low side' based on the bit at (1LL << keybits). We want 4648 * everything to be inside in these cases so shift it all to 4649 * the low or high side depending on the new high bit. 4650 */ 4651 while (((hammer2_key_t)1 << keybits) < ncount) { 4652 ++keybits; 4653 if (key & ((hammer2_key_t)1 << keybits)) { 4654 hicount += locount; 4655 locount = 0; 4656 } else { 4657 locount += hicount; 4658 hicount = 0; 4659 } 4660 } 4661 4662 if (hicount > locount) 4663 key |= (hammer2_key_t)1 << keybits; 4664 else 4665 key &= ~(hammer2_key_t)1 << keybits; 4666 4667 *keyp = key; 4668 4669 return (keybits); 4670 } 4671 4672 #else 4673 4674 /* 4675 * Directory indirect blocks. 4676 * 4677 * Covers both the inode index (directory of inodes), and directory contents 4678 * (filenames hardlinked to inodes). 4679 * 4680 * Because directory keys are hashed we generally try to cut the space in 4681 * half. 
 * We accommodate the inode index (which tends to have linearly
 * increasing inode numbers) by ensuring that the keyspace is at least large
 * enough to fill up the indirect block being created.
 */
static int
hammer2_chain_indkey_dir(hammer2_chain_t *parent, hammer2_key_t *keyp,
			 int keybits, hammer2_blockref_t *base, int count,
			 int ncount)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t	*chain;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	hammer2_key_t key;
	int nkeybits;
	int locount;
	int hicount;
	int maxloops = 300000;

	/*
	 * Shortcut if the parent is the inode.  In this situation the
	 * parent has 4+1 directory entries and we are creating an indirect
	 * block capable of holding many more.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		return 63;
	}

	key = *keyp;
	locount = 0;
	hicount = 0;

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_KEY_MAX;
	hammer2_spin_ex(&parent->core.spin);

	for (;;) {
		if (--maxloops == 0) {
			/* NOTE: message copy-pasted from indkey_freemap */
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * Deleted object
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Expand our calculated key range (key, keybits) to fit
		 * the scanned key.  nkeybits represents the full range
		 * that we will later cut in half (two halves @ nkeybits - 1).
		 */
		nkeybits = keybits;
		if (nkeybits < bref->keybits) {
			if (bref->keybits > 64) {
				kprintf("bad bref chain %p bref %p\n",
					chain, bref);
				Debugger("fubar");
			}
			nkeybits = bref->keybits;
		}
		while (nkeybits < 64 &&
		       (~(((hammer2_key_t)1 << nkeybits) - 1) &
		        (key ^ bref->key)) != 0) {
			++nkeybits;
		}

		/*
		 * If the new key range is larger we have to determine
		 * which side of the new key range the existing keys fall
		 * under by checking the high bit, then collapsing the
		 * locount into the hicount or vice-versa.
		 */
		if (keybits != nkeybits) {
			if (((hammer2_key_t)1 << (nkeybits - 1)) & key) {
				hicount += locount;
				locount = 0;
			} else {
				locount += hicount;
				hicount = 0;
			}
			keybits = nkeybits;
		}

		/*
		 * The newly scanned key will be in the lower half or the
		 * upper half of the (new) key range.
		 */
		if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key)
			++hicount;
		else
			++locount;

		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	hammer2_spin_unex(&parent->core.spin);
	bref = NULL;	/* now invalid (safety) */

	/*
	 * Adjust keybits to represent half of the full range calculated
	 * above (radix 63 max) for our new indirect block.
	 */
	--keybits;

	/*
	 * Expand keybits to hold at least ncount elements.  ncount will be
	 * a power of 2.  This is to try to completely fill leaf nodes (at
	 * least for keys which are not hashes).
	 *
	 * We aren't counting 'in' or 'out', we are counting 'high side'
	 * and 'low side' based on the bit at (1LL << keybits).  We want
	 * everything to be inside in these cases so shift it all to
	 * the low or high side depending on the new high bit.
	 */
	while (((hammer2_key_t)1 << keybits) < ncount) {
		++keybits;
		if (key & ((hammer2_key_t)1 << keybits)) {
			hicount += locount;
			locount = 0;
		} else {
			locount += hicount;
			hicount = 0;
		}
	}

	if (hicount > locount)
		key |= (hammer2_key_t)1 << keybits;
	else
		key &= ~(hammer2_key_t)1 << keybits;

	*keyp = key;

	return (keybits);
}

#endif

/*
 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if
 * it exists.
 *
 * Both parent and chain must be locked exclusively.
 *
 * This function will modify the parent if the blockref requires removal
 * from the parent's block table.
 *
 * This function is NOT recursive.  Any entity already pushed into the
 * chain (such as an inode) may still need visibility into its contents,
 * as well as the ability to read and modify the contents.  For example,
 * for an unlinked file which is still open.
 *
 * Also note that the flusher is responsible for cleaning up empty
 * indirect blocks.
 *
 * Returns 0 on success or a hammer2 error code from the delete helper.
 * (hammer2_chain_delete_obref below is a near-duplicate which also
 * reports the removed blockref.)
 */
int
hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     hammer2_tid_t mtid, int flags)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, NULL);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Variant of hammer2_chain_delete() which additionally returns the
 * removed blockref in *obref.  *obref is initialized to
 * HAMMER2_BREF_TYPE_EMPTY; the delete helper presumably fills it in
 * when a blockref is actually removed (helper defined elsewhere —
 * confirm).
 */
static int
hammer2_chain_delete_obref(hammer2_chain_t *parent, hammer2_chain_t *chain,
			   hammer2_tid_t mtid, int flags,
			   hammer2_blockref_t *obref)
{
	int error = 0;

	KKASSERT(hammer2_mtx_owned(&chain->lock));

	/*
	 * Nothing to do if already marked.
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	obref->type = HAMMER2_BREF_TYPE_EMPTY;
	if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 &&
			 chain->parent == parent);
		error = _hammer2_chain_delete_helper(parent, chain,
						     mtid, flags, obref);
	}

	/*
	 * Permanent deletions mark the chain as destroyed.
	 *
	 * NOTE: We do not setflush the chain unless the deletion is
	 *	 permanent, since the deletion of a chain does not actually
	 *	 require it to be flushed.
	 */
	if (error == 0) {
		if (flags & HAMMER2_DELETE_PERMANENT) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
			hammer2_chain_setflush(chain);
		}
	}

	return error;
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * WARNING!
 *	    Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;

	/*
	 * Require that live chains already have their brefs counted
	 * so we can optimize operations.
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using parent->cache_index.  This is
	 * the most likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = parent->cache_index;	/* SMP RACE OK */
	cpu_ccfence();
	limit = parent->core.live_zero;
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key > key_beg)) {
		--scan;
		--i;
	}
	parent->cache_index = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != HAMMER2_BREF_TYPE_EMPTY) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		parent->cache_index = i;
		if (i >= limit) {
			i = count;
		} else {
			/*
			 * scan_end of 0 means the range wrapped past the
			 * top of the key space; leave *key_nextp alone.
			 */
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *brefp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain has been found and a non-NULL bref is returned
 * in *brefp, the match came from the on-media blockref array.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING! Must be called with parent's spinlock held.  Spinlock remains
 *	    held through the operation.
 */
static hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **brefp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
	 */
	*key_nextp = key_end + 1;
	i = hammer2_base_find(parent, base, count, key_nextp,
			      key_beg, key_end);
	chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

	/*
	 * Neither matched
	 */
	if (i == count && chain == NULL) {
		*brefp = NULL;
		return(NULL);
	}

	/*
	 * Only chain matched.
	 */
	if (i == count) {
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Only blockref matched.
	 */
	if (chain == NULL) {
		bref = &base[i];
		goto found;
	}

	/*
	 * Both in-memory and blockref matched, select the nearer element.
	 *
	 * If both are flush with the left-hand side or both are the
	 * same distance away, select the chain.  In this situation the
	 * chain must have been loaded from the matching blockmap.
	 */
	if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
	    chain->bref.key == base[i].key) {
		KKASSERT(chain->bref.key == base[i].key);
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Select the nearer key
	 */
	if (chain->bref.key < base[i].key) {
		bref = &chain->bref;
	} else {
		bref = &base[i];
		chain = NULL;
	}

	/*
	 * If the bref is out of bounds we've exhausted our search.
	 */
found:
	if (bref->key > key_end) {
		*brefp = NULL;
		chain = NULL;
	} else {
		*brefp = bref;
	}
	return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain,
		    hammer2_blockref_t *obref)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_blockref_t *scan;
	hammer2_key_t key_next;
	int i;

	/*
	 * Delete element.  Expect the element to exist.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);
	scan = &base[i];
	/*
	 * Keybits may legitimately differ from the element when a blockmap
	 * update is pending (BLKMAPUPD), so only enforce the keybits match
	 * when that flag is clear.
	 */
	if (i == count || scan->type == HAMMER2_BREF_TYPE_EMPTY ||
	    scan->key != elm->key ||
	    ((chain->flags & HAMMER2_CHAIN_BLKMAPUPD) == 0 &&
	     scan->keybits != elm->keybits)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("delete base %p element not found at %d/%d elm %p\n",
		      base, i, count, elm);
		return;
	}

	/*
	 * Update stats and zero the entry.
	 *
	 * NOTE: Handle radix == 0 (0 bytes) case.
	 */
	if ((int)(scan->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count -= (hammer2_off_t)1 <<
				(int)(scan->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(scan->type) {
	case HAMMER2_BREF_TYPE_INODE:
		--parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			/* leaf_count saturated; flag it as a hint only */
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * INODE and INDIRECT brefs carry aggregate stats of the
		 * subtree they cover; DATA brefs were already accounted
		 * via data_off above.
		 */
		if (scan->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count -=
				scan->embed.stats.data_count;
			parent->bref.embed.stats.inode_count -=
				scan->embed.stats.inode_count;
		}
		if (scan->type == HAMMER2_BREF_TYPE_INODE)
			break;
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			/* clamp at zero instead of underflowing */
			if (parent->bref.leaf_count <= scan->leaf_count)
				parent->bref.leaf_count = 0;
			else
				parent->bref.leaf_count -= scan->leaf_count;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count == HAMMER2_BLOCKREF_LEAF_MAX) {
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_HINT_LEAF_COUNT);
		} else {
			if (parent->bref.leaf_count)
				--parent->bref.leaf_count;
		}
		/* fall through (no further accounting for dirents) */
	default:
		break;
	}

	/* return the old blockref to the caller before erasing it */
	if (obref)
		*obref = *scan;
	bzero(scan, sizeof(*scan));

	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 * If we deleted the last live element, walk live_zero backwards
	 * over any trailing EMPTY slots.
	 */
	if (parent->core.live_zero == i + 1) {
		while (--i >= 0 && base[i].type == HAMMER2_BREF_TYPE_EMPTY)
			;
		parent->core.live_zero = i + 1;
	}

	/*
	 * Clear appropriate blockmap flags in chain.
	 */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED |
					HAMMER2_CHAIN_BLKMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    hammer2_chain_t *chain, hammer2_blockref_t *elm)
{
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;	/* backward probe for an empty slot */
	int k;	/* forward probe for an empty slot */
	int l;
	int u = 1;	/* debug marker: which insertion path was taken */

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, &key_next,
			      elm->key, elm->key);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * Set appropriate blockmap flags in chain (if not NULL)
	 */
	if (chain)
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED);

	/*
	 * Update stats and zero the entry
	 */
	if ((int)(elm->data_off & HAMMER2_OFF_MASK_RADIX)) {
		parent->bref.embed.stats.data_count += (hammer2_off_t)1 <<
				(int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
	}
	switch(elm->type) {
	case HAMMER2_BREF_TYPE_INODE:
		++parent->bref.embed.stats.inode_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_DATA:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		/* fall through */
	case HAMMER2_BREF_TYPE_INDIRECT:
		/* INODE/INDIRECT brefs carry aggregate subtree stats */
		if (elm->type != HAMMER2_BREF_TYPE_DATA) {
			parent->bref.embed.stats.data_count +=
				elm->embed.stats.data_count;
			parent->bref.embed.stats.inode_count +=
				elm->embed.stats.inode_count;
		}
		if (elm->type == HAMMER2_BREF_TYPE_INODE)
			break;
		/* saturate leaf_count at HAMMER2_BLOCKREF_LEAF_MAX */
		if (parent->bref.leaf_count + elm->leaf_count <
		    HAMMER2_BLOCKREF_LEAF_MAX) {
			parent->bref.leaf_count += elm->leaf_count;
		} else {
			parent->bref.leaf_count = HAMMER2_BLOCKREF_LEAF_MAX;
		}
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
		if (parent->bref.leaf_count != HAMMER2_BLOCKREF_LEAF_MAX)
			++parent->bref.leaf_count;
		break;
	default:
		break;
	}


	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 * Appending at live_zero requires no element shifting at all.
	 */
	if (i == count && parent->core.live_zero < count) {
		i = parent->core.live_zero++;
		base[i] = *elm;
		return;
	}

	/* sanity: the new element's key range must not overlap base[i] */
	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("insert base %p overlapping elements at %d elm %p\n",
		      base, i, elm);
	}

	/*
	 * Try to find an empty slot before or after, shifting the
	 * intervening elements by one to make room at index i.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == HAMMER2_BREF_TYPE_EMPTY) {
			if (j == i - 1) {
				/* hole is adjacent, no shifting needed */
				base[j] = *elm;
			} else {
				/* shift [j+1, i-1] left into the hole */
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			goto validate;
		}
		++k;
		if (k < count && base[k].type == HAMMER2_BREF_TYPE_EMPTY) {
			/* shift [i, k-1] right into the hole */
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update parent->core.live_zero for live
			 * chains.
			 */
			if (parent->core.live_zero <= k)
				parent->core.live_zero = k + 1;
			u = 2;
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging: verify the array is still sorted with no key-range
	 * overlaps after the insertion.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type != HAMMER2_BREF_TYPE_EMPTY) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

/*
 * qsort comparator: EMPTY elements sort to the end, live elements
 * ascend by key.
 */
static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == HAMMER2_BREF_TYPE_EMPTY) {
		if (bref2->type == HAMMER2_BREF_TYPE_EMPTY)
			return(0);
		return(1);
	} else if (bref2->type == HAMMER2_BREF_TYPE_EMPTY) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;
	int count;

	/*
	 * Locate the embedded blockref array for this chain type.
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.meta.op_flags &
		    HAMMER2_OPFLAG_DIRECTDATA) {
			return;
		}
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->data->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->data->blkset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_base_sort: unrecognized "
		      "blockref(A) type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
		break;
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Set the check data for a chain.  This can be a heavy-weight operation
 * and typically only runs on-flush.  For file data check data is calculated
 * when the logical buffers are flushed.
 */
void
hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata)
{
	/* a freshly computed check code means the chain is now tested */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_NOTTESTED);

	switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
	case HAMMER2_CHECK_NONE:
		break;
	case HAMMER2_CHECK_DISABLED:
		break;
	case HAMMER2_CHECK_ISCSI32:
		chain->bref.check.iscsi32.value =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	case HAMMER2_CHECK_XXHASH64:
		chain->bref.check.xxhash64.value =
			XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED);
		break;
	case HAMMER2_CHECK_SHA192:
		/* not implemented in this port; kernel version is below */
		assert(0); /* XXX unsupported */
		/*
		{
			SHA256_CTX hash_ctx;
			union {
				uint8_t digest[SHA256_DIGEST_LENGTH];
				uint64_t digest64[SHA256_DIGEST_LENGTH/8];
			} u;

			SHA256_Init(&hash_ctx);
			SHA256_Update(&hash_ctx, bdata, chain->bytes);
			SHA256_Final(u.digest, &hash_ctx);
			u.digest64[2] ^= u.digest64[3];
			bcopy(u.digest,
			      chain->bref.check.sha192.data,
			      sizeof(chain->bref.check.sha192.data));
		}
		*/
		break;
	case HAMMER2_CHECK_FREEMAP:
		chain->bref.check.freemap.icrc32 =
			hammer2_icrc32(bdata, chain->bytes);
		break;
	default:
		kprintf("hammer2_chain_setcheck: unknown check type %02x\n",
			chain->bref.methods);
		break;
	}
}

/*
 * Characterize a failed check code and try to trace back to the inode.
 */
static void
hammer2_characterize_failed_chain(hammer2_chain_t *chain, uint64_t check,
				  int bits)
{
	hammer2_chain_t *lchain;
	hammer2_chain_t *ochain;
	int did;

	/*
	 * krateprintf rate-limits the output; if it was suppressed (did==0)
	 * skip the rest of the (expensive) characterization as well.
	 */
	did = krateprintf(&krate_h2chk,
			  "chain %016jx.%02x (%s) meth=%02x CHECK FAIL "
			  "(flags=%08x, bref/data ",
			  chain->bref.data_off,
			  chain->bref.type,
			  hammer2_bref_type_str(chain->bref.type),
			  chain->bref.methods,
			  chain->flags);
	if (did == 0)
		return;

	/* print stored vs computed check code (32 or 64 bit form) */
	if (bits == 32) {
		kprintf("%08x/%08x)\n",
			chain->bref.check.iscsi32.value,
			(uint32_t)check);
	} else {
		kprintf("%016jx/%016jx)\n",
			chain->bref.check.xxhash64.value,
			check);
	}

	/*
	 * Run up the chains to try to find the governing inode so we
	 * can report it.
	 *
	 * XXX This error reporting is not really MPSAFE
	 */
	ochain = chain;
	lchain = chain;
	while (chain && chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
		lchain = chain;
		chain = chain->parent;
	}

	if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    ((chain->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) == 0 ||
	     (lchain->bref.key & HAMMER2_DIRHASH_VISIBLE))) {
		kprintf(" Resides at/in inode %ld\n",
			(long)chain->bref.key);
	} else if (chain && chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		kprintf(" Resides in inode index - CRITICAL!!!\n");
	} else {
		kprintf(" Resides in root index - CRITICAL!!!\n");
	}
	if (ochain->hmp) {
		const char *pfsname = "UNKNOWN";
		int i;

		/* map the originating device back to a PFS name if possible */
		if (ochain->pmp) {
			for (i = 0; i < HAMMER2_MAXCLUSTER; ++i) {
				if (ochain->pmp->pfs_hmps[i] == ochain->hmp &&
				    ochain->pmp->pfs_names[i]) {
					pfsname = ochain->pmp->pfs_names[i];
					break;
				}
			}
		}
		kprintf(" In pfs %s on device %s\n",
			pfsname, ochain->hmp->devrepname);
	}
}

/*
 * Returns non-zero on success, 0 on failure.
5628 */ 5629 int 5630 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 5631 { 5632 uint32_t check32; 5633 uint64_t check64; 5634 int r; 5635 5636 if (chain->flags & HAMMER2_CHAIN_NOTTESTED) 5637 return 1; 5638 5639 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 5640 case HAMMER2_CHECK_NONE: 5641 r = 1; 5642 break; 5643 case HAMMER2_CHECK_DISABLED: 5644 r = 1; 5645 break; 5646 case HAMMER2_CHECK_ISCSI32: 5647 check32 = hammer2_icrc32(bdata, chain->bytes); 5648 r = (chain->bref.check.iscsi32.value == check32); 5649 if (r == 0) { 5650 hammer2_characterize_failed_chain(chain, check32, 32); 5651 } 5652 hammer2_process_icrc32 += chain->bytes; 5653 break; 5654 case HAMMER2_CHECK_XXHASH64: 5655 check64 = XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 5656 r = (chain->bref.check.xxhash64.value == check64); 5657 if (r == 0) { 5658 hammer2_characterize_failed_chain(chain, check64, 64); 5659 } 5660 hammer2_process_xxhash64 += chain->bytes; 5661 break; 5662 case HAMMER2_CHECK_SHA192: 5663 assert(0); /* XXX unsupported */ 5664 /* 5665 { 5666 SHA256_CTX hash_ctx; 5667 union { 5668 uint8_t digest[SHA256_DIGEST_LENGTH]; 5669 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 5670 } u; 5671 5672 SHA256_Init(&hash_ctx); 5673 SHA256_Update(&hash_ctx, bdata, chain->bytes); 5674 SHA256_Final(u.digest, &hash_ctx); 5675 u.digest64[2] ^= u.digest64[3]; 5676 if (bcmp(u.digest, 5677 chain->bref.check.sha192.data, 5678 sizeof(chain->bref.check.sha192.data)) == 0) { 5679 r = 1; 5680 } else { 5681 r = 0; 5682 krateprintf(&krate_h2chk, 5683 "chain %016jx.%02x meth=%02x " 5684 "CHECK FAIL\n", 5685 chain->bref.data_off, 5686 chain->bref.type, 5687 chain->bref.methods); 5688 } 5689 } 5690 */ 5691 break; 5692 case HAMMER2_CHECK_FREEMAP: 5693 r = (chain->bref.check.freemap.icrc32 == 5694 hammer2_icrc32(bdata, chain->bytes)); 5695 if (r == 0) { 5696 int did; 5697 5698 did = krateprintf(&krate_h2chk, 5699 "chain %016jx.%02x meth=%02x " 5700 "CHECK FAIL\n", 5701 chain->bref.data_off, 5702 
chain->bref.type, 5703 chain->bref.methods); 5704 if (did) { 5705 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 5706 chain->bref.check.freemap.icrc32, 5707 hammer2_icrc32(bdata, chain->bytes), 5708 chain->bytes); 5709 if (chain->dio) { 5710 kprintf("dio %p buf %016jx,%ld " 5711 "bdata %p/%p\n", 5712 chain->dio, 5713 (intmax_t)chain->dio->bp->b_loffset, 5714 chain->dio->bp->b_bufsize, 5715 bdata, 5716 chain->dio->bp->b_data); 5717 } 5718 } 5719 } 5720 break; 5721 default: 5722 kprintf("hammer2_chain_testcheck: unknown check type %02x\n", 5723 chain->bref.methods); 5724 r = 1; 5725 break; 5726 } 5727 return r; 5728 } 5729 5730 /* 5731 * Acquire the chain and parent representing the specified inode for the 5732 * device at the specified cluster index. 5733 * 5734 * The flags passed in are LOOKUP flags, not RESOLVE flags. 5735 * 5736 * If we are unable to locate the inode, HAMMER2_ERROR_EIO or HAMMER2_ERROR_CHECK 5737 * is returned. In case of error, *chainp and/or *parentp may still be returned 5738 * non-NULL. 5739 * 5740 * The caller may pass-in a locked *parentp and/or *chainp, or neither. 5741 * They will be unlocked and released by this function. The *parentp and 5742 * *chainp representing the located inode are returned locked. 5743 * 5744 * The returned error includes any error on the returned chain in addition to 5745 * errors incurred while trying to lookup the inode. However, a chain->error 5746 * might not be recognized if HAMMER2_LOOKUP_NODATA is passed. This flag may 5747 * not be passed to this function. 5748 */ 5749 int 5750 hammer2_chain_inode_find(hammer2_pfs_t *pmp, hammer2_key_t inum, 5751 int clindex, int flags, 5752 hammer2_chain_t **parentp, hammer2_chain_t **chainp) 5753 { 5754 hammer2_chain_t *parent; 5755 hammer2_chain_t *rchain; 5756 hammer2_key_t key_dummy; 5757 hammer2_inode_t *ip; 5758 int resolve_flags; 5759 int error; 5760 5761 KKASSERT((flags & HAMMER2_LOOKUP_NODATA) == 0); 5762 5763 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 
5764 HAMMER2_RESOLVE_SHARED : 0; 5765 5766 /* 5767 * Caller expects us to replace these. 5768 */ 5769 if (*chainp) { 5770 hammer2_chain_unlock(*chainp); 5771 hammer2_chain_drop(*chainp); 5772 *chainp = NULL; 5773 } 5774 if (*parentp) { 5775 hammer2_chain_unlock(*parentp); 5776 hammer2_chain_drop(*parentp); 5777 *parentp = NULL; 5778 } 5779 5780 /* 5781 * Be very careful, this is a backend function and we CANNOT 5782 * lock any frontend inode structure we find. But we have to 5783 * look the inode up this way first in case it exists but is 5784 * detached from the radix tree. 5785 */ 5786 ip = hammer2_inode_lookup(pmp, inum); 5787 if (ip) { 5788 *chainp = hammer2_inode_chain_and_parent(ip, clindex, 5789 parentp, 5790 resolve_flags); 5791 hammer2_inode_drop(ip); 5792 if (*chainp) 5793 return (*chainp)->error; 5794 hammer2_chain_unlock(*chainp); 5795 hammer2_chain_drop(*chainp); 5796 *chainp = NULL; 5797 if (*parentp) { 5798 hammer2_chain_unlock(*parentp); 5799 hammer2_chain_drop(*parentp); 5800 *parentp = NULL; 5801 } 5802 } 5803 5804 /* 5805 * Inodes hang off of the iroot (bit 63 is clear, differentiating 5806 * inodes from root directory entries in the key lookup). 5807 */ 5808 parent = hammer2_inode_chain(pmp->iroot, clindex, resolve_flags); 5809 rchain = NULL; 5810 if (parent) { 5811 /* 5812 * NOTE: rchain can be returned as NULL even if error == 0 5813 * (i.e. not found) 5814 */ 5815 rchain = hammer2_chain_lookup(&parent, &key_dummy, 5816 inum, inum, 5817 &error, flags); 5818 /* 5819 * Propagate a chain-specific error to caller. 5820 * 5821 * If the chain is not errored, we must still validate that the inode 5822 * number is correct, because all hell will break loose if it isn't 5823 * correct. It should always be correct so print to the console and 5824 * simulate a CHECK error if it is not. 
5825 */ 5826 if (error == 0 && rchain) { 5827 error = rchain->error; 5828 if (error == 0 && rchain->data) { 5829 if (inum != rchain->data->ipdata.meta.inum) { 5830 kprintf("hammer2_chain_inode_find: lookup inum %ld, " 5831 "got valid inode but with inum %ld\n", 5832 (long)inum, (long)rchain->data->ipdata.meta.inum); 5833 error = HAMMER2_ERROR_CHECK; 5834 rchain->error = error; 5835 } 5836 } 5837 } 5838 } else { 5839 error = HAMMER2_ERROR_EIO; 5840 } 5841 *parentp = parent; 5842 *chainp = rchain; 5843 5844 return error; 5845 } 5846 5847 /* 5848 * Used by the bulkscan code to snapshot the synchronized storage for 5849 * a volume, allowing it to be scanned concurrently against normal 5850 * operation. 5851 */ 5852 hammer2_chain_t * 5853 hammer2_chain_bulksnap(hammer2_dev_t *hmp) 5854 { 5855 hammer2_chain_t *copy; 5856 5857 copy = hammer2_chain_alloc(hmp, hmp->spmp, &hmp->vchain.bref); 5858 copy->data = kmalloc(sizeof(copy->data->voldata), 5859 hmp->mmsg, M_WAITOK | M_ZERO); 5860 hammer2_voldata_lock(hmp); 5861 copy->data->voldata = hmp->volsync; 5862 hammer2_voldata_unlock(hmp); 5863 5864 return copy; 5865 } 5866 5867 void 5868 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 5869 { 5870 KKASSERT(copy->bref.type == HAMMER2_BREF_TYPE_VOLUME); 5871 KKASSERT(copy->data); 5872 kfree(copy->data, copy->hmp->mmsg); 5873 copy->data = NULL; 5874 hammer2_chain_drop(copy); 5875 } 5876 5877 /* 5878 * Returns non-zero if the chain (INODE or DIRENT) matches the 5879 * filename. 
5880 */ 5881 int 5882 hammer2_chain_dirent_test(hammer2_chain_t *chain, const char *name, 5883 size_t name_len) 5884 { 5885 const hammer2_inode_data_t *ripdata; 5886 5887 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) { 5888 ripdata = &chain->data->ipdata; 5889 if (ripdata->meta.name_len == name_len && 5890 bcmp(ripdata->filename, name, name_len) == 0) { 5891 return 1; 5892 } 5893 } 5894 if (chain->bref.type == HAMMER2_BREF_TYPE_DIRENT && 5895 chain->bref.embed.dirent.namlen == name_len) { 5896 if (name_len > sizeof(chain->bref.check.buf) && 5897 bcmp(chain->data->buf, name, name_len) == 0) { 5898 return 1; 5899 } 5900 if (name_len <= sizeof(chain->bref.check.buf) && 5901 bcmp(chain->bref.check.buf, name, name_len) == 0) { 5902 return 1; 5903 } 5904 } 5905 return 0; 5906 } 5907 5908 /* 5909 * Debugging 5910 */ 5911 void 5912 hammer2_dump_chain(hammer2_chain_t *chain, int tab, int bi, int *countp, 5913 char pfx, u_int flags) 5914 { 5915 hammer2_chain_t *scan; 5916 hammer2_chain_t *parent; 5917 5918 --*countp; 5919 if (*countp == 0) { 5920 kprintf("%*.*s...\n", tab, tab, ""); 5921 return; 5922 } 5923 if (*countp < 0) 5924 return; 5925 kprintf("%*.*s%c-chain %p %s.%-3d %016jx %016jx/%-2d mir=%016jx\n", 5926 tab, tab, "", pfx, chain, 5927 hammer2_bref_type_str(chain->bref.type), bi, 5928 chain->bref.data_off, chain->bref.key, chain->bref.keybits, 5929 chain->bref.mirror_tid); 5930 5931 kprintf("%*.*s [%08x] (%s) refs=%d", 5932 tab, tab, "", 5933 chain->flags, 5934 ((chain->bref.type == HAMMER2_BREF_TYPE_INODE && 5935 chain->data) ? 
(char *)chain->data->ipdata.filename : "?"), 5936 chain->refs); 5937 5938 parent = chain->parent; 5939 if (parent) 5940 kprintf("\n%*.*s p=%p [pflags %08x prefs %d]", 5941 tab, tab, "", 5942 parent, parent->flags, parent->refs); 5943 if (RB_EMPTY(&chain->core.rbtree)) { 5944 kprintf("\n"); 5945 } else { 5946 int bi = 0; 5947 kprintf(" {\n"); 5948 RB_FOREACH(scan, hammer2_chain_tree, &chain->core.rbtree) { 5949 if ((scan->flags & flags) || flags == (u_int)-1) { 5950 hammer2_dump_chain(scan, tab + 4, bi, countp, 5951 'a', flags); 5952 } 5953 bi++; 5954 } 5955 if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && chain->data) 5956 kprintf("%*.*s}(%s)\n", tab, tab, "", 5957 chain->data->ipdata.filename); 5958 else 5959 kprintf("%*.*s}\n", tab, tab, ""); 5960 } 5961 } 5962 5963 void 5964 hammer2_dump_chains(hammer2_dev_t *hmp, char vpfx, char fpfx) 5965 { 5966 int dumpcnt; 5967 5968 dumpcnt = 50; 5969 hammer2_dump_chain(&hmp->vchain, 0, 0, &dumpcnt, vpfx, (u_int)-1); 5970 5971 dumpcnt = 50; 5972 hammer2_dump_chain(&hmp->fchain, 0, 0, &dumpcnt, fpfx, (u_int)-1); 5973 } 5974