1 /* 2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 38 * 39 * Chains are the in-memory version on media objects (volume header, inodes, 40 * indirect blocks, data blocks, etc). Chains represent a portion of the 41 * HAMMER2 topology. 42 * 43 * Chains are no-longer delete-duplicated. Instead, the original in-memory 44 * chain will be moved along with its block reference (e.g. for things like 45 * renames, hardlink operations, modifications, etc), and will be indexed 46 * on a secondary list for flush handling instead of propagating a flag 47 * upward to the root. 48 * 49 * Concurrent front-end operations can still run against backend flushes 50 * as long as they do not cross the current flush boundary. An operation 51 * running above the current flush (in areas not yet flushed) can become 52 * part of the current flush while ano peration running below the current 53 * flush can become part of the next flush. 54 */ 55 #include <sys/cdefs.h> 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/types.h> 59 #include <sys/lock.h> 60 #include <sys/kern_syscall.h> 61 #include <sys/uuid.h> 62 63 #include <crypto/sha2/sha2.h> 64 65 #include "hammer2.h" 66 67 static int hammer2_indirect_optimize; /* XXX SYSCTL */ 68 69 static hammer2_chain_t *hammer2_chain_create_indirect( 70 hammer2_chain_t *parent, 71 hammer2_key_t key, int keybits, 72 hammer2_tid_t mtid, int for_type, int *errorp); 73 static void hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop); 74 static hammer2_chain_t *hammer2_combined_find( 75 hammer2_chain_t *parent, 76 hammer2_blockref_t *base, int count, 77 int *cache_indexp, hammer2_key_t *key_nextp, 78 hammer2_key_t key_beg, hammer2_key_t key_end, 79 hammer2_blockref_t **bresp); 80 81 /* 82 * Basic RBTree for chains (core->rbtree and core->dbtree). Chains cannot 83 * overlap in the RB trees. Deleted chains are moved from rbtree to either 84 * dbtree or to dbq. 85 * 86 * Chains in delete-duplicate sequences can always iterate through core_entry 87 * to locate the live version of the chain. 88 */ 89 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 90 91 int 92 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 93 { 94 hammer2_key_t c1_beg; 95 hammer2_key_t c1_end; 96 hammer2_key_t c2_beg; 97 hammer2_key_t c2_end; 98 99 /* 100 * Compare chains. Overlaps are not supposed to happen and catch 101 * any software issues early we count overlaps as a match. 102 */ 103 c1_beg = chain1->bref.key; 104 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 105 c2_beg = chain2->bref.key; 106 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 107 108 if (c1_end < c2_beg) /* fully to the left */ 109 return(-1); 110 if (c1_beg > c2_end) /* fully to the right */ 111 return(1); 112 return(0); /* overlap (must not cross edge boundary) */ 113 } 114 115 static __inline 116 int 117 hammer2_isclusterable(hammer2_chain_t *chain) 118 { 119 if (hammer2_cluster_enable) { 120 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 121 chain->bref.type == HAMMER2_BREF_TYPE_INODE || 122 chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 123 return(1); 124 } 125 } 126 return(0); 127 } 128 129 /* 130 * Make a chain visible to the flusher. The flusher needs to be able to 131 * do flushes of subdirectory chains or single files so it does a top-down 132 * recursion using the ONFLUSH flag for the recursion. It locates MODIFIED 133 * or UPDATE chains and flushes back up the chain to the volume root. 134 * 135 * This routine sets ONFLUSH upward until it hits the volume root. For 136 * simplicity we ignore PFSROOT boundaries whos rules can be complex. 137 * Extra ONFLUSH flagging doesn't hurt the filesystem. 138 */ 139 void 140 hammer2_chain_setflush(hammer2_chain_t *chain) 141 { 142 hammer2_chain_t *parent; 143 144 if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 145 hammer2_spin_sh(&chain->core.spin); 146 while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 147 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 148 if ((parent = chain->parent) == NULL) 149 break; 150 hammer2_spin_sh(&parent->core.spin); 151 hammer2_spin_unsh(&chain->core.spin); 152 chain = parent; 153 } 154 hammer2_spin_unsh(&chain->core.spin); 155 } 156 } 157 158 /* 159 * Allocate a new disconnected chain element representing the specified 160 * bref. chain->refs is set to 1 and the passed bref is copied to 161 * chain->bref. chain->bytes is derived from the bref. 162 * 163 * chain->pmp inherits pmp unless the chain is an inode (other than the 164 * super-root inode). 165 * 166 * NOTE: Returns a referenced but unlocked (because there is no core) chain. 167 */ 168 hammer2_chain_t * 169 hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp, 170 hammer2_blockref_t *bref) 171 { 172 hammer2_chain_t *chain; 173 u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 174 175 /* 176 * Construct the appropriate system structure. 177 */ 178 switch(bref->type) { 179 case HAMMER2_BREF_TYPE_INODE: 180 case HAMMER2_BREF_TYPE_INDIRECT: 181 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 182 case HAMMER2_BREF_TYPE_DATA: 183 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 184 /* 185 * Chain's are really only associated with the hmp but we 186 * maintain a pmp association for per-mount memory tracking 187 * purposes. The pmp can be NULL. 188 */ 189 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 190 break; 191 case HAMMER2_BREF_TYPE_VOLUME: 192 case HAMMER2_BREF_TYPE_FREEMAP: 193 /* 194 * Only hammer2_chain_bulksnap() calls this function with these 195 * types. 196 */ 197 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 198 break; 199 default: 200 chain = NULL; 201 panic("hammer2_chain_alloc: unrecognized blockref type: %d", 202 bref->type); 203 } 204 205 /* 206 * Initialize the new chain structure. pmp must be set to NULL for 207 * chains belonging to the super-root topology of a device mount. 208 */ 209 if (pmp == hmp->spmp) 210 chain->pmp = NULL; 211 else 212 chain->pmp = pmp; 213 chain->hmp = hmp; 214 chain->bref = *bref; 215 chain->bytes = bytes; 216 chain->refs = 1; 217 chain->flags = HAMMER2_CHAIN_ALLOCATED; 218 219 /* 220 * Set the PFS boundary flag if this chain represents a PFS root. 221 */ 222 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 223 chain->flags |= HAMMER2_CHAIN_PFSBOUNDARY; 224 hammer2_chain_core_init(chain); 225 226 return (chain); 227 } 228 229 /* 230 * Initialize a chain's core structure. This structure used to be allocated 231 * but is now embedded. 232 * 233 * The core is not locked. No additional refs on the chain are made. 234 * (trans) must not be NULL if (core) is not NULL. 235 */ 236 void 237 hammer2_chain_core_init(hammer2_chain_t *chain) 238 { 239 /* 240 * Fresh core under nchain (no multi-homing of ochain's 241 * sub-tree). 242 */ 243 RB_INIT(&chain->core.rbtree); /* live chains */ 244 hammer2_mtx_init(&chain->lock, "h2chain"); 245 } 246 247 /* 248 * Add a reference to a chain element, preventing its destruction. 249 * 250 * (can be called with spinlock held) 251 */ 252 void 253 hammer2_chain_ref(hammer2_chain_t *chain) 254 { 255 atomic_add_int(&chain->refs, 1); 256 #if 0 257 kprintf("REFC %p %d %08x\n", chain, chain->refs - 1, chain->flags); 258 print_backtrace(8); 259 #endif 260 } 261 262 /* 263 * Insert the chain in the core rbtree. 264 * 265 * Normal insertions are placed in the live rbtree. Insertion of a deleted 266 * chain is a special case used by the flush code that is placed on the 267 * unstaged deleted list to avoid confusing the live view. 268 */ 269 #define HAMMER2_CHAIN_INSERT_SPIN 0x0001 270 #define HAMMER2_CHAIN_INSERT_LIVE 0x0002 271 #define HAMMER2_CHAIN_INSERT_RACE 0x0004 272 273 static 274 int 275 hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain, 276 int flags, int generation) 277 { 278 hammer2_chain_t *xchain; 279 int error = 0; 280 281 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 282 hammer2_spin_ex(&parent->core.spin); 283 284 /* 285 * Interlocked by spinlock, check for race 286 */ 287 if ((flags & HAMMER2_CHAIN_INSERT_RACE) && 288 parent->core.generation != generation) { 289 error = EAGAIN; 290 goto failed; 291 } 292 293 /* 294 * Insert chain 295 */ 296 xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain); 297 KASSERT(xchain == NULL, 298 ("hammer2_chain_insert: collision %p %p (key=%016jx)", 299 chain, xchain, chain->bref.key)); 300 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 301 chain->parent = parent; 302 ++parent->core.chain_count; 303 ++parent->core.generation; /* XXX incs for _get() too, XXX */ 304 305 /* 306 * We have to keep track of the effective live-view blockref count 307 * so the create code knows when to push an indirect block. 308 */ 309 if (flags & HAMMER2_CHAIN_INSERT_LIVE) 310 atomic_add_int(&parent->core.live_count, 1); 311 failed: 312 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 313 hammer2_spin_unex(&parent->core.spin); 314 return error; 315 } 316 317 /* 318 * Drop the caller's reference to the chain. When the ref count drops to 319 * zero this function will try to disassociate the chain from its parent and 320 * deallocate it, then recursely drop the parent using the implied ref 321 * from the chain's chain->parent. 322 */ 323 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain); 324 325 void 326 hammer2_chain_drop(hammer2_chain_t *chain) 327 { 328 u_int refs; 329 330 if (hammer2_debug & 0x200000) 331 Debugger("drop"); 332 #if 0 333 kprintf("DROP %p %d %08x\n", chain, chain->refs - 1, chain->flags); 334 print_backtrace(8); 335 #endif 336 337 KKASSERT(chain->refs > 0); 338 339 while (chain) { 340 refs = chain->refs; 341 cpu_ccfence(); 342 KKASSERT(refs > 0); 343 344 if (refs == 1) { 345 chain = hammer2_chain_lastdrop(chain); 346 } else { 347 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 348 break; 349 /* retry the same chain */ 350 } 351 } 352 } 353 354 /* 355 * Safe handling of the 1->0 transition on chain. Returns a chain for 356 * recursive drop or NULL, possibly returning the same chain if the atomic 357 * op fails. 358 * 359 * Whem two chains need to be recursively dropped we use the chain 360 * we would otherwise free to placehold the additional chain. It's a bit 361 * convoluted but we can't just recurse without potentially blowing out 362 * the kernel stack. 363 * 364 * The chain cannot be freed if it has any children. 365 * The chain cannot be freed if flagged MODIFIED unless we can dispose of that. 366 * The chain cannot be freed if flagged UPDATE unless we can dispose of that. 367 * 368 * The core spinlock is allowed nest child-to-parent (not parent-to-child). 369 */ 370 static 371 hammer2_chain_t * 372 hammer2_chain_lastdrop(hammer2_chain_t *chain) 373 { 374 hammer2_pfs_t *pmp; 375 hammer2_dev_t *hmp; 376 hammer2_chain_t *parent; 377 hammer2_chain_t *rdrop; 378 379 /* 380 * Critical field access. 381 */ 382 hammer2_spin_ex(&chain->core.spin); 383 384 if (chain->parent) { 385 /* 386 * If the chain has a parent the UPDATE bit prevents scrapping 387 * as the chain is needed to properly flush the parent. Try 388 * to complete the 1->0 transition and return NULL. Retry 389 * (return chain) if we are unable to complete the 1->0 390 * transition, else return NULL (nothing more to do). 391 * 392 * If the chain has a parent the MODIFIED bit prevents 393 * scrapping. 394 */ 395 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 396 HAMMER2_CHAIN_MODIFIED)) { 397 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 398 hammer2_spin_unex(&chain->core.spin); 399 chain = NULL; 400 } else { 401 hammer2_spin_unex(&chain->core.spin); 402 } 403 return (chain); 404 } 405 /* spinlock still held */ 406 } else { 407 /* 408 * The chain has no parent and can be flagged for destruction. 409 * Since it has no parent, UPDATE can also be cleared. 410 */ 411 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 412 if (chain->flags & HAMMER2_CHAIN_UPDATE) 413 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 414 415 /* 416 * If the chain has children or if it has been MODIFIED and 417 * also recorded for DEDUP, we must still flush the chain. 418 * 419 * In the case where it has children, the DESTROY flag test 420 * in the flush code will prevent unnecessary flushes of 421 * MODIFIED chains that are not flagged DEDUP so don't worry 422 * about that here. 423 */ 424 if (chain->core.chain_count || 425 (chain->flags & (HAMMER2_CHAIN_MODIFIED | 426 HAMMER2_CHAIN_DEDUP)) == 427 (HAMMER2_CHAIN_MODIFIED | 428 HAMMER2_CHAIN_DEDUP)) { 429 /* 430 * Put on flushq (should ensure refs > 1), retry 431 * the drop. 432 */ 433 hammer2_spin_unex(&chain->core.spin); 434 hammer2_delayed_flush(chain); 435 return(chain); /* retry drop */ 436 } 437 438 /* 439 * Otherwise we can scrap the MODIFIED bit if it is set, 440 * and continue along the freeing path. 441 */ 442 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 443 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 444 atomic_add_long(&hammer2_count_modified_chains, -1); 445 } 446 /* spinlock still held */ 447 } 448 449 /* 450 * If any children exist we must leave the chain intact with refs == 0. 451 * They exist because chains are retained below us which have refs or 452 * may require flushing. This case can occur when parent != NULL. 453 * 454 * Retry (return chain) if we fail to transition the refs to 0, else 455 * return NULL indication nothing more to do. 456 */ 457 if (chain->core.chain_count) { 458 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 459 hammer2_spin_unex(&chain->core.spin); 460 chain = NULL; 461 } else { 462 hammer2_spin_unex(&chain->core.spin); 463 } 464 return (chain); 465 } 466 /* spinlock still held */ 467 /* no chains left under us */ 468 469 /* 470 * chain->core has no children left so no accessors can get to our 471 * chain from there. Now we have to lock the parent core to interlock 472 * remaining possible accessors that might bump chain's refs before 473 * we can safely drop chain's refs with intent to free the chain. 474 */ 475 hmp = chain->hmp; 476 pmp = chain->pmp; /* can be NULL */ 477 rdrop = NULL; 478 479 /* 480 * Spinlock the parent and try to drop the last ref on chain. 481 * On success remove chain from its parent, otherwise return NULL. 482 * 483 * (normal core locks are top-down recursive but we define core 484 * spinlocks as bottom-up recursive, so this is safe). 485 */ 486 if ((parent = chain->parent) != NULL) { 487 hammer2_spin_ex(&parent->core.spin); 488 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 489 /* 1->0 transition failed, retry */ 490 hammer2_spin_unex(&parent->core.spin); 491 hammer2_spin_unex(&chain->core.spin); 492 return(chain); 493 } 494 495 /* 496 * 1->0 transition successful, remove chain from the 497 * parent. 498 */ 499 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 500 RB_REMOVE(hammer2_chain_tree, 501 &parent->core.rbtree, chain); 502 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 503 --parent->core.chain_count; 504 chain->parent = NULL; 505 } 506 507 /* 508 * If our chain was the last chain in the parent's core the 509 * core is now empty and its parent might have to be 510 * re-dropped if it has 0 refs. 511 */ 512 if (parent->core.chain_count == 0) { 513 rdrop = parent; 514 atomic_add_int(&rdrop->refs, 1); 515 /* 516 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 517 rdrop = NULL; 518 */ 519 } 520 hammer2_spin_unex(&parent->core.spin); 521 parent = NULL; /* safety */ 522 } 523 524 /* 525 * Successful 1->0 transition and the chain can be destroyed now. 526 * 527 * We still have the core spinlock, and core's chain_count is 0. 528 * Any parent spinlock is gone. 529 */ 530 hammer2_spin_unex(&chain->core.spin); 531 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 532 chain->core.chain_count == 0); 533 534 /* 535 * All spin locks are gone, no pointers remain to the chain, finish 536 * freeing it. 537 */ 538 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 539 HAMMER2_CHAIN_MODIFIED)) == 0); 540 hammer2_chain_drop_data(chain, 1); 541 542 KKASSERT(chain->dio == NULL); 543 544 /* 545 * Once chain resources are gone we can use the now dead chain 546 * structure to placehold what might otherwise require a recursive 547 * drop, because we have potentially two things to drop and can only 548 * return one directly. 549 */ 550 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 551 chain->flags &= ~HAMMER2_CHAIN_ALLOCATED; 552 chain->hmp = NULL; 553 kfree(chain, hmp->mchain); 554 } 555 556 /* 557 * Possible chaining loop when parent re-drop needed. 558 */ 559 return(rdrop); 560 } 561 562 /* 563 * On either last lock release or last drop 564 */ 565 static void 566 hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop) 567 { 568 /*hammer2_dev_t *hmp = chain->hmp;*/ 569 570 switch(chain->bref.type) { 571 case HAMMER2_BREF_TYPE_VOLUME: 572 case HAMMER2_BREF_TYPE_FREEMAP: 573 if (lastdrop) 574 chain->data = NULL; 575 break; 576 default: 577 KKASSERT(chain->data == NULL); 578 break; 579 } 580 } 581 582 /* 583 * Lock a referenced chain element, acquiring its data with I/O if necessary, 584 * and specify how you would like the data to be resolved. 585 * 586 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 587 * 588 * The lock is allowed to recurse, multiple locking ops will aggregate 589 * the requested resolve types. Once data is assigned it will not be 590 * removed until the last unlock. 591 * 592 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 593 * (typically used to avoid device/logical buffer 594 * aliasing for data) 595 * 596 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 597 * the INITIAL-create state (indirect blocks only). 598 * 599 * Do not resolve data elements for DATA chains. 600 * (typically used to avoid device/logical buffer 601 * aliasing for data) 602 * 603 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 604 * 605 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 606 * it will be locked exclusive. 607 * 608 * NOTE: Embedded elements (volume header, inodes) are always resolved 609 * regardless. 610 * 611 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 612 * element will instantiate and zero its buffer, and flush it on 613 * release. 614 * 615 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 616 * so as not to instantiate a device buffer, which could alias against 617 * a logical file buffer. However, if ALWAYS is specified the 618 * device buffer will be instantiated anyway. 619 * 620 * WARNING! This function blocks on I/O if data needs to be fetched. This 621 * blocking can run concurrent with other compatible lock holders 622 * who do not need data returning. The lock is not upgraded to 623 * exclusive during a data fetch, a separate bit is used to 624 * interlock I/O. However, an exclusive lock holder can still count 625 * on being interlocked against an I/O fetch managed by a shared 626 * lock holder. 627 */ 628 void 629 hammer2_chain_lock(hammer2_chain_t *chain, int how) 630 { 631 /* 632 * Ref and lock the element. Recursive locks are allowed. 633 */ 634 KKASSERT(chain->refs > 0); 635 atomic_add_int(&chain->lockcnt, 1); 636 637 /* 638 * Get the appropriate lock. 639 */ 640 if (how & HAMMER2_RESOLVE_SHARED) 641 hammer2_mtx_sh(&chain->lock); 642 else 643 hammer2_mtx_ex(&chain->lock); 644 ++curthread->td_tracker; 645 646 /* 647 * If we already have a valid data pointer no further action is 648 * necessary. 649 */ 650 if (chain->data) 651 return; 652 653 /* 654 * Do we have to resolve the data? 655 */ 656 switch(how & HAMMER2_RESOLVE_MASK) { 657 case HAMMER2_RESOLVE_NEVER: 658 return; 659 case HAMMER2_RESOLVE_MAYBE: 660 if (chain->flags & HAMMER2_CHAIN_INITIAL) 661 return; 662 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 663 return; 664 #if 0 665 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 666 return; 667 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 668 return; 669 #endif 670 /* fall through */ 671 case HAMMER2_RESOLVE_ALWAYS: 672 default: 673 break; 674 } 675 676 /* 677 * Caller requires data 678 */ 679 hammer2_chain_load_data(chain); 680 } 681 682 /* 683 * Downgrade an exclusive chain lock to a shared chain lock. 684 * 685 * NOTE: There is no upgrade equivalent due to the ease of 686 * deadlocks in that direction. 687 */ 688 void 689 hammer2_chain_lock_downgrade(hammer2_chain_t *chain) 690 { 691 hammer2_mtx_downgrade(&chain->lock); 692 } 693 694 /* 695 * Obtains a second shared lock on the chain, does not account the second 696 * shared lock as being owned by the current thread. 697 * 698 * Caller must already own a shared lock on this chain. 699 * 700 * The lock function is required to obtain the second shared lock without 701 * blocking on pending exclusive requests. 702 */ 703 void 704 hammer2_chain_push_shared_lock(hammer2_chain_t *chain) 705 { 706 hammer2_mtx_sh_again(&chain->lock); 707 atomic_add_int(&chain->lockcnt, 1); 708 /* do not count in td_tracker for this thread */ 709 } 710 711 /* 712 * Accounts for a shared lock that was pushed to us as being owned by our 713 * thread. 714 */ 715 void 716 hammer2_chain_pull_shared_lock(hammer2_chain_t *chain) 717 { 718 ++curthread->td_tracker; 719 } 720 721 /* 722 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 723 * may be of any type. 724 * 725 * Once chain->data is set it cannot be disposed of until all locks are 726 * released. 727 */ 728 void 729 hammer2_chain_load_data(hammer2_chain_t *chain) 730 { 731 hammer2_blockref_t *bref; 732 hammer2_dev_t *hmp; 733 char *bdata; 734 int error; 735 736 /* 737 * Degenerate case, data already present. 738 */ 739 if (chain->data) 740 return; 741 742 hmp = chain->hmp; 743 KKASSERT(hmp != NULL); 744 745 /* 746 * Gain the IOINPROG bit, interlocked block. 747 */ 748 for (;;) { 749 u_int oflags; 750 u_int nflags; 751 752 oflags = chain->flags; 753 cpu_ccfence(); 754 if (oflags & HAMMER2_CHAIN_IOINPROG) { 755 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 756 tsleep_interlock(&chain->flags, 0); 757 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 758 tsleep(&chain->flags, PINTERLOCKED, 759 "h2iocw", 0); 760 } 761 /* retry */ 762 } else { 763 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 764 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 765 break; 766 } 767 /* retry */ 768 } 769 } 770 771 /* 772 * We own CHAIN_IOINPROG 773 * 774 * Degenerate case if we raced another load. 775 */ 776 if (chain->data) 777 goto done; 778 779 /* 780 * We must resolve to a device buffer, either by issuing I/O or 781 * by creating a zero-fill element. We do not mark the buffer 782 * dirty when creating a zero-fill element (the hammer2_chain_modify() 783 * API must still be used to do that). 784 * 785 * The device buffer is variable-sized in powers of 2 down 786 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 787 * chunk always contains buffers of the same size. (XXX) 788 * 789 * The minimum physical IO size may be larger than the variable 790 * block size. 791 */ 792 bref = &chain->bref; 793 794 /* 795 * The getblk() optimization can only be used on newly created 796 * elements if the physical block size matches the request. 797 */ 798 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 799 error = hammer2_io_new(hmp, bref->data_off, chain->bytes, 800 &chain->dio); 801 } else { 802 error = hammer2_io_bread(hmp, bref->data_off, chain->bytes, 803 &chain->dio); 804 hammer2_adjreadcounter(&chain->bref, chain->bytes); 805 } 806 if (error) { 807 chain->error = HAMMER2_ERROR_IO; 808 kprintf("hammer2_chain_lock: I/O error %016jx: %d\n", 809 (intmax_t)bref->data_off, error); 810 hammer2_io_bqrelse(&chain->dio); 811 goto done; 812 } 813 chain->error = 0; 814 815 /* 816 * NOTE: A locked chain's data cannot be modified without first 817 * calling hammer2_chain_modify(). 818 */ 819 820 /* 821 * Clear INITIAL. In this case we used io_new() and the buffer has 822 * been zero'd and marked dirty. 823 */ 824 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 825 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 826 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 827 chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO; 828 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 829 /* 830 * check data not currently synchronized due to 831 * modification. XXX assumes data stays in the buffer 832 * cache, which might not be true (need biodep on flush 833 * to calculate crc? or simple crc?). 834 */ 835 } else { 836 if (hammer2_chain_testcheck(chain, bdata) == 0) { 837 kprintf("chain %016jx.%02x meth=%02x " 838 "CHECK FAIL %08x (flags=%08x)\n", 839 chain->bref.data_off, 840 chain->bref.type, 841 chain->bref.methods, 842 hammer2_icrc32(bdata, chain->bytes), 843 chain->flags); 844 chain->error = HAMMER2_ERROR_CHECK; 845 } 846 } 847 848 /* 849 * Setup the data pointer, either pointing it to an embedded data 850 * structure and copying the data from the buffer, or pointing it 851 * into the buffer. 852 * 853 * The buffer is not retained when copying to an embedded data 854 * structure in order to avoid potential deadlocks or recursions 855 * on the same physical buffer. 856 * 857 * WARNING! Other threads can start using the data the instant we 858 * set chain->data non-NULL. 859 */ 860 switch (bref->type) { 861 case HAMMER2_BREF_TYPE_VOLUME: 862 case HAMMER2_BREF_TYPE_FREEMAP: 863 /* 864 * Copy data from bp to embedded buffer 865 */ 866 panic("hammer2_chain_lock: called on unresolved volume header"); 867 break; 868 case HAMMER2_BREF_TYPE_INODE: 869 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 870 case HAMMER2_BREF_TYPE_INDIRECT: 871 case HAMMER2_BREF_TYPE_DATA: 872 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 873 default: 874 /* 875 * Point data at the device buffer and leave dio intact. 876 */ 877 chain->data = (void *)bdata; 878 break; 879 } 880 881 /* 882 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested. 883 */ 884 done: 885 for (;;) { 886 u_int oflags; 887 u_int nflags; 888 889 oflags = chain->flags; 890 nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG | 891 HAMMER2_CHAIN_IOSIGNAL); 892 KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG); 893 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 894 if (oflags & HAMMER2_CHAIN_IOSIGNAL) 895 wakeup(&chain->flags); 896 break; 897 } 898 } 899 } 900 901 /* 902 * Unlock and deref a chain element. 903 * 904 * On the last lock release any non-embedded data (chain->dio) will be 905 * retired. 906 */ 907 void 908 hammer2_chain_unlock(hammer2_chain_t *chain) 909 { 910 hammer2_mtx_state_t ostate; 911 long *counterp; 912 u_int lockcnt; 913 914 --curthread->td_tracker; 915 /* 916 * If multiple locks are present (or being attempted) on this 917 * particular chain we can just unlock, drop refs, and return. 918 * 919 * Otherwise fall-through on the 1->0 transition. 920 */ 921 for (;;) { 922 lockcnt = chain->lockcnt; 923 KKASSERT(lockcnt > 0); 924 cpu_ccfence(); 925 if (lockcnt > 1) { 926 if (atomic_cmpset_int(&chain->lockcnt, 927 lockcnt, lockcnt - 1)) { 928 hammer2_mtx_unlock(&chain->lock); 929 return; 930 } 931 } else { 932 if (atomic_cmpset_int(&chain->lockcnt, 1, 0)) 933 break; 934 } 935 /* retry */ 936 } 937 938 /* 939 * On the 1->0 transition we upgrade the core lock (if necessary) 940 * to exclusive for terminal processing. If after upgrading we find 941 * that lockcnt is non-zero, another thread is racing us and will 942 * handle the unload for us later on, so just cleanup and return 943 * leaving the data/io intact 944 * 945 * Otherwise if lockcnt is still 0 it is possible for it to become 946 * non-zero and race, but since we hold the core->lock exclusively 947 * all that will happen is that the chain will be reloaded after we 948 * unload it. 949 */ 950 ostate = hammer2_mtx_upgrade(&chain->lock); 951 if (chain->lockcnt) { 952 hammer2_mtx_unlock(&chain->lock); 953 return; 954 } 955 956 /* 957 * Shortcut the case if the data is embedded or not resolved. 958 * Only drop non-DIO-based data if the chain is not modified. 959 * 960 * Do NOT NULL out chain->data (e.g. inode data), it might be 961 * dirty. 962 */ 963 if (chain->dio == NULL) { 964 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) 965 hammer2_chain_drop_data(chain, 0); 966 hammer2_mtx_unlock(&chain->lock); 967 return; 968 } 969 970 /* 971 * Statistics 972 */ 973 if (hammer2_io_isdirty(chain->dio)) { 974 switch(chain->bref.type) { 975 case HAMMER2_BREF_TYPE_DATA: 976 counterp = &hammer2_iod_file_write; 977 break; 978 case HAMMER2_BREF_TYPE_INODE: 979 counterp = &hammer2_iod_meta_write; 980 break; 981 case HAMMER2_BREF_TYPE_INDIRECT: 982 counterp = &hammer2_iod_indr_write; 983 break; 984 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 985 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 986 counterp = &hammer2_iod_fmap_write; 987 break; 988 default: 989 counterp = &hammer2_iod_volu_write; 990 break; 991 } 992 *counterp += chain->bytes; 993 } 994 995 /* 996 * Clean out the dio. 997 * 998 * NOTE: Freemap leaf's use reserved blocks and thus no aliasing 999 * is possible. 1000 */ 1001 chain->data = NULL; 1002 hammer2_io_bqrelse(&chain->dio); 1003 hammer2_mtx_unlock(&chain->lock); 1004 } 1005 1006 /* 1007 * Helper to obtain the blockref[] array base and count for a chain. 1008 * 1009 * XXX Not widely used yet, various use cases need to be validated and 1010 * converted to use this function. 1011 */ 1012 static 1013 hammer2_blockref_t * 1014 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1015 { 1016 hammer2_blockref_t *base; 1017 int count; 1018 1019 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1020 base = NULL; 1021 1022 switch(parent->bref.type) { 1023 case HAMMER2_BREF_TYPE_INODE: 1024 count = HAMMER2_SET_COUNT; 1025 break; 1026 case HAMMER2_BREF_TYPE_INDIRECT: 1027 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1028 count = parent->bytes / sizeof(hammer2_blockref_t); 1029 break; 1030 case HAMMER2_BREF_TYPE_VOLUME: 1031 count = HAMMER2_SET_COUNT; 1032 break; 1033 case HAMMER2_BREF_TYPE_FREEMAP: 1034 count = HAMMER2_SET_COUNT; 1035 break; 1036 default: 1037 panic("hammer2_chain_create_indirect: " 1038 "unrecognized blockref type: %d", 1039 parent->bref.type); 1040 count = 0; 1041 break; 1042 } 1043 } else { 1044 switch(parent->bref.type) { 1045 case HAMMER2_BREF_TYPE_INODE: 1046 base = &parent->data->ipdata.u.blockset.blockref[0]; 1047 count = HAMMER2_SET_COUNT; 1048 break; 1049 case HAMMER2_BREF_TYPE_INDIRECT: 1050 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1051 base = &parent->data->npdata[0]; 1052 count = parent->bytes / sizeof(hammer2_blockref_t); 1053 break; 1054 case HAMMER2_BREF_TYPE_VOLUME: 1055 base = &parent->data->voldata. 1056 sroot_blockset.blockref[0]; 1057 count = HAMMER2_SET_COUNT; 1058 break; 1059 case HAMMER2_BREF_TYPE_FREEMAP: 1060 base = &parent->data->blkset.blockref[0]; 1061 count = HAMMER2_SET_COUNT; 1062 break; 1063 default: 1064 panic("hammer2_chain_create_indirect: " 1065 "unrecognized blockref type: %d", 1066 parent->bref.type); 1067 count = 0; 1068 break; 1069 } 1070 } 1071 *countp = count; 1072 1073 return base; 1074 } 1075 1076 /* 1077 * This counts the number of live blockrefs in a block array and 1078 * also calculates the point at which all remaining blockrefs are empty. 1079 * This routine can only be called on a live chain (DUPLICATED flag not set). 1080 * 1081 * NOTE: Flag is not set until after the count is complete, allowing 1082 * callers to test the flag without holding the spinlock. 1083 * 1084 * NOTE: If base is NULL the related chain is still in the INITIAL 1085 * state and there are no blockrefs to count. 1086 * 1087 * NOTE: live_count may already have some counts accumulated due to 1088 * creation and deletion and could even be initially negative. 1089 */ 1090 void 1091 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1092 hammer2_blockref_t *base, int count) 1093 { 1094 hammer2_spin_ex(&chain->core.spin); 1095 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 1096 if (base) { 1097 while (--count >= 0) { 1098 if (base[count].type) 1099 break; 1100 } 1101 chain->core.live_zero = count + 1; 1102 while (count >= 0) { 1103 if (base[count].type) 1104 atomic_add_int(&chain->core.live_count, 1105 1); 1106 --count; 1107 } 1108 } else { 1109 chain->core.live_zero = 0; 1110 } 1111 /* else do not modify live_count */ 1112 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 1113 } 1114 hammer2_spin_unex(&chain->core.spin); 1115 } 1116 1117 /* 1118 * Resize the chain's physical storage allocation in-place. This function does 1119 * not adjust the data pointer and must be followed by (typically) a 1120 * hammer2_chain_modify() call to copy any old data over and adjust the 1121 * data pointer. 1122 * 1123 * Chains can be resized smaller without reallocating the storage. Resizing 1124 * larger will reallocate the storage. Excess or prior storage is reclaimed 1125 * asynchronously at a later time. 1126 * 1127 * Must be passed an exclusively locked parent and chain. 1128 * 1129 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 1130 * to avoid instantiating a device buffer that conflicts with the vnode data 1131 * buffer. However, because H2 can compress or encrypt data, the chain may 1132 * have a dio assigned to it in those situations, and they do not conflict. 1133 * 1134 * XXX return error if cannot resize. 1135 */ 1136 void 1137 hammer2_chain_resize(hammer2_inode_t *ip, 1138 hammer2_chain_t *parent, hammer2_chain_t *chain, 1139 hammer2_tid_t mtid, hammer2_off_t dedup_off, 1140 int nradix, int flags) 1141 { 1142 hammer2_dev_t *hmp; 1143 size_t obytes; 1144 size_t nbytes; 1145 1146 hmp = chain->hmp; 1147 1148 /* 1149 * Only data and indirect blocks can be resized for now. 1150 * (The volu root, inodes, and freemap elements use a fixed size). 1151 */ 1152 KKASSERT(chain != &hmp->vchain); 1153 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1154 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT); 1155 KKASSERT(chain->parent == parent); 1156 1157 /* 1158 * Nothing to do if the element is already the proper size 1159 */ 1160 obytes = chain->bytes; 1161 nbytes = 1U << nradix; 1162 if (obytes == nbytes) 1163 return; 1164 1165 /* 1166 * Make sure the old data is instantiated so we can copy it. If this 1167 * is a data block, the device data may be superfluous since the data 1168 * might be in a logical block, but compressed or encrypted data is 1169 * another matter. 1170 * 1171 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set. 1172 */ 1173 hammer2_chain_modify(chain, mtid, dedup_off, 0); 1174 1175 /* 1176 * Relocate the block, even if making it smaller (because different 1177 * block sizes may be in different regions). 1178 * 1179 * (data blocks only, we aren't copying the storage here). 1180 */ 1181 hammer2_freemap_alloc(chain, nbytes); 1182 chain->bytes = nbytes; 1183 /*ip->delta_dcount += (ssize_t)(nbytes - obytes);*/ /* XXX atomic */ 1184 1185 /* 1186 * We don't want the followup chain_modify() to try to copy data 1187 * from the old (wrong-sized) buffer. It won't know how much to 1188 * copy. This case should only occur during writes when the 1189 * originator already has the data to write in-hand. 1190 */ 1191 if (chain->dio) { 1192 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA); 1193 hammer2_io_brelse(&chain->dio); 1194 chain->data = NULL; 1195 } 1196 } 1197 1198 /* 1199 * Set the chain modified so its data can be changed by the caller. 1200 * 1201 * Sets bref.modify_tid to mtid only if mtid != 0. Note that bref.modify_tid 1202 * is a CLC (cluster level change) field and is not updated by parent 1203 * propagation during a flush. 1204 * 1205 * If the caller passes a non-zero dedup_off we assign data_off to that 1206 * instead of allocating a ne block. Caller must not modify the data already 1207 * present at the target offset. 1208 */ 1209 void 1210 hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid, 1211 hammer2_off_t dedup_off, int flags) 1212 { 1213 hammer2_blockref_t obref; 1214 hammer2_dev_t *hmp; 1215 hammer2_io_t *dio; 1216 int error; 1217 int wasinitial; 1218 int newmod; 1219 char *bdata; 1220 1221 hmp = chain->hmp; 1222 obref = chain->bref; 1223 KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0); 1224 1225 /* 1226 * Data is not optional for freemap chains (we must always be sure 1227 * to copy the data on COW storage allocations). 1228 */ 1229 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 1230 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 1231 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) || 1232 (flags & HAMMER2_MODIFY_OPTDATA) == 0); 1233 } 1234 1235 /* 1236 * Data must be resolved if already assigned, unless explicitly 1237 * flagged otherwise. 1238 */ 1239 if (chain->data == NULL && (flags & HAMMER2_MODIFY_OPTDATA) == 0 && 1240 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 1241 hammer2_chain_load_data(chain); 1242 } 1243 1244 /* 1245 * Set MODIFIED to indicate that the chain has been modified. 1246 * Set UPDATE to ensure that the blockref is updated in the parent. 1247 */ 1248 if ((chain->flags & (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) == 1249 (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) { 1250 /* 1251 * Modified already set but a new allocation is needed 1252 * anyway because we recorded this data_off for possible 1253 * dedup operation. 1254 */ 1255 newmod = 1; 1256 } else if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1257 /* 1258 * Must set modified bit. 1259 */ 1260 atomic_add_long(&hammer2_count_modified_chains, 1); 1261 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1262 hammer2_pfs_memory_inc(chain->pmp); /* can be NULL */ 1263 newmod = 1; 1264 } else { 1265 /* 1266 * Already flagged modified, no new allocation is needed. 1267 */ 1268 newmod = 0; 1269 } 1270 1271 /* 1272 * Flag parent update required, clear DEDUP flag (already processed 1273 * above). 1274 */ 1275 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 1276 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1277 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DEDUP); 1278 1279 /* 1280 * The modification or re-modification requires an allocation and 1281 * possible COW. 1282 * 1283 * If dedup_off is non-zero, caller already has a data offset 1284 * containing the caller's desired data. The dedup offset is 1285 * allowed to be in a partially free state and we must be sure 1286 * to reset it to a fully allocated state to force two bulkfree 1287 * passes to free it again. 1288 * 1289 * XXX can a chain already be marked MODIFIED without a data 1290 * assignment? If not, assert here instead of testing the case. 1291 */ 1292 if (chain != &hmp->vchain && chain != &hmp->fchain) { 1293 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 || 1294 newmod 1295 ) { 1296 if (dedup_off) { 1297 chain->bref.data_off = dedup_off; 1298 chain->bytes = 1 << (dedup_off & 1299 HAMMER2_OFF_MASK_RADIX); 1300 atomic_set_int(&chain->flags, 1301 HAMMER2_CHAIN_DEDUP); 1302 hammer2_freemap_adjust(hmp, &chain->bref, 1303 HAMMER2_FREEMAP_DORECOVER); 1304 } else { 1305 hammer2_freemap_alloc(chain, chain->bytes); 1306 } 1307 /* XXX failed allocation */ 1308 } 1309 } 1310 1311 /* 1312 * Update mirror_tid and modify_tid. modify_tid is only updated 1313 * if not passed as zero (during flushes, parent propagation passes 1314 * the value 0). 1315 * 1316 * NOTE: chain->pmp could be the device spmp. 1317 */ 1318 chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1; 1319 if (mtid) 1320 chain->bref.modify_tid = mtid; 1321 1322 /* 1323 * Set BMAPUPD to tell the flush code that an existing blockmap entry 1324 * requires updating as well as to tell the delete code that the 1325 * chain's blockref might not exactly match (in terms of physical size 1326 * or block offset) the one in the parent's blocktable. The base key 1327 * of course will still match. 1328 */ 1329 if (chain->flags & HAMMER2_CHAIN_BMAPPED) 1330 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD); 1331 1332 /* 1333 * Short-cut data blocks which the caller does not need an actual 1334 * data reference to (aka OPTDATA), as long as the chain does not 1335 * already have a data pointer to the data. This generally means 1336 * that the modifications are being done via the logical buffer cache. 1337 * The INITIAL flag relates only to the device data buffer and thus 1338 * remains unchange in this situation. 1339 */ 1340 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA && 1341 (flags & HAMMER2_MODIFY_OPTDATA) && 1342 chain->data == NULL) { 1343 goto skip2; 1344 } 1345 1346 /* 1347 * Clearing the INITIAL flag (for indirect blocks) indicates that 1348 * we've processed the uninitialized storage allocation. 1349 * 1350 * If this flag is already clear we are likely in a copy-on-write 1351 * situation but we have to be sure NOT to bzero the storage if 1352 * no data is present. 1353 */ 1354 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1355 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1356 wasinitial = 1; 1357 } else { 1358 wasinitial = 0; 1359 } 1360 1361 /* 1362 * Instantiate data buffer and possibly execute COW operation 1363 */ 1364 switch(chain->bref.type) { 1365 case HAMMER2_BREF_TYPE_VOLUME: 1366 case HAMMER2_BREF_TYPE_FREEMAP: 1367 /* 1368 * The data is embedded, no copy-on-write operation is 1369 * needed. 1370 */ 1371 KKASSERT(chain->dio == NULL); 1372 break; 1373 case HAMMER2_BREF_TYPE_INODE: 1374 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1375 case HAMMER2_BREF_TYPE_DATA: 1376 case HAMMER2_BREF_TYPE_INDIRECT: 1377 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1378 /* 1379 * Perform the copy-on-write operation 1380 * 1381 * zero-fill or copy-on-write depending on whether 1382 * chain->data exists or not and set the dirty state for 1383 * the new buffer. hammer2_io_new() will handle the 1384 * zero-fill. 1385 * 1386 * If a dedup_off was supplied this is an existing block 1387 * and no COW, copy, or further modification is required. 1388 */ 1389 KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain); 1390 1391 if (wasinitial && dedup_off == 0) { 1392 error = hammer2_io_new(hmp, chain->bref.data_off, 1393 chain->bytes, &dio); 1394 } else { 1395 error = hammer2_io_bread(hmp, chain->bref.data_off, 1396 chain->bytes, &dio); 1397 } 1398 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1399 1400 /* 1401 * If an I/O error occurs make sure callers cannot accidently 1402 * modify the old buffer's contents and corrupt the filesystem. 1403 */ 1404 if (error) { 1405 kprintf("hammer2_chain_modify: hmp=%p I/O error\n", 1406 hmp); 1407 chain->error = HAMMER2_ERROR_IO; 1408 hammer2_io_brelse(&dio); 1409 hammer2_io_brelse(&chain->dio); 1410 chain->data = NULL; 1411 break; 1412 } 1413 chain->error = 0; 1414 bdata = hammer2_io_data(dio, chain->bref.data_off); 1415 1416 if (chain->data) { 1417 /* 1418 * COW (unless a dedup). 1419 */ 1420 KKASSERT(chain->dio != NULL); 1421 if (chain->data != (void *)bdata && dedup_off == 0) { 1422 bcopy(chain->data, bdata, chain->bytes); 1423 } 1424 } else if (wasinitial == 0) { 1425 /* 1426 * We have a problem. We were asked to COW but 1427 * we don't have any data to COW with! 1428 */ 1429 panic("hammer2_chain_modify: having a COW %p\n", 1430 chain); 1431 } 1432 1433 /* 1434 * Retire the old buffer, replace with the new. Dirty or 1435 * redirty the new buffer. 1436 * 1437 * WARNING! The system buffer cache may have already flushed 1438 * the buffer, so we must be sure to [re]dirty it 1439 * for further modification. 1440 * 1441 * If dedup_off was supplied, the caller is not 1442 * expected to make any further modification to the 1443 * buffer. 1444 */ 1445 if (chain->dio) 1446 hammer2_io_bqrelse(&chain->dio); 1447 chain->data = (void *)bdata; 1448 chain->dio = dio; 1449 if (dedup_off == 0) 1450 hammer2_io_setdirty(dio); 1451 break; 1452 default: 1453 panic("hammer2_chain_modify: illegal non-embedded type %d", 1454 chain->bref.type); 1455 break; 1456 1457 } 1458 skip2: 1459 /* 1460 * setflush on parent indicating that the parent must recurse down 1461 * to us. Do not call on chain itself which might already have it 1462 * set. 1463 */ 1464 if (chain->parent) 1465 hammer2_chain_setflush(chain->parent); 1466 } 1467 1468 /* 1469 * Modify the chain associated with an inode. 1470 */ 1471 void 1472 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 1473 hammer2_tid_t mtid, int flags) 1474 { 1475 hammer2_inode_modify(ip); 1476 hammer2_chain_modify(chain, mtid, 0, flags); 1477 } 1478 1479 /* 1480 * Volume header data locks 1481 */ 1482 void 1483 hammer2_voldata_lock(hammer2_dev_t *hmp) 1484 { 1485 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 1486 } 1487 1488 void 1489 hammer2_voldata_unlock(hammer2_dev_t *hmp) 1490 { 1491 lockmgr(&hmp->vollk, LK_RELEASE); 1492 } 1493 1494 void 1495 hammer2_voldata_modify(hammer2_dev_t *hmp) 1496 { 1497 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1498 atomic_add_long(&hammer2_count_modified_chains, 1); 1499 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 1500 hammer2_pfs_memory_inc(hmp->vchain.pmp); 1501 } 1502 } 1503 1504 /* 1505 * This function returns the chain at the nearest key within the specified 1506 * range. The returned chain will be referenced but not locked. 1507 * 1508 * This function will recurse through chain->rbtree as necessary and will 1509 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1510 * the iteration value is less than the current value of *key_nextp. 1511 * 1512 * The caller should use (*key_nextp) to calculate the actual range of 1513 * the returned element, which will be (key_beg to *key_nextp - 1), because 1514 * there might be another element which is superior to the returned element 1515 * and overlaps it. 1516 * 1517 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1518 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1519 * it will wind up being (key_end + 1). 1520 * 1521 * WARNING! Must be called with child's spinlock held. Spinlock remains 1522 * held through the operation. 1523 */ 1524 struct hammer2_chain_find_info { 1525 hammer2_chain_t *best; 1526 hammer2_key_t key_beg; 1527 hammer2_key_t key_end; 1528 hammer2_key_t key_next; 1529 }; 1530 1531 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 1532 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 1533 1534 static 1535 hammer2_chain_t * 1536 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 1537 hammer2_key_t key_beg, hammer2_key_t key_end) 1538 { 1539 struct hammer2_chain_find_info info; 1540 1541 info.best = NULL; 1542 info.key_beg = key_beg; 1543 info.key_end = key_end; 1544 info.key_next = *key_nextp; 1545 1546 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 1547 hammer2_chain_find_cmp, hammer2_chain_find_callback, 1548 &info); 1549 *key_nextp = info.key_next; 1550 #if 0 1551 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 1552 parent, key_beg, key_end, *key_nextp); 1553 #endif 1554 1555 return (info.best); 1556 } 1557 1558 static 1559 int 1560 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 1561 { 1562 struct hammer2_chain_find_info *info = data; 1563 hammer2_key_t child_beg; 1564 hammer2_key_t child_end; 1565 1566 child_beg = child->bref.key; 1567 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 1568 1569 if (child_end < info->key_beg) 1570 return(-1); 1571 if (child_beg > info->key_end) 1572 return(1); 1573 return(0); 1574 } 1575 1576 static 1577 int 1578 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 1579 { 1580 struct hammer2_chain_find_info *info = data; 1581 hammer2_chain_t *best; 1582 hammer2_key_t child_end; 1583 1584 /* 1585 * WARNING! Do not discard DUPLICATED chains, it is possible that 1586 * we are catching an insertion half-way done. If a 1587 * duplicated chain turns out to be the best choice the 1588 * caller will re-check its flags after locking it. 1589 * 1590 * WARNING! Layerq is scanned forwards, exact matches should keep 1591 * the existing info->best. 1592 */ 1593 if ((best = info->best) == NULL) { 1594 /* 1595 * No previous best. Assign best 1596 */ 1597 info->best = child; 1598 } else if (best->bref.key <= info->key_beg && 1599 child->bref.key <= info->key_beg) { 1600 /* 1601 * Illegal overlap. 1602 */ 1603 KKASSERT(0); 1604 /*info->best = child;*/ 1605 } else if (child->bref.key < best->bref.key) { 1606 /* 1607 * Child has a nearer key and best is not flush with key_beg. 1608 * Set best to child. Truncate key_next to the old best key. 1609 */ 1610 info->best = child; 1611 if (info->key_next > best->bref.key || info->key_next == 0) 1612 info->key_next = best->bref.key; 1613 } else if (child->bref.key == best->bref.key) { 1614 /* 1615 * If our current best is flush with the child then this 1616 * is an illegal overlap. 1617 * 1618 * key_next will automatically be limited to the smaller of 1619 * the two end-points. 1620 */ 1621 KKASSERT(0); 1622 info->best = child; 1623 } else { 1624 /* 1625 * Keep the current best but truncate key_next to the child's 1626 * base. 1627 * 1628 * key_next will also automatically be limited to the smaller 1629 * of the two end-points (probably not necessary for this case 1630 * but we do it anyway). 1631 */ 1632 if (info->key_next > child->bref.key || info->key_next == 0) 1633 info->key_next = child->bref.key; 1634 } 1635 1636 /* 1637 * Always truncate key_next based on child's end-of-range. 1638 */ 1639 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 1640 if (child_end && (info->key_next > child_end || info->key_next == 0)) 1641 info->key_next = child_end; 1642 1643 return(0); 1644 } 1645 1646 /* 1647 * Retrieve the specified chain from a media blockref, creating the 1648 * in-memory chain structure which reflects it. 1649 * 1650 * To handle insertion races pass the INSERT_RACE flag along with the 1651 * generation number of the core. NULL will be returned if the generation 1652 * number changes before we have a chance to insert the chain. Insert 1653 * races can occur because the parent might be held shared. 1654 * 1655 * Caller must hold the parent locked shared or exclusive since we may 1656 * need the parent's bref array to find our block. 1657 * 1658 * WARNING! chain->pmp is always set to NULL for any chain representing 1659 * part of the super-root topology. 1660 */ 1661 hammer2_chain_t * 1662 hammer2_chain_get(hammer2_chain_t *parent, int generation, 1663 hammer2_blockref_t *bref) 1664 { 1665 hammer2_dev_t *hmp = parent->hmp; 1666 hammer2_chain_t *chain; 1667 int error; 1668 1669 /* 1670 * Allocate a chain structure representing the existing media 1671 * entry. Resulting chain has one ref and is not locked. 1672 */ 1673 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 1674 chain = hammer2_chain_alloc(hmp, NULL, bref); 1675 else 1676 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 1677 /* ref'd chain returned */ 1678 1679 /* 1680 * Flag that the chain is in the parent's blockmap so delete/flush 1681 * knows what to do with it. 1682 */ 1683 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 1684 1685 /* 1686 * Link the chain into its parent. A spinlock is required to safely 1687 * access the RBTREE, and it is possible to collide with another 1688 * hammer2_chain_get() operation because the caller might only hold 1689 * a shared lock on the parent. 1690 */ 1691 KKASSERT(parent->refs > 0); 1692 error = hammer2_chain_insert(parent, chain, 1693 HAMMER2_CHAIN_INSERT_SPIN | 1694 HAMMER2_CHAIN_INSERT_RACE, 1695 generation); 1696 if (error) { 1697 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 1698 kprintf("chain %p get race\n", chain); 1699 hammer2_chain_drop(chain); 1700 chain = NULL; 1701 } else { 1702 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 1703 } 1704 1705 /* 1706 * Return our new chain referenced but not locked, or NULL if 1707 * a race occurred. 1708 */ 1709 return (chain); 1710 } 1711 1712 /* 1713 * Lookup initialization/completion API 1714 */ 1715 hammer2_chain_t * 1716 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 1717 { 1718 hammer2_chain_ref(parent); 1719 if (flags & HAMMER2_LOOKUP_SHARED) { 1720 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 1721 HAMMER2_RESOLVE_SHARED); 1722 } else { 1723 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1724 } 1725 return (parent); 1726 } 1727 1728 void 1729 hammer2_chain_lookup_done(hammer2_chain_t *parent) 1730 { 1731 if (parent) { 1732 hammer2_chain_unlock(parent); 1733 hammer2_chain_drop(parent); 1734 } 1735 } 1736 1737 hammer2_chain_t * 1738 hammer2_chain_getparent(hammer2_chain_t **parentp, int how) 1739 { 1740 hammer2_chain_t *oparent; 1741 hammer2_chain_t *nparent; 1742 1743 /* 1744 * Be careful of order, oparent must be unlocked before nparent 1745 * is locked below to avoid a deadlock. 1746 */ 1747 oparent = *parentp; 1748 hammer2_spin_ex(&oparent->core.spin); 1749 nparent = oparent->parent; 1750 hammer2_chain_ref(nparent); 1751 hammer2_spin_unex(&oparent->core.spin); 1752 if (oparent) { 1753 hammer2_chain_unlock(oparent); 1754 hammer2_chain_drop(oparent); 1755 oparent = NULL; 1756 } 1757 1758 hammer2_chain_lock(nparent, how); 1759 *parentp = nparent; 1760 1761 return (nparent); 1762 } 1763 1764 /* 1765 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 1766 * (*parentp) typically points to an inode but can also point to a related 1767 * indirect block and this function will recurse upwards and find the inode 1768 * again. 1769 * 1770 * (*parentp) must be exclusively locked and referenced and can be an inode 1771 * or an existing indirect block within the inode. 1772 * 1773 * On return (*parentp) will be modified to point at the deepest parent chain 1774 * element encountered during the search, as a helper for an insertion or 1775 * deletion. The new (*parentp) will be locked and referenced and the old 1776 * will be unlocked and dereferenced (no change if they are both the same). 1777 * 1778 * The matching chain will be returned exclusively locked. If NOLOCK is 1779 * requested the chain will be returned only referenced. Note that the 1780 * parent chain must always be locked shared or exclusive, matching the 1781 * HAMMER2_LOOKUP_SHARED flag. We can conceivably lock it SHARED temporarily 1782 * when NOLOCK is specified but that complicates matters if *parentp must 1783 * inherit the chain. 1784 * 1785 * NOLOCK also implies NODATA, since an unlocked chain usually has a NULL 1786 * data pointer or can otherwise be in flux. 1787 * 1788 * NULL is returned if no match was found, but (*parentp) will still 1789 * potentially be adjusted. 1790 * 1791 * If a fatal error occurs (typically an I/O error), a dummy chain is 1792 * returned with chain->error and error-identifying information set. This 1793 * chain will assert if you try to do anything fancy with it. 1794 * 1795 * XXX Depending on where the error occurs we should allow continued iteration. 1796 * 1797 * On return (*key_nextp) will point to an iterative value for key_beg. 1798 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 1799 * 1800 * This function will also recurse up the chain if the key is not within the 1801 * current parent's range. (*parentp) can never be set to NULL. An iteration 1802 * can simply allow (*parentp) to float inside the loop. 1803 * 1804 * NOTE! chain->data is not always resolved. By default it will not be 1805 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 1806 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 1807 * BREF_TYPE_DATA as the device buffer can alias the logical file 1808 * buffer). 1809 */ 1810 hammer2_chain_t * 1811 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 1812 hammer2_key_t key_beg, hammer2_key_t key_end, 1813 int *cache_indexp, int flags) 1814 { 1815 hammer2_dev_t *hmp; 1816 hammer2_chain_t *parent; 1817 hammer2_chain_t *chain; 1818 hammer2_blockref_t *base; 1819 hammer2_blockref_t *bref; 1820 hammer2_blockref_t bcopy; 1821 hammer2_key_t scan_beg; 1822 hammer2_key_t scan_end; 1823 int count = 0; 1824 int how_always = HAMMER2_RESOLVE_ALWAYS; 1825 int how_maybe = HAMMER2_RESOLVE_MAYBE; 1826 int how; 1827 int generation; 1828 int maxloops = 300000; 1829 1830 if (flags & HAMMER2_LOOKUP_ALWAYS) { 1831 how_maybe = how_always; 1832 how = HAMMER2_RESOLVE_ALWAYS; 1833 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 1834 how = HAMMER2_RESOLVE_NEVER; 1835 } else { 1836 how = HAMMER2_RESOLVE_MAYBE; 1837 } 1838 if (flags & HAMMER2_LOOKUP_SHARED) { 1839 how_maybe |= HAMMER2_RESOLVE_SHARED; 1840 how_always |= HAMMER2_RESOLVE_SHARED; 1841 how |= HAMMER2_RESOLVE_SHARED; 1842 } 1843 1844 /* 1845 * Recurse (*parentp) upward if necessary until the parent completely 1846 * encloses the key range or we hit the inode. 1847 * 1848 * This function handles races against the flusher doing a delete- 1849 * duplicate above us and re-homes the parent to the duplicate in 1850 * that case, otherwise we'd wind up recursing down a stale chain. 1851 */ 1852 parent = *parentp; 1853 hmp = parent->hmp; 1854 1855 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1856 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1857 scan_beg = parent->bref.key; 1858 scan_end = scan_beg + 1859 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 1860 if (key_beg >= scan_beg && key_end <= scan_end) 1861 break; 1862 parent = hammer2_chain_getparent(parentp, how_maybe); 1863 } 1864 1865 again: 1866 if (--maxloops == 0) 1867 panic("hammer2_chain_lookup: maxloops"); 1868 /* 1869 * Locate the blockref array. Currently we do a fully associative 1870 * search through the array. 1871 */ 1872 switch(parent->bref.type) { 1873 case HAMMER2_BREF_TYPE_INODE: 1874 /* 1875 * Special shortcut for embedded data returns the inode 1876 * itself. Callers must detect this condition and access 1877 * the embedded data (the strategy code does this for us). 1878 * 1879 * This is only applicable to regular files and softlinks. 1880 */ 1881 if (parent->data->ipdata.meta.op_flags & 1882 HAMMER2_OPFLAG_DIRECTDATA) { 1883 if (flags & HAMMER2_LOOKUP_NODIRECT) { 1884 chain = NULL; 1885 *key_nextp = key_end + 1; 1886 goto done; 1887 } 1888 hammer2_chain_ref(parent); 1889 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) 1890 hammer2_chain_lock(parent, how_always); 1891 *key_nextp = key_end + 1; 1892 return (parent); 1893 } 1894 base = &parent->data->ipdata.u.blockset.blockref[0]; 1895 count = HAMMER2_SET_COUNT; 1896 break; 1897 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1898 case HAMMER2_BREF_TYPE_INDIRECT: 1899 /* 1900 * Handle MATCHIND on the parent 1901 */ 1902 if (flags & HAMMER2_LOOKUP_MATCHIND) { 1903 scan_beg = parent->bref.key; 1904 scan_end = scan_beg + 1905 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 1906 if (key_beg == scan_beg && key_end == scan_end) { 1907 chain = parent; 1908 hammer2_chain_ref(chain); 1909 hammer2_chain_lock(chain, how_maybe); 1910 *key_nextp = scan_end + 1; 1911 goto done; 1912 } 1913 } 1914 /* 1915 * Optimize indirect blocks in the INITIAL state to avoid 1916 * I/O. 1917 */ 1918 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1919 base = NULL; 1920 } else { 1921 if (parent->data == NULL) 1922 panic("parent->data is NULL"); 1923 base = &parent->data->npdata[0]; 1924 } 1925 count = parent->bytes / sizeof(hammer2_blockref_t); 1926 break; 1927 case HAMMER2_BREF_TYPE_VOLUME: 1928 base = &parent->data->voldata.sroot_blockset.blockref[0]; 1929 count = HAMMER2_SET_COUNT; 1930 break; 1931 case HAMMER2_BREF_TYPE_FREEMAP: 1932 base = &parent->data->blkset.blockref[0]; 1933 count = HAMMER2_SET_COUNT; 1934 break; 1935 default: 1936 kprintf("hammer2_chain_lookup: unrecognized " 1937 "blockref(B) type: %d", 1938 parent->bref.type); 1939 while (1) 1940 tsleep(&base, 0, "dead", 0); 1941 panic("hammer2_chain_lookup: unrecognized " 1942 "blockref(B) type: %d", 1943 parent->bref.type); 1944 base = NULL; /* safety */ 1945 count = 0; /* safety */ 1946 } 1947 1948 /* 1949 * Merged scan to find next candidate. 1950 * 1951 * hammer2_base_*() functions require the parent->core.live_* fields 1952 * to be synchronized. 1953 * 1954 * We need to hold the spinlock to access the block array and RB tree 1955 * and to interlock chain creation. 1956 */ 1957 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 1958 hammer2_chain_countbrefs(parent, base, count); 1959 1960 /* 1961 * Combined search 1962 */ 1963 hammer2_spin_ex(&parent->core.spin); 1964 chain = hammer2_combined_find(parent, base, count, 1965 cache_indexp, key_nextp, 1966 key_beg, key_end, 1967 &bref); 1968 generation = parent->core.generation; 1969 1970 /* 1971 * Exhausted parent chain, iterate. 1972 */ 1973 if (bref == NULL) { 1974 hammer2_spin_unex(&parent->core.spin); 1975 if (key_beg == key_end) /* short cut single-key case */ 1976 return (NULL); 1977 1978 /* 1979 * Stop if we reached the end of the iteration. 1980 */ 1981 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 1982 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1983 return (NULL); 1984 } 1985 1986 /* 1987 * Calculate next key, stop if we reached the end of the 1988 * iteration, otherwise go up one level and loop. 1989 */ 1990 key_beg = parent->bref.key + 1991 ((hammer2_key_t)1 << parent->bref.keybits); 1992 if (key_beg == 0 || key_beg > key_end) 1993 return (NULL); 1994 parent = hammer2_chain_getparent(parentp, how_maybe); 1995 goto again; 1996 } 1997 1998 /* 1999 * Selected from blockref or in-memory chain. 2000 */ 2001 if (chain == NULL) { 2002 bcopy = *bref; 2003 hammer2_spin_unex(&parent->core.spin); 2004 chain = hammer2_chain_get(parent, generation, 2005 &bcopy); 2006 if (chain == NULL) { 2007 kprintf("retry lookup parent %p keys %016jx:%016jx\n", 2008 parent, key_beg, key_end); 2009 goto again; 2010 } 2011 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 2012 hammer2_chain_drop(chain); 2013 goto again; 2014 } 2015 } else { 2016 hammer2_chain_ref(chain); 2017 hammer2_spin_unex(&parent->core.spin); 2018 } 2019 2020 /* 2021 * chain is referenced but not locked. We must lock the chain 2022 * to obtain definitive DUPLICATED/DELETED state 2023 */ 2024 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2025 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2026 hammer2_chain_lock(chain, how_maybe); 2027 } else { 2028 hammer2_chain_lock(chain, how); 2029 } 2030 2031 /* 2032 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2033 * 2034 * NOTE: Chain's key range is not relevant as there might be 2035 * one-offs within the range that are not deleted. 2036 * 2037 * NOTE: Lookups can race delete-duplicate because 2038 * delete-duplicate does not lock the parent's core 2039 * (they just use the spinlock on the core). We must 2040 * check for races by comparing the DUPLICATED flag before 2041 * releasing the spinlock with the flag after locking the 2042 * chain. 2043 */ 2044 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2045 hammer2_chain_unlock(chain); 2046 hammer2_chain_drop(chain); 2047 key_beg = *key_nextp; 2048 if (key_beg == 0 || key_beg > key_end) 2049 return(NULL); 2050 goto again; 2051 } 2052 2053 /* 2054 * If the chain element is an indirect block it becomes the new 2055 * parent and we loop on it. We must maintain our top-down locks 2056 * to prevent the flusher from interfering (i.e. doing a 2057 * delete-duplicate and leaving us recursing down a deleted chain). 2058 * 2059 * The parent always has to be locked with at least RESOLVE_MAYBE 2060 * so we can access its data. It might need a fixup if the caller 2061 * passed incompatible flags. Be careful not to cause a deadlock 2062 * as a data-load requires an exclusive lock. 2063 * 2064 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2065 * range is within the requested key range we return the indirect 2066 * block and do NOT loop. This is usually only used to acquire 2067 * freemap nodes. 2068 */ 2069 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2070 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2071 hammer2_chain_unlock(parent); 2072 hammer2_chain_drop(parent); 2073 *parentp = parent = chain; 2074 goto again; 2075 } 2076 done: 2077 /* 2078 * All done, return the chain. 2079 * 2080 * If the caller does not want a locked chain, replace the lock with 2081 * a ref. Perhaps this can eventually be optimized to not obtain the 2082 * lock in the first place for situations where the data does not 2083 * need to be resolved. 2084 */ 2085 if (chain) { 2086 if (flags & HAMMER2_LOOKUP_NOLOCK) 2087 hammer2_chain_unlock(chain); 2088 } 2089 2090 return (chain); 2091 } 2092 2093 /* 2094 * After having issued a lookup we can iterate all matching keys. 2095 * 2096 * If chain is non-NULL we continue the iteration from just after it's index. 2097 * 2098 * If chain is NULL we assume the parent was exhausted and continue the 2099 * iteration at the next parent. 2100 * 2101 * If a fatal error occurs (typically an I/O error), a dummy chain is 2102 * returned with chain->error and error-identifying information set. This 2103 * chain will assert if you try to do anything fancy with it. 2104 * 2105 * XXX Depending on where the error occurs we should allow continued iteration. 2106 * 2107 * parent must be locked on entry and remains locked throughout. chain's 2108 * lock status must match flags. Chain is always at least referenced. 2109 * 2110 * WARNING! The MATCHIND flag does not apply to this function. 2111 */ 2112 hammer2_chain_t * 2113 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2114 hammer2_key_t *key_nextp, 2115 hammer2_key_t key_beg, hammer2_key_t key_end, 2116 int *cache_indexp, int flags) 2117 { 2118 hammer2_chain_t *parent; 2119 int how_maybe; 2120 2121 /* 2122 * Calculate locking flags for upward recursion. 2123 */ 2124 how_maybe = HAMMER2_RESOLVE_MAYBE; 2125 if (flags & HAMMER2_LOOKUP_SHARED) 2126 how_maybe |= HAMMER2_RESOLVE_SHARED; 2127 2128 parent = *parentp; 2129 2130 /* 2131 * Calculate the next index and recalculate the parent if necessary. 2132 */ 2133 if (chain) { 2134 key_beg = chain->bref.key + 2135 ((hammer2_key_t)1 << chain->bref.keybits); 2136 if ((flags & (HAMMER2_LOOKUP_NOLOCK | 2137 HAMMER2_LOOKUP_NOUNLOCK)) == 0) { 2138 hammer2_chain_unlock(chain); 2139 } 2140 hammer2_chain_drop(chain); 2141 2142 /* 2143 * chain invalid past this point, but we can still do a 2144 * pointer comparison w/parent. 2145 * 2146 * Any scan where the lookup returned degenerate data embedded 2147 * in the inode has an invalid index and must terminate. 2148 */ 2149 if (chain == parent) 2150 return(NULL); 2151 if (key_beg == 0 || key_beg > key_end) 2152 return(NULL); 2153 chain = NULL; 2154 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2155 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2156 /* 2157 * We reached the end of the iteration. 2158 */ 2159 return (NULL); 2160 } else { 2161 /* 2162 * Continue iteration with next parent unless the current 2163 * parent covers the range. 2164 */ 2165 key_beg = parent->bref.key + 2166 ((hammer2_key_t)1 << parent->bref.keybits); 2167 if (key_beg == 0 || key_beg > key_end) 2168 return (NULL); 2169 parent = hammer2_chain_getparent(parentp, how_maybe); 2170 } 2171 2172 /* 2173 * And execute 2174 */ 2175 return (hammer2_chain_lookup(parentp, key_nextp, 2176 key_beg, key_end, 2177 cache_indexp, flags)); 2178 } 2179 2180 /* 2181 * The raw scan function is similar to lookup/next but does not seek to a key. 2182 * Blockrefs are iterated via first_bref = (parent, NULL) and 2183 * next_chain = (parent, bref). 2184 * 2185 * The passed-in parent must be locked and its data resolved. The function 2186 * nominally returns a locked and referenced *chainp != NULL for chains 2187 * the caller might need to recurse on (and will dipose of any *chainp passed 2188 * in). The caller must check the chain->bref.type either way. 2189 * 2190 * *chainp is not set for leaf elements. 2191 * 2192 * This function takes a pointer to a stack-based bref structure whos 2193 * contents is updated for each iteration. The same pointer is returned, 2194 * or NULL when the iteration is complete. *firstp must be set to 1 for 2195 * the first ieration. This function will set it to 0. 2196 */ 2197 hammer2_blockref_t * 2198 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 2199 hammer2_blockref_t *bref, int *firstp, 2200 int *cache_indexp, int flags) 2201 { 2202 hammer2_dev_t *hmp; 2203 hammer2_blockref_t *base; 2204 hammer2_blockref_t *bref_ptr; 2205 hammer2_key_t key; 2206 hammer2_key_t next_key; 2207 hammer2_chain_t *chain = NULL; 2208 int count = 0; 2209 int how_always = HAMMER2_RESOLVE_ALWAYS; 2210 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2211 int how; 2212 int generation; 2213 int maxloops = 300000; 2214 2215 hmp = parent->hmp; 2216 2217 /* 2218 * Scan flags borrowed from lookup. 2219 */ 2220 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2221 how_maybe = how_always; 2222 how = HAMMER2_RESOLVE_ALWAYS; 2223 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 2224 how = HAMMER2_RESOLVE_NEVER; 2225 } else { 2226 how = HAMMER2_RESOLVE_MAYBE; 2227 } 2228 if (flags & HAMMER2_LOOKUP_SHARED) { 2229 how_maybe |= HAMMER2_RESOLVE_SHARED; 2230 how_always |= HAMMER2_RESOLVE_SHARED; 2231 how |= HAMMER2_RESOLVE_SHARED; 2232 } 2233 2234 /* 2235 * Calculate key to locate first/next element, unlocking the previous 2236 * element as we go. Be careful, the key calculation can overflow. 2237 * 2238 * (also reset bref to NULL) 2239 */ 2240 if (*firstp) { 2241 key = 0; 2242 *firstp = 0; 2243 } else { 2244 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 2245 if ((chain = *chainp) != NULL) { 2246 *chainp = NULL; 2247 hammer2_chain_unlock(chain); 2248 hammer2_chain_drop(chain); 2249 chain = NULL; 2250 } 2251 if (key == 0) { 2252 bref = NULL; 2253 goto done; 2254 } 2255 } 2256 2257 again: 2258 KKASSERT(parent->error == 0); /* XXX case not handled yet */ 2259 if (--maxloops == 0) 2260 panic("hammer2_chain_scan: maxloops"); 2261 /* 2262 * Locate the blockref array. Currently we do a fully associative 2263 * search through the array. 2264 */ 2265 switch(parent->bref.type) { 2266 case HAMMER2_BREF_TYPE_INODE: 2267 /* 2268 * An inode with embedded data has no sub-chains. 2269 * 2270 * WARNING! Bulk scan code may pass a static chain marked 2271 * as BREF_TYPE_INODE with a copy of the volume 2272 * root blockset to snapshot the volume. 2273 */ 2274 if (parent->data->ipdata.meta.op_flags & 2275 HAMMER2_OPFLAG_DIRECTDATA) { 2276 bref = NULL; 2277 goto done; 2278 } 2279 base = &parent->data->ipdata.u.blockset.blockref[0]; 2280 count = HAMMER2_SET_COUNT; 2281 break; 2282 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2283 case HAMMER2_BREF_TYPE_INDIRECT: 2284 /* 2285 * Optimize indirect blocks in the INITIAL state to avoid 2286 * I/O. 2287 */ 2288 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2289 base = NULL; 2290 } else { 2291 if (parent->data == NULL) 2292 panic("parent->data is NULL"); 2293 base = &parent->data->npdata[0]; 2294 } 2295 count = parent->bytes / sizeof(hammer2_blockref_t); 2296 break; 2297 case HAMMER2_BREF_TYPE_VOLUME: 2298 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2299 count = HAMMER2_SET_COUNT; 2300 break; 2301 case HAMMER2_BREF_TYPE_FREEMAP: 2302 base = &parent->data->blkset.blockref[0]; 2303 count = HAMMER2_SET_COUNT; 2304 break; 2305 default: 2306 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 2307 parent->bref.type); 2308 base = NULL; /* safety */ 2309 count = 0; /* safety */ 2310 } 2311 2312 /* 2313 * Merged scan to find next candidate. 2314 * 2315 * hammer2_base_*() functions require the parent->core.live_* fields 2316 * to be synchronized. 2317 * 2318 * We need to hold the spinlock to access the block array and RB tree 2319 * and to interlock chain creation. 2320 */ 2321 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2322 hammer2_chain_countbrefs(parent, base, count); 2323 2324 next_key = 0; 2325 bref_ptr = NULL; 2326 hammer2_spin_ex(&parent->core.spin); 2327 chain = hammer2_combined_find(parent, base, count, 2328 cache_indexp, &next_key, 2329 key, HAMMER2_KEY_MAX, 2330 &bref_ptr); 2331 generation = parent->core.generation; 2332 2333 /* 2334 * Exhausted parent chain, we're done. 2335 */ 2336 if (bref_ptr == NULL) { 2337 hammer2_spin_unex(&parent->core.spin); 2338 KKASSERT(chain == NULL); 2339 bref = NULL; 2340 goto done; 2341 } 2342 2343 /* 2344 * Copy into the supplied stack-based blockref. 2345 */ 2346 *bref = *bref_ptr; 2347 2348 /* 2349 * Selected from blockref or in-memory chain. 2350 */ 2351 if (chain == NULL) { 2352 switch(bref->type) { 2353 case HAMMER2_BREF_TYPE_INODE: 2354 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2355 case HAMMER2_BREF_TYPE_INDIRECT: 2356 case HAMMER2_BREF_TYPE_VOLUME: 2357 case HAMMER2_BREF_TYPE_FREEMAP: 2358 /* 2359 * Recursion, always get the chain 2360 */ 2361 hammer2_spin_unex(&parent->core.spin); 2362 chain = hammer2_chain_get(parent, generation, bref); 2363 if (chain == NULL) { 2364 kprintf("retry scan parent %p keys %016jx\n", 2365 parent, key); 2366 goto again; 2367 } 2368 if (bcmp(bref, bref_ptr, sizeof(*bref))) { 2369 hammer2_chain_drop(chain); 2370 chain = NULL; 2371 goto again; 2372 } 2373 break; 2374 default: 2375 /* 2376 * No recursion, do not waste time instantiating 2377 * a chain, just iterate using the bref. 2378 */ 2379 hammer2_spin_unex(&parent->core.spin); 2380 break; 2381 } 2382 } else { 2383 /* 2384 * Recursion or not we need the chain in order to supply 2385 * the bref. 2386 */ 2387 hammer2_chain_ref(chain); 2388 hammer2_spin_unex(&parent->core.spin); 2389 } 2390 2391 /* 2392 * chain is referenced but not locked. We must lock the chain 2393 * to obtain definitive DUPLICATED/DELETED state 2394 */ 2395 if (chain) 2396 hammer2_chain_lock(chain, how); 2397 2398 /* 2399 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2400 * 2401 * NOTE: chain's key range is not relevant as there might be 2402 * one-offs within the range that are not deleted. 2403 * 2404 * NOTE: XXX this could create problems with scans used in 2405 * situations other than mount-time recovery. 2406 * 2407 * NOTE: Lookups can race delete-duplicate because 2408 * delete-duplicate does not lock the parent's core 2409 * (they just use the spinlock on the core). We must 2410 * check for races by comparing the DUPLICATED flag before 2411 * releasing the spinlock with the flag after locking the 2412 * chain. 2413 */ 2414 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 2415 hammer2_chain_unlock(chain); 2416 hammer2_chain_drop(chain); 2417 chain = NULL; 2418 2419 key = next_key; 2420 if (key == 0) { 2421 bref = NULL; 2422 goto done; 2423 } 2424 goto again; 2425 } 2426 2427 done: 2428 /* 2429 * All done, return the bref or NULL, supply chain if necessary. 2430 */ 2431 if (chain) 2432 *chainp = chain; 2433 return (bref); 2434 } 2435 2436 /* 2437 * Create and return a new hammer2 system memory structure of the specified 2438 * key, type and size and insert it under (*parentp). This is a full 2439 * insertion, based on the supplied key/keybits, and may involve creating 2440 * indirect blocks and moving other chains around via delete/duplicate. 2441 * 2442 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2443 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2444 * FULL. This typically means that the caller is creating the chain after 2445 * doing a hammer2_chain_lookup(). 2446 * 2447 * (*parentp) must be exclusive locked and may be replaced on return 2448 * depending on how much work the function had to do. 2449 * 2450 * (*parentp) must not be errored or this function will assert. 2451 * 2452 * (*chainp) usually starts out NULL and returns the newly created chain, 2453 * but if the caller desires the caller may allocate a disconnected chain 2454 * and pass it in instead. 2455 * 2456 * This function should NOT be used to insert INDIRECT blocks. It is 2457 * typically used to create/insert inodes and data blocks. 2458 * 2459 * Caller must pass-in an exclusively locked parent the new chain is to 2460 * be inserted under, and optionally pass-in a disconnected, exclusively 2461 * locked chain to insert (else we create a new chain). The function will 2462 * adjust (*parentp) as necessary, create or connect the chain, and 2463 * return an exclusively locked chain in *chainp. 2464 * 2465 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 2466 * and will be reassigned. 2467 */ 2468 int 2469 hammer2_chain_create(hammer2_chain_t **parentp, 2470 hammer2_chain_t **chainp, hammer2_pfs_t *pmp, 2471 hammer2_key_t key, int keybits, int type, size_t bytes, 2472 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 2473 { 2474 hammer2_dev_t *hmp; 2475 hammer2_chain_t *chain; 2476 hammer2_chain_t *parent; 2477 hammer2_blockref_t *base; 2478 hammer2_blockref_t dummy; 2479 int allocated = 0; 2480 int error = 0; 2481 int count; 2482 int maxloops = 300000; 2483 2484 /* 2485 * Topology may be crossing a PFS boundary. 2486 */ 2487 parent = *parentp; 2488 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2489 KKASSERT(parent->error == 0); 2490 hmp = parent->hmp; 2491 chain = *chainp; 2492 2493 if (chain == NULL) { 2494 /* 2495 * First allocate media space and construct the dummy bref, 2496 * then allocate the in-memory chain structure. Set the 2497 * INITIAL flag for fresh chains which do not have embedded 2498 * data. 2499 * 2500 * XXX for now set the check mode of the child based on 2501 * the parent or, if the parent is an inode, the 2502 * specification in the inode. 2503 */ 2504 bzero(&dummy, sizeof(dummy)); 2505 dummy.type = type; 2506 dummy.key = key; 2507 dummy.keybits = keybits; 2508 dummy.data_off = hammer2_getradix(bytes); 2509 dummy.methods = parent->bref.methods; 2510 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && 2511 parent->data) { 2512 dummy.methods &= ~HAMMER2_ENC_CHECK(-1); 2513 dummy.methods |= HAMMER2_ENC_CHECK( 2514 parent->data->ipdata.meta.check_algo); 2515 } 2516 2517 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 2518 2519 /* 2520 * Lock the chain manually, chain_lock will load the chain 2521 * which we do NOT want to do. (note: chain->refs is set 2522 * to 1 by chain_alloc() for us, but lockcnt is not). 2523 */ 2524 chain->lockcnt = 1; 2525 hammer2_mtx_ex(&chain->lock); 2526 allocated = 1; 2527 ++curthread->td_tracker; 2528 2529 /* 2530 * Set INITIAL to optimize I/O. The flag will generally be 2531 * processed when we call hammer2_chain_modify(). 2532 * 2533 * Recalculate bytes to reflect the actual media block 2534 * allocation. 2535 */ 2536 bytes = (hammer2_off_t)1 << 2537 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 2538 chain->bytes = bytes; 2539 2540 switch(type) { 2541 case HAMMER2_BREF_TYPE_VOLUME: 2542 case HAMMER2_BREF_TYPE_FREEMAP: 2543 panic("hammer2_chain_create: called with volume type"); 2544 break; 2545 case HAMMER2_BREF_TYPE_INDIRECT: 2546 panic("hammer2_chain_create: cannot be used to" 2547 "create indirect block"); 2548 break; 2549 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2550 panic("hammer2_chain_create: cannot be used to" 2551 "create freemap root or node"); 2552 break; 2553 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2554 KKASSERT(bytes == sizeof(chain->data->bmdata)); 2555 /* fall through */ 2556 case HAMMER2_BREF_TYPE_INODE: 2557 case HAMMER2_BREF_TYPE_DATA: 2558 default: 2559 /* 2560 * leave chain->data NULL, set INITIAL 2561 */ 2562 KKASSERT(chain->data == NULL); 2563 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 2564 break; 2565 } 2566 } else { 2567 /* 2568 * We are reattaching a previously deleted chain, possibly 2569 * under a new parent and possibly with a new key/keybits. 2570 * The chain does not have to be in a modified state. The 2571 * UPDATE flag will be set later on in this routine. 2572 * 2573 * Do NOT mess with the current state of the INITIAL flag. 2574 */ 2575 chain->bref.key = key; 2576 chain->bref.keybits = keybits; 2577 if (chain->flags & HAMMER2_CHAIN_DELETED) 2578 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2579 KKASSERT(chain->parent == NULL); 2580 } 2581 if (flags & HAMMER2_INSERT_PFSROOT) 2582 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 2583 else 2584 chain->bref.flags &= ~HAMMER2_BREF_FLAG_PFSROOT; 2585 2586 /* 2587 * Calculate how many entries we have in the blockref array and 2588 * determine if an indirect block is required. 2589 */ 2590 again: 2591 if (--maxloops == 0) 2592 panic("hammer2_chain_create: maxloops"); 2593 2594 switch(parent->bref.type) { 2595 case HAMMER2_BREF_TYPE_INODE: 2596 if ((parent->data->ipdata.meta.op_flags & 2597 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 2598 kprintf("hammer2: parent set for direct-data! " 2599 "pkey=%016jx ckey=%016jx\n", 2600 parent->bref.key, 2601 chain->bref.key); 2602 } 2603 KKASSERT((parent->data->ipdata.meta.op_flags & 2604 HAMMER2_OPFLAG_DIRECTDATA) == 0); 2605 KKASSERT(parent->data != NULL); 2606 base = &parent->data->ipdata.u.blockset.blockref[0]; 2607 count = HAMMER2_SET_COUNT; 2608 break; 2609 case HAMMER2_BREF_TYPE_INDIRECT: 2610 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2611 if (parent->flags & HAMMER2_CHAIN_INITIAL) 2612 base = NULL; 2613 else 2614 base = &parent->data->npdata[0]; 2615 count = parent->bytes / sizeof(hammer2_blockref_t); 2616 break; 2617 case HAMMER2_BREF_TYPE_VOLUME: 2618 KKASSERT(parent->data != NULL); 2619 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2620 count = HAMMER2_SET_COUNT; 2621 break; 2622 case HAMMER2_BREF_TYPE_FREEMAP: 2623 KKASSERT(parent->data != NULL); 2624 base = &parent->data->blkset.blockref[0]; 2625 count = HAMMER2_SET_COUNT; 2626 break; 2627 default: 2628 panic("hammer2_chain_create: unrecognized blockref type: %d", 2629 parent->bref.type); 2630 base = NULL; 2631 count = 0; 2632 break; 2633 } 2634 2635 /* 2636 * Make sure we've counted the brefs 2637 */ 2638 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2639 hammer2_chain_countbrefs(parent, base, count); 2640 2641 KKASSERT(parent->core.live_count >= 0 && 2642 parent->core.live_count <= count); 2643 2644 /* 2645 * If no free blockref could be found we must create an indirect 2646 * block and move a number of blockrefs into it. With the parent 2647 * locked we can safely lock each child in order to delete+duplicate 2648 * it without causing a deadlock. 2649 * 2650 * This may return the new indirect block or the old parent depending 2651 * on where the key falls. NULL is returned on error. 2652 */ 2653 if (parent->core.live_count == count) { 2654 hammer2_chain_t *nparent; 2655 2656 nparent = hammer2_chain_create_indirect(parent, key, keybits, 2657 mtid, type, &error); 2658 if (nparent == NULL) { 2659 if (allocated) 2660 hammer2_chain_drop(chain); 2661 chain = NULL; 2662 goto done; 2663 } 2664 if (parent != nparent) { 2665 hammer2_chain_unlock(parent); 2666 hammer2_chain_drop(parent); 2667 parent = *parentp = nparent; 2668 } 2669 goto again; 2670 } 2671 2672 /* 2673 * Link the chain into its parent. 2674 */ 2675 if (chain->parent != NULL) 2676 panic("hammer2: hammer2_chain_create: chain already connected"); 2677 KKASSERT(chain->parent == NULL); 2678 hammer2_chain_insert(parent, chain, 2679 HAMMER2_CHAIN_INSERT_SPIN | 2680 HAMMER2_CHAIN_INSERT_LIVE, 2681 0); 2682 2683 if (allocated) { 2684 /* 2685 * Mark the newly created chain modified. This will cause 2686 * UPDATE to be set and process the INITIAL flag. 2687 * 2688 * Device buffers are not instantiated for DATA elements 2689 * as these are handled by logical buffers. 2690 * 2691 * Indirect and freemap node indirect blocks are handled 2692 * by hammer2_chain_create_indirect() and not by this 2693 * function. 2694 * 2695 * Data for all other bref types is expected to be 2696 * instantiated (INODE, LEAF). 2697 */ 2698 switch(chain->bref.type) { 2699 case HAMMER2_BREF_TYPE_DATA: 2700 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2701 case HAMMER2_BREF_TYPE_INODE: 2702 hammer2_chain_modify(chain, mtid, dedup_off, 2703 HAMMER2_MODIFY_OPTDATA); 2704 break; 2705 default: 2706 /* 2707 * Remaining types are not supported by this function. 2708 * In particular, INDIRECT and LEAF_NODE types are 2709 * handled by create_indirect(). 2710 */ 2711 panic("hammer2_chain_create: bad type: %d", 2712 chain->bref.type); 2713 /* NOT REACHED */ 2714 break; 2715 } 2716 } else { 2717 /* 2718 * When reconnecting a chain we must set UPDATE and 2719 * setflush so the flush recognizes that it must update 2720 * the bref in the parent. 2721 */ 2722 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 2723 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 2724 } 2725 2726 /* 2727 * We must setflush(parent) to ensure that it recurses through to 2728 * chain. setflush(chain) might not work because ONFLUSH is possibly 2729 * already set in the chain (so it won't recurse up to set it in the 2730 * parent). 2731 */ 2732 hammer2_chain_setflush(parent); 2733 2734 done: 2735 *chainp = chain; 2736 2737 return (error); 2738 } 2739 2740 /* 2741 * Move the chain from its old parent to a new parent. The chain must have 2742 * already been deleted or already disconnected (or never associated) with 2743 * a parent. The chain is reassociated with the new parent and the deleted 2744 * flag will be cleared (no longer deleted). The chain's modification state 2745 * is not altered. 2746 * 2747 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 2748 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2749 * FULL. This typically means that the caller is creating the chain after 2750 * doing a hammer2_chain_lookup(). 2751 * 2752 * A non-NULL bref is typically passed when key and keybits must be overridden. 2753 * Note that hammer2_cluster_duplicate() *ONLY* uses the key and keybits fields 2754 * from a passed-in bref and uses the old chain's bref for everything else. 2755 * 2756 * Neither (parent) or (chain) can be errored. 2757 * 2758 * If (parent) is non-NULL then the new duplicated chain is inserted under 2759 * the parent. 2760 * 2761 * If (parent) is NULL then the newly duplicated chain is not inserted 2762 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 2763 * passing into hammer2_chain_create() after this function returns). 2764 * 2765 * WARNING! This function calls create which means it can insert indirect 2766 * blocks. This can cause other unrelated chains in the parent to 2767 * be moved to a newly inserted indirect block in addition to the 2768 * specific chain. 2769 */ 2770 void 2771 hammer2_chain_rename(hammer2_blockref_t *bref, 2772 hammer2_chain_t **parentp, hammer2_chain_t *chain, 2773 hammer2_tid_t mtid, int flags) 2774 { 2775 hammer2_dev_t *hmp; 2776 hammer2_chain_t *parent; 2777 size_t bytes; 2778 2779 /* 2780 * WARNING! We should never resolve DATA to device buffers 2781 * (XXX allow it if the caller did?), and since 2782 * we currently do not have the logical buffer cache 2783 * buffer in-hand to fix its cached physical offset 2784 * we also force the modify code to not COW it. XXX 2785 */ 2786 hmp = chain->hmp; 2787 KKASSERT(chain->parent == NULL); 2788 KKASSERT(chain->error == 0); 2789 2790 /* 2791 * Now create a duplicate of the chain structure, associating 2792 * it with the same core, making it the same size, pointing it 2793 * to the same bref (the same media block). 2794 */ 2795 if (bref == NULL) 2796 bref = &chain->bref; 2797 bytes = (hammer2_off_t)1 << 2798 (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 2799 2800 /* 2801 * If parent is not NULL the duplicated chain will be entered under 2802 * the parent and the UPDATE bit set to tell flush to update 2803 * the blockref. 2804 * 2805 * We must setflush(parent) to ensure that it recurses through to 2806 * chain. setflush(chain) might not work because ONFLUSH is possibly 2807 * already set in the chain (so it won't recurse up to set it in the 2808 * parent). 2809 * 2810 * Having both chains locked is extremely important for atomicy. 2811 */ 2812 if (parentp && (parent = *parentp) != NULL) { 2813 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2814 KKASSERT(parent->refs > 0); 2815 KKASSERT(parent->error == 0); 2816 2817 hammer2_chain_create(parentp, &chain, chain->pmp, 2818 bref->key, bref->keybits, bref->type, 2819 chain->bytes, mtid, 0, flags); 2820 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 2821 hammer2_chain_setflush(*parentp); 2822 } 2823 } 2824 2825 /* 2826 * Helper function for deleting chains. 2827 * 2828 * The chain is removed from the live view (the RBTREE) as well as the parent's 2829 * blockmap. Both chain and its parent must be locked. 2830 * 2831 * parent may not be errored. chain can be errored. 2832 */ 2833 static void 2834 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 2835 hammer2_tid_t mtid, int flags) 2836 { 2837 hammer2_dev_t *hmp; 2838 2839 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 2840 HAMMER2_CHAIN_FICTITIOUS)) == 0); 2841 KKASSERT(chain->parent == parent); 2842 hmp = chain->hmp; 2843 2844 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 2845 /* 2846 * Chain is blockmapped, so there must be a parent. 2847 * Atomically remove the chain from the parent and remove 2848 * the blockmap entry. The parent must be set modified 2849 * to remove the blockmap entry. 2850 */ 2851 hammer2_blockref_t *base; 2852 int count; 2853 2854 KKASSERT(parent != NULL); 2855 KKASSERT(parent->error == 0); 2856 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 2857 hammer2_chain_modify(parent, mtid, 0, HAMMER2_MODIFY_OPTDATA); 2858 2859 /* 2860 * Calculate blockmap pointer 2861 */ 2862 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2863 hammer2_spin_ex(&parent->core.spin); 2864 2865 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2866 atomic_add_int(&parent->core.live_count, -1); 2867 ++parent->core.generation; 2868 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 2869 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2870 --parent->core.chain_count; 2871 chain->parent = NULL; 2872 2873 switch(parent->bref.type) { 2874 case HAMMER2_BREF_TYPE_INODE: 2875 /* 2876 * Access the inode's block array. However, there 2877 * is no block array if the inode is flagged 2878 * DIRECTDATA. The DIRECTDATA case typicaly only 2879 * occurs when a hardlink has been shifted up the 2880 * tree and the original inode gets replaced with 2881 * an OBJTYPE_HARDLINK placeholding inode. 2882 */ 2883 if (parent->data && 2884 (parent->data->ipdata.meta.op_flags & 2885 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 2886 base = 2887 &parent->data->ipdata.u.blockset.blockref[0]; 2888 } else { 2889 base = NULL; 2890 } 2891 count = HAMMER2_SET_COUNT; 2892 break; 2893 case HAMMER2_BREF_TYPE_INDIRECT: 2894 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2895 if (parent->data) 2896 base = &parent->data->npdata[0]; 2897 else 2898 base = NULL; 2899 count = parent->bytes / sizeof(hammer2_blockref_t); 2900 break; 2901 case HAMMER2_BREF_TYPE_VOLUME: 2902 base = &parent->data->voldata. 2903 sroot_blockset.blockref[0]; 2904 count = HAMMER2_SET_COUNT; 2905 break; 2906 case HAMMER2_BREF_TYPE_FREEMAP: 2907 base = &parent->data->blkset.blockref[0]; 2908 count = HAMMER2_SET_COUNT; 2909 break; 2910 default: 2911 base = NULL; 2912 count = 0; 2913 panic("hammer2_flush_pass2: " 2914 "unrecognized blockref type: %d", 2915 parent->bref.type); 2916 } 2917 2918 /* 2919 * delete blockmapped chain from its parent. 2920 * 2921 * The parent is not affected by any statistics in chain 2922 * which are pending synchronization. That is, there is 2923 * nothing to undo in the parent since they have not yet 2924 * been incorporated into the parent. 2925 * 2926 * The parent is affected by statistics stored in inodes. 2927 * Those have already been synchronized, so they must be 2928 * undone. XXX split update possible w/delete in middle? 2929 */ 2930 if (base) { 2931 int cache_index = -1; 2932 hammer2_base_delete(parent, base, count, 2933 &cache_index, chain); 2934 } 2935 hammer2_spin_unex(&parent->core.spin); 2936 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 2937 /* 2938 * Chain is not blockmapped but a parent is present. 2939 * Atomically remove the chain from the parent. There is 2940 * no blockmap entry to remove. 2941 * 2942 * Because chain was associated with a parent but not 2943 * synchronized, the chain's *_count_up fields contain 2944 * inode adjustment statistics which must be undone. 2945 */ 2946 hammer2_spin_ex(&parent->core.spin); 2947 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2948 atomic_add_int(&parent->core.live_count, -1); 2949 ++parent->core.generation; 2950 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 2951 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2952 --parent->core.chain_count; 2953 chain->parent = NULL; 2954 hammer2_spin_unex(&parent->core.spin); 2955 } else { 2956 /* 2957 * Chain is not blockmapped and has no parent. This 2958 * is a degenerate case. 2959 */ 2960 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2961 } 2962 } 2963 2964 /* 2965 * Create an indirect block that covers one or more of the elements in the 2966 * current parent. Either returns the existing parent with no locking or 2967 * ref changes or returns the new indirect block locked and referenced 2968 * and leaving the original parent lock/ref intact as well. 2969 * 2970 * If an error occurs, NULL is returned and *errorp is set to the error. 2971 * 2972 * The returned chain depends on where the specified key falls. 2973 * 2974 * The key/keybits for the indirect mode only needs to follow three rules: 2975 * 2976 * (1) That all elements underneath it fit within its key space and 2977 * 2978 * (2) That all elements outside it are outside its key space. 2979 * 2980 * (3) When creating the new indirect block any elements in the current 2981 * parent that fit within the new indirect block's keyspace must be 2982 * moved into the new indirect block. 2983 * 2984 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 2985 * keyspace the the current parent, but lookup/iteration rules will 2986 * ensure (and must ensure) that rule (2) for all parents leading up 2987 * to the nearest inode or the root volume header is adhered to. This 2988 * is accomplished by always recursing through matching keyspaces in 2989 * the hammer2_chain_lookup() and hammer2_chain_next() API. 2990 * 2991 * The current implementation calculates the current worst-case keyspace by 2992 * iterating the current parent and then divides it into two halves, choosing 2993 * whichever half has the most elements (not necessarily the half containing 2994 * the requested key). 2995 * 2996 * We can also opt to use the half with the least number of elements. This 2997 * causes lower-numbered keys (aka logical file offsets) to recurse through 2998 * fewer indirect blocks and higher-numbered keys to recurse through more. 2999 * This also has the risk of not moving enough elements to the new indirect 3000 * block and being forced to create several indirect blocks before the element 3001 * can be inserted. 3002 * 3003 * Must be called with an exclusively locked parent. 3004 */ 3005 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3006 hammer2_key_t *keyp, int keybits, 3007 hammer2_blockref_t *base, int count); 3008 static int hammer2_chain_indkey_normal(hammer2_chain_t *parent, 3009 hammer2_key_t *keyp, int keybits, 3010 hammer2_blockref_t *base, int count); 3011 static 3012 hammer2_chain_t * 3013 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3014 hammer2_key_t create_key, int create_bits, 3015 hammer2_tid_t mtid, int for_type, int *errorp) 3016 { 3017 hammer2_dev_t *hmp; 3018 hammer2_blockref_t *base; 3019 hammer2_blockref_t *bref; 3020 hammer2_blockref_t bcopy; 3021 hammer2_chain_t *chain; 3022 hammer2_chain_t *ichain; 3023 hammer2_chain_t dummy; 3024 hammer2_key_t key = create_key; 3025 hammer2_key_t key_beg; 3026 hammer2_key_t key_end; 3027 hammer2_key_t key_next; 3028 int keybits = create_bits; 3029 int count; 3030 int nbytes; 3031 int cache_index; 3032 int loops; 3033 int reason; 3034 int generation; 3035 int maxloops = 300000; 3036 3037 /* 3038 * Calculate the base blockref pointer or NULL if the chain 3039 * is known to be empty. We need to calculate the array count 3040 * for RB lookups either way. 3041 */ 3042 hmp = parent->hmp; 3043 *errorp = 0; 3044 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3045 3046 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3047 base = hammer2_chain_base_and_count(parent, &count); 3048 3049 /* 3050 * dummy used in later chain allocation (no longer used for lookups). 3051 */ 3052 bzero(&dummy, sizeof(dummy)); 3053 3054 /* 3055 * When creating an indirect block for a freemap node or leaf 3056 * the key/keybits must be fitted to static radix levels because 3057 * particular radix levels use particular reserved blocks in the 3058 * related zone. 3059 * 3060 * This routine calculates the key/radix of the indirect block 3061 * we need to create, and whether it is on the high-side or the 3062 * low-side. 3063 */ 3064 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3065 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3066 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3067 base, count); 3068 } else { 3069 keybits = hammer2_chain_indkey_normal(parent, &key, keybits, 3070 base, count); 3071 } 3072 3073 /* 3074 * Normalize the key for the radix being represented, keeping the 3075 * high bits and throwing away the low bits. 3076 */ 3077 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3078 3079 /* 3080 * How big should our new indirect block be? It has to be at least 3081 * as large as its parent. 3082 * 3083 * The freemap uses a specific indirect block size. 3084 * 3085 * The first indirect block level down from an inode typically 3086 * uses LBUFSIZE (16384), else it uses PBUFSIZE (65536). 3087 */ 3088 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3089 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3090 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3091 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3092 nbytes = HAMMER2_IND_BYTES_MIN; 3093 } else { 3094 nbytes = HAMMER2_IND_BYTES_MAX; 3095 } 3096 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3097 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3098 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3099 nbytes = count * sizeof(hammer2_blockref_t); 3100 } 3101 3102 /* 3103 * Ok, create our new indirect block 3104 */ 3105 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3106 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3107 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3108 } else { 3109 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 3110 } 3111 dummy.bref.key = key; 3112 dummy.bref.keybits = keybits; 3113 dummy.bref.data_off = hammer2_getradix(nbytes); 3114 dummy.bref.methods = parent->bref.methods; 3115 3116 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref); 3117 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3118 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3119 /* ichain has one ref at this point */ 3120 3121 /* 3122 * We have to mark it modified to allocate its block, but use 3123 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3124 * it won't be acted upon by the flush code. 3125 */ 3126 hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA); 3127 3128 /* 3129 * Iterate the original parent and move the matching brefs into 3130 * the new indirect block. 3131 * 3132 * XXX handle flushes. 3133 */ 3134 key_beg = 0; 3135 key_end = HAMMER2_KEY_MAX; 3136 cache_index = 0; 3137 hammer2_spin_ex(&parent->core.spin); 3138 loops = 0; 3139 reason = 0; 3140 3141 for (;;) { 3142 /* 3143 * Parent may have been modified, relocating its block array. 3144 * Reload the base pointer. 3145 */ 3146 base = hammer2_chain_base_and_count(parent, &count); 3147 3148 if (++loops > 100000) { 3149 hammer2_spin_unex(&parent->core.spin); 3150 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3151 reason, parent, base, count, key_next); 3152 } 3153 3154 /* 3155 * NOTE: spinlock stays intact, returned chain (if not NULL) 3156 * is not referenced or locked which means that we 3157 * cannot safely check its flagged / deletion status 3158 * until we lock it. 3159 */ 3160 chain = hammer2_combined_find(parent, base, count, 3161 &cache_index, &key_next, 3162 key_beg, key_end, 3163 &bref); 3164 generation = parent->core.generation; 3165 if (bref == NULL) 3166 break; 3167 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3168 3169 /* 3170 * Skip keys that are not within the key/radix of the new 3171 * indirect block. They stay in the parent. 3172 */ 3173 if ((~(((hammer2_key_t)1 << keybits) - 1) & 3174 (key ^ bref->key)) != 0) { 3175 goto next_key_spinlocked; 3176 } 3177 3178 /* 3179 * Load the new indirect block by acquiring the related 3180 * chains (potentially from media as it might not be 3181 * in-memory). Then move it to the new parent (ichain) 3182 * via DELETE-DUPLICATE. 3183 * 3184 * chain is referenced but not locked. We must lock the 3185 * chain to obtain definitive DUPLICATED/DELETED state 3186 */ 3187 if (chain) { 3188 /* 3189 * Use chain already present in the RBTREE 3190 */ 3191 hammer2_chain_ref(chain); 3192 hammer2_spin_unex(&parent->core.spin); 3193 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3194 } else { 3195 /* 3196 * Get chain for blockref element. _get returns NULL 3197 * on insertion race. 3198 */ 3199 bcopy = *bref; 3200 hammer2_spin_unex(&parent->core.spin); 3201 chain = hammer2_chain_get(parent, generation, &bcopy); 3202 if (chain == NULL) { 3203 reason = 1; 3204 hammer2_spin_ex(&parent->core.spin); 3205 continue; 3206 } 3207 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 3208 kprintf("REASON 2\n"); 3209 reason = 2; 3210 hammer2_chain_drop(chain); 3211 hammer2_spin_ex(&parent->core.spin); 3212 continue; 3213 } 3214 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3215 } 3216 3217 /* 3218 * This is always live so if the chain has been deleted 3219 * we raced someone and we have to retry. 3220 * 3221 * NOTE: Lookups can race delete-duplicate because 3222 * delete-duplicate does not lock the parent's core 3223 * (they just use the spinlock on the core). We must 3224 * check for races by comparing the DUPLICATED flag before 3225 * releasing the spinlock with the flag after locking the 3226 * chain. 3227 * 3228 * (note reversed logic for this one) 3229 */ 3230 if (chain->flags & HAMMER2_CHAIN_DELETED) { 3231 hammer2_chain_unlock(chain); 3232 hammer2_chain_drop(chain); 3233 goto next_key; 3234 } 3235 3236 /* 3237 * Shift the chain to the indirect block. 3238 * 3239 * WARNING! No reason for us to load chain data, pass NOSTATS 3240 * to prevent delete/insert from trying to access 3241 * inode stats (and thus asserting if there is no 3242 * chain->data loaded). 3243 * 3244 * WARNING! The (parent, chain) deletion may modify the parent 3245 * and invalidate the base pointer. 3246 */ 3247 hammer2_chain_delete(parent, chain, mtid, 0); 3248 hammer2_chain_rename(NULL, &ichain, chain, mtid, 0); 3249 hammer2_chain_unlock(chain); 3250 hammer2_chain_drop(chain); 3251 KKASSERT(parent->refs > 0); 3252 chain = NULL; 3253 base = NULL; /* safety */ 3254 next_key: 3255 hammer2_spin_ex(&parent->core.spin); 3256 next_key_spinlocked: 3257 if (--maxloops == 0) 3258 panic("hammer2_chain_create_indirect: maxloops"); 3259 reason = 4; 3260 if (key_next == 0 || key_next > key_end) 3261 break; 3262 key_beg = key_next; 3263 /* loop */ 3264 } 3265 hammer2_spin_unex(&parent->core.spin); 3266 3267 /* 3268 * Insert the new indirect block into the parent now that we've 3269 * cleared out some entries in the parent. We calculated a good 3270 * insertion index in the loop above (ichain->index). 3271 * 3272 * We don't have to set UPDATE here because we mark ichain 3273 * modified down below (so the normal modified -> flush -> set-moved 3274 * sequence applies). 3275 * 3276 * The insertion shouldn't race as this is a completely new block 3277 * and the parent is locked. 3278 */ 3279 base = NULL; /* safety, parent modify may change address */ 3280 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 3281 hammer2_chain_insert(parent, ichain, 3282 HAMMER2_CHAIN_INSERT_SPIN | 3283 HAMMER2_CHAIN_INSERT_LIVE, 3284 0); 3285 3286 /* 3287 * Make sure flushes propogate after our manual insertion. 3288 */ 3289 hammer2_chain_setflush(ichain); 3290 hammer2_chain_setflush(parent); 3291 3292 /* 3293 * Figure out what to return. 3294 */ 3295 if (~(((hammer2_key_t)1 << keybits) - 1) & 3296 (create_key ^ key)) { 3297 /* 3298 * Key being created is outside the key range, 3299 * return the original parent. 3300 */ 3301 hammer2_chain_unlock(ichain); 3302 hammer2_chain_drop(ichain); 3303 } else { 3304 /* 3305 * Otherwise its in the range, return the new parent. 3306 * (leave both the new and old parent locked). 3307 */ 3308 parent = ichain; 3309 } 3310 3311 return(parent); 3312 } 3313 3314 /* 3315 * Calculate the keybits and highside/lowside of the freemap node the 3316 * caller is creating. 3317 * 3318 * This routine will specify the next higher-level freemap key/radix 3319 * representing the lowest-ordered set. By doing so, eventually all 3320 * low-ordered sets will be moved one level down. 3321 * 3322 * We have to be careful here because the freemap reserves a limited 3323 * number of blocks for a limited number of levels. So we can't just 3324 * push indiscriminately. 3325 */ 3326 int 3327 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 3328 int keybits, hammer2_blockref_t *base, int count) 3329 { 3330 hammer2_chain_t *chain; 3331 hammer2_blockref_t *bref; 3332 hammer2_key_t key; 3333 hammer2_key_t key_beg; 3334 hammer2_key_t key_end; 3335 hammer2_key_t key_next; 3336 int cache_index; 3337 int locount; 3338 int hicount; 3339 int maxloops = 300000; 3340 3341 key = *keyp; 3342 locount = 0; 3343 hicount = 0; 3344 keybits = 64; 3345 3346 /* 3347 * Calculate the range of keys in the array being careful to skip 3348 * slots which are overridden with a deletion. 3349 */ 3350 key_beg = 0; 3351 key_end = HAMMER2_KEY_MAX; 3352 cache_index = 0; 3353 hammer2_spin_ex(&parent->core.spin); 3354 3355 for (;;) { 3356 if (--maxloops == 0) { 3357 panic("indkey_freemap shit %p %p:%d\n", 3358 parent, base, count); 3359 } 3360 chain = hammer2_combined_find(parent, base, count, 3361 &cache_index, &key_next, 3362 key_beg, key_end, 3363 &bref); 3364 3365 /* 3366 * Exhausted search 3367 */ 3368 if (bref == NULL) 3369 break; 3370 3371 /* 3372 * Skip deleted chains. 3373 */ 3374 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3375 if (key_next == 0 || key_next > key_end) 3376 break; 3377 key_beg = key_next; 3378 continue; 3379 } 3380 3381 /* 3382 * Use the full live (not deleted) element for the scan 3383 * iteration. HAMMER2 does not allow partial replacements. 3384 * 3385 * XXX should be built into hammer2_combined_find(). 3386 */ 3387 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3388 3389 if (keybits > bref->keybits) { 3390 key = bref->key; 3391 keybits = bref->keybits; 3392 } else if (keybits == bref->keybits && bref->key < key) { 3393 key = bref->key; 3394 } 3395 if (key_next == 0) 3396 break; 3397 key_beg = key_next; 3398 } 3399 hammer2_spin_unex(&parent->core.spin); 3400 3401 /* 3402 * Return the keybits for a higher-level FREEMAP_NODE covering 3403 * this node. 3404 */ 3405 switch(keybits) { 3406 case HAMMER2_FREEMAP_LEVEL0_RADIX: 3407 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 3408 break; 3409 case HAMMER2_FREEMAP_LEVEL1_RADIX: 3410 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 3411 break; 3412 case HAMMER2_FREEMAP_LEVEL2_RADIX: 3413 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 3414 break; 3415 case HAMMER2_FREEMAP_LEVEL3_RADIX: 3416 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 3417 break; 3418 case HAMMER2_FREEMAP_LEVEL4_RADIX: 3419 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 3420 break; 3421 case HAMMER2_FREEMAP_LEVEL5_RADIX: 3422 panic("hammer2_chain_indkey_freemap: level too high"); 3423 break; 3424 default: 3425 panic("hammer2_chain_indkey_freemap: bad radix"); 3426 break; 3427 } 3428 *keyp = key; 3429 3430 return (keybits); 3431 } 3432 3433 /* 3434 * Calculate the keybits and highside/lowside of the indirect block the 3435 * caller is creating. 3436 */ 3437 static int 3438 hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, 3439 int keybits, hammer2_blockref_t *base, int count) 3440 { 3441 hammer2_blockref_t *bref; 3442 hammer2_chain_t *chain; 3443 hammer2_key_t key_beg; 3444 hammer2_key_t key_end; 3445 hammer2_key_t key_next; 3446 hammer2_key_t key; 3447 int nkeybits; 3448 int locount; 3449 int hicount; 3450 int cache_index; 3451 int maxloops = 300000; 3452 3453 key = *keyp; 3454 locount = 0; 3455 hicount = 0; 3456 3457 /* 3458 * Calculate the range of keys in the array being careful to skip 3459 * slots which are overridden with a deletion. Once the scan 3460 * completes we will cut the key range in half and shift half the 3461 * range into the new indirect block. 3462 */ 3463 key_beg = 0; 3464 key_end = HAMMER2_KEY_MAX; 3465 cache_index = 0; 3466 hammer2_spin_ex(&parent->core.spin); 3467 3468 for (;;) { 3469 if (--maxloops == 0) { 3470 panic("indkey_freemap shit %p %p:%d\n", 3471 parent, base, count); 3472 } 3473 chain = hammer2_combined_find(parent, base, count, 3474 &cache_index, &key_next, 3475 key_beg, key_end, 3476 &bref); 3477 3478 /* 3479 * Exhausted search 3480 */ 3481 if (bref == NULL) 3482 break; 3483 3484 /* 3485 * NOTE: No need to check DUPLICATED here because we do 3486 * not release the spinlock. 3487 */ 3488 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3489 if (key_next == 0 || key_next > key_end) 3490 break; 3491 key_beg = key_next; 3492 continue; 3493 } 3494 3495 /* 3496 * Use the full live (not deleted) element for the scan 3497 * iteration. HAMMER2 does not allow partial replacements. 3498 * 3499 * XXX should be built into hammer2_combined_find(). 3500 */ 3501 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3502 3503 /* 3504 * Expand our calculated key range (key, keybits) to fit 3505 * the scanned key. nkeybits represents the full range 3506 * that we will later cut in half (two halves @ nkeybits - 1). 3507 */ 3508 nkeybits = keybits; 3509 if (nkeybits < bref->keybits) { 3510 if (bref->keybits > 64) { 3511 kprintf("bad bref chain %p bref %p\n", 3512 chain, bref); 3513 Debugger("fubar"); 3514 } 3515 nkeybits = bref->keybits; 3516 } 3517 while (nkeybits < 64 && 3518 (~(((hammer2_key_t)1 << nkeybits) - 1) & 3519 (key ^ bref->key)) != 0) { 3520 ++nkeybits; 3521 } 3522 3523 /* 3524 * If the new key range is larger we have to determine 3525 * which side of the new key range the existing keys fall 3526 * under by checking the high bit, then collapsing the 3527 * locount into the hicount or vise-versa. 3528 */ 3529 if (keybits != nkeybits) { 3530 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 3531 hicount += locount; 3532 locount = 0; 3533 } else { 3534 locount += hicount; 3535 hicount = 0; 3536 } 3537 keybits = nkeybits; 3538 } 3539 3540 /* 3541 * The newly scanned key will be in the lower half or the 3542 * upper half of the (new) key range. 3543 */ 3544 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 3545 ++hicount; 3546 else 3547 ++locount; 3548 3549 if (key_next == 0) 3550 break; 3551 key_beg = key_next; 3552 } 3553 hammer2_spin_unex(&parent->core.spin); 3554 bref = NULL; /* now invalid (safety) */ 3555 3556 /* 3557 * Adjust keybits to represent half of the full range calculated 3558 * above (radix 63 max) 3559 */ 3560 --keybits; 3561 3562 /* 3563 * Select whichever half contains the most elements. Theoretically 3564 * we can select either side as long as it contains at least one 3565 * element (in order to ensure that a free slot is present to hold 3566 * the indirect block). 3567 */ 3568 if (hammer2_indirect_optimize) { 3569 /* 3570 * Insert node for least number of keys, this will arrange 3571 * the first few blocks of a large file or the first few 3572 * inodes in a directory with fewer indirect blocks when 3573 * created linearly. 3574 */ 3575 if (hicount < locount && hicount != 0) 3576 key |= (hammer2_key_t)1 << keybits; 3577 else 3578 key &= ~(hammer2_key_t)1 << keybits; 3579 } else { 3580 /* 3581 * Insert node for most number of keys, best for heavily 3582 * fragmented files. 3583 */ 3584 if (hicount > locount) 3585 key |= (hammer2_key_t)1 << keybits; 3586 else 3587 key &= ~(hammer2_key_t)1 << keybits; 3588 } 3589 *keyp = key; 3590 3591 return (keybits); 3592 } 3593 3594 /* 3595 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 3596 * it exists. 3597 * 3598 * Both parent and chain must be locked exclusively. 3599 * 3600 * This function will modify the parent if the blockref requires removal 3601 * from the parent's block table. 3602 * 3603 * This function is NOT recursive. Any entity already pushed into the 3604 * chain (such as an inode) may still need visibility into its contents, 3605 * as well as the ability to read and modify the contents. For example, 3606 * for an unlinked file which is still open. 3607 */ 3608 void 3609 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 3610 hammer2_tid_t mtid, int flags) 3611 { 3612 KKASSERT(hammer2_mtx_owned(&chain->lock)); 3613 3614 /* 3615 * Nothing to do if already marked. 3616 * 3617 * We need the spinlock on the core whos RBTREE contains chain 3618 * to protect against races. 3619 */ 3620 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 3621 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 3622 chain->parent == parent); 3623 _hammer2_chain_delete_helper(parent, chain, mtid, flags); 3624 } 3625 3626 /* 3627 * Permanent deletions mark the chain as destroyed. H 3628 */ 3629 if (flags & HAMMER2_DELETE_PERMANENT) { 3630 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 3631 } else { 3632 /* XXX might not be needed */ 3633 hammer2_chain_setflush(chain); 3634 } 3635 } 3636 3637 /* 3638 * Returns the index of the nearest element in the blockref array >= elm. 3639 * Returns (count) if no element could be found. 3640 * 3641 * Sets *key_nextp to the next key for loop purposes but does not modify 3642 * it if the next key would be higher than the current value of *key_nextp. 3643 * Note that *key_nexp can overflow to 0, which should be tested by the 3644 * caller. 3645 * 3646 * (*cache_indexp) is a heuristic and can be any value without effecting 3647 * the result. 3648 * 3649 * WARNING! Must be called with parent's spinlock held. Spinlock remains 3650 * held through the operation. 3651 */ 3652 static int 3653 hammer2_base_find(hammer2_chain_t *parent, 3654 hammer2_blockref_t *base, int count, 3655 int *cache_indexp, hammer2_key_t *key_nextp, 3656 hammer2_key_t key_beg, hammer2_key_t key_end) 3657 { 3658 hammer2_blockref_t *scan; 3659 hammer2_key_t scan_end; 3660 int i; 3661 int limit; 3662 3663 /* 3664 * Require the live chain's already have their core's counted 3665 * so we can optimize operations. 3666 */ 3667 KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS); 3668 3669 /* 3670 * Degenerate case 3671 */ 3672 if (count == 0 || base == NULL) 3673 return(count); 3674 3675 /* 3676 * Sequential optimization using *cache_indexp. This is the most 3677 * likely scenario. 3678 * 3679 * We can avoid trailing empty entries on live chains, otherwise 3680 * we might have to check the whole block array. 3681 */ 3682 i = *cache_indexp; 3683 cpu_ccfence(); 3684 limit = parent->core.live_zero; 3685 if (i >= limit) 3686 i = limit - 1; 3687 if (i < 0) 3688 i = 0; 3689 KKASSERT(i < count); 3690 3691 /* 3692 * Search backwards 3693 */ 3694 scan = &base[i]; 3695 while (i > 0 && (scan->type == 0 || scan->key > key_beg)) { 3696 --scan; 3697 --i; 3698 } 3699 *cache_indexp = i; 3700 3701 /* 3702 * Search forwards, stop when we find a scan element which 3703 * encloses the key or until we know that there are no further 3704 * elements. 3705 */ 3706 while (i < count) { 3707 if (scan->type != 0) { 3708 scan_end = scan->key + 3709 ((hammer2_key_t)1 << scan->keybits) - 1; 3710 if (scan->key > key_beg || scan_end >= key_beg) 3711 break; 3712 } 3713 if (i >= limit) 3714 return (count); 3715 ++scan; 3716 ++i; 3717 } 3718 if (i != count) { 3719 *cache_indexp = i; 3720 if (i >= limit) { 3721 i = count; 3722 } else { 3723 scan_end = scan->key + 3724 ((hammer2_key_t)1 << scan->keybits); 3725 if (scan_end && (*key_nextp > scan_end || 3726 *key_nextp == 0)) { 3727 *key_nextp = scan_end; 3728 } 3729 } 3730 } 3731 return (i); 3732 } 3733 3734 /* 3735 * Do a combined search and return the next match either from the blockref 3736 * array or from the in-memory chain. Sets *bresp to the returned bref in 3737 * both cases, or sets it to NULL if the search exhausted. Only returns 3738 * a non-NULL chain if the search matched from the in-memory chain. 3739 * 3740 * When no in-memory chain has been found and a non-NULL bref is returned 3741 * in *bresp. 3742 * 3743 * 3744 * The returned chain is not locked or referenced. Use the returned bref 3745 * to determine if the search exhausted or not. Iterate if the base find 3746 * is chosen but matches a deleted chain. 3747 * 3748 * WARNING! Must be called with parent's spinlock held. Spinlock remains 3749 * held through the operation. 3750 */ 3751 static hammer2_chain_t * 3752 hammer2_combined_find(hammer2_chain_t *parent, 3753 hammer2_blockref_t *base, int count, 3754 int *cache_indexp, hammer2_key_t *key_nextp, 3755 hammer2_key_t key_beg, hammer2_key_t key_end, 3756 hammer2_blockref_t **bresp) 3757 { 3758 hammer2_blockref_t *bref; 3759 hammer2_chain_t *chain; 3760 int i; 3761 3762 /* 3763 * Lookup in block array and in rbtree. 3764 */ 3765 *key_nextp = key_end + 1; 3766 i = hammer2_base_find(parent, base, count, cache_indexp, 3767 key_nextp, key_beg, key_end); 3768 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 3769 3770 /* 3771 * Neither matched 3772 */ 3773 if (i == count && chain == NULL) { 3774 *bresp = NULL; 3775 return(NULL); 3776 } 3777 3778 /* 3779 * Only chain matched. 3780 */ 3781 if (i == count) { 3782 bref = &chain->bref; 3783 goto found; 3784 } 3785 3786 /* 3787 * Only blockref matched. 3788 */ 3789 if (chain == NULL) { 3790 bref = &base[i]; 3791 goto found; 3792 } 3793 3794 /* 3795 * Both in-memory and blockref matched, select the nearer element. 3796 * 3797 * If both are flush with the left-hand side or both are the 3798 * same distance away, select the chain. In this situation the 3799 * chain must have been loaded from the matching blockmap. 3800 */ 3801 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 3802 chain->bref.key == base[i].key) { 3803 KKASSERT(chain->bref.key == base[i].key); 3804 bref = &chain->bref; 3805 goto found; 3806 } 3807 3808 /* 3809 * Select the nearer key 3810 */ 3811 if (chain->bref.key < base[i].key) { 3812 bref = &chain->bref; 3813 } else { 3814 bref = &base[i]; 3815 chain = NULL; 3816 } 3817 3818 /* 3819 * If the bref is out of bounds we've exhausted our search. 3820 */ 3821 found: 3822 if (bref->key > key_end) { 3823 *bresp = NULL; 3824 chain = NULL; 3825 } else { 3826 *bresp = bref; 3827 } 3828 return(chain); 3829 } 3830 3831 /* 3832 * Locate the specified block array element and delete it. The element 3833 * must exist. 3834 * 3835 * The spin lock on the related chain must be held. 3836 * 3837 * NOTE: live_count was adjusted when the chain was deleted, so it does not 3838 * need to be adjusted when we commit the media change. 3839 */ 3840 void 3841 hammer2_base_delete(hammer2_chain_t *parent, 3842 hammer2_blockref_t *base, int count, 3843 int *cache_indexp, hammer2_chain_t *chain) 3844 { 3845 hammer2_blockref_t *elm = &chain->bref; 3846 hammer2_key_t key_next; 3847 int i; 3848 3849 /* 3850 * Delete element. Expect the element to exist. 3851 * 3852 * XXX see caller, flush code not yet sophisticated enough to prevent 3853 * re-flushed in some cases. 3854 */ 3855 key_next = 0; /* max range */ 3856 i = hammer2_base_find(parent, base, count, cache_indexp, 3857 &key_next, elm->key, elm->key); 3858 if (i == count || base[i].type == 0 || 3859 base[i].key != elm->key || 3860 ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 && 3861 base[i].keybits != elm->keybits)) { 3862 hammer2_spin_unex(&parent->core.spin); 3863 panic("delete base %p element not found at %d/%d elm %p\n", 3864 base, i, count, elm); 3865 return; 3866 } 3867 3868 /* 3869 * Update stats and zero the entry 3870 */ 3871 parent->bref.data_count -= base[i].data_count; 3872 parent->bref.data_count -= (hammer2_off_t)1 << 3873 (int)(base[i].data_off & HAMMER2_OFF_MASK_RADIX); 3874 parent->bref.inode_count -= base[i].inode_count; 3875 if (base[i].type == HAMMER2_BREF_TYPE_INODE) 3876 parent->bref.inode_count -= 1; 3877 3878 bzero(&base[i], sizeof(*base)); 3879 3880 /* 3881 * We can only optimize parent->core.live_zero for live chains. 3882 */ 3883 if (parent->core.live_zero == i + 1) { 3884 while (--i >= 0 && base[i].type == 0) 3885 ; 3886 parent->core.live_zero = i + 1; 3887 } 3888 3889 /* 3890 * Clear appropriate blockmap flags in chain. 3891 */ 3892 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 3893 HAMMER2_CHAIN_BMAPUPD); 3894 } 3895 3896 /* 3897 * Insert the specified element. The block array must not already have the 3898 * element and must have space available for the insertion. 3899 * 3900 * The spin lock on the related chain must be held. 3901 * 3902 * NOTE: live_count was adjusted when the chain was deleted, so it does not 3903 * need to be adjusted when we commit the media change. 3904 */ 3905 void 3906 hammer2_base_insert(hammer2_chain_t *parent, 3907 hammer2_blockref_t *base, int count, 3908 int *cache_indexp, hammer2_chain_t *chain) 3909 { 3910 hammer2_blockref_t *elm = &chain->bref; 3911 hammer2_key_t key_next; 3912 hammer2_key_t xkey; 3913 int i; 3914 int j; 3915 int k; 3916 int l; 3917 int u = 1; 3918 3919 /* 3920 * Insert new element. Expect the element to not already exist 3921 * unless we are replacing it. 3922 * 3923 * XXX see caller, flush code not yet sophisticated enough to prevent 3924 * re-flushed in some cases. 3925 */ 3926 key_next = 0; /* max range */ 3927 i = hammer2_base_find(parent, base, count, cache_indexp, 3928 &key_next, elm->key, elm->key); 3929 3930 /* 3931 * Shortcut fill optimization, typical ordered insertion(s) may not 3932 * require a search. 3933 */ 3934 KKASSERT(i >= 0 && i <= count); 3935 3936 /* 3937 * Set appropriate blockmap flags in chain. 3938 */ 3939 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 3940 3941 /* 3942 * Update stats and zero the entry 3943 */ 3944 parent->bref.data_count += elm->data_count; 3945 parent->bref.data_count += (hammer2_off_t)1 << 3946 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 3947 parent->bref.inode_count += elm->inode_count; 3948 if (elm->type == HAMMER2_BREF_TYPE_INODE) 3949 parent->bref.inode_count += 1; 3950 3951 3952 /* 3953 * We can only optimize parent->core.live_zero for live chains. 3954 */ 3955 if (i == count && parent->core.live_zero < count) { 3956 i = parent->core.live_zero++; 3957 base[i] = *elm; 3958 return; 3959 } 3960 3961 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 3962 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 3963 hammer2_spin_unex(&parent->core.spin); 3964 panic("insert base %p overlapping elements at %d elm %p\n", 3965 base, i, elm); 3966 } 3967 3968 /* 3969 * Try to find an empty slot before or after. 3970 */ 3971 j = i; 3972 k = i; 3973 while (j > 0 || k < count) { 3974 --j; 3975 if (j >= 0 && base[j].type == 0) { 3976 if (j == i - 1) { 3977 base[j] = *elm; 3978 } else { 3979 bcopy(&base[j+1], &base[j], 3980 (i - j - 1) * sizeof(*base)); 3981 base[i - 1] = *elm; 3982 } 3983 goto validate; 3984 } 3985 ++k; 3986 if (k < count && base[k].type == 0) { 3987 bcopy(&base[i], &base[i+1], 3988 (k - i) * sizeof(hammer2_blockref_t)); 3989 base[i] = *elm; 3990 3991 /* 3992 * We can only update parent->core.live_zero for live 3993 * chains. 3994 */ 3995 if (parent->core.live_zero <= k) 3996 parent->core.live_zero = k + 1; 3997 u = 2; 3998 goto validate; 3999 } 4000 } 4001 panic("hammer2_base_insert: no room!"); 4002 4003 /* 4004 * Debugging 4005 */ 4006 validate: 4007 key_next = 0; 4008 for (l = 0; l < count; ++l) { 4009 if (base[l].type) { 4010 key_next = base[l].key + 4011 ((hammer2_key_t)1 << base[l].keybits) - 1; 4012 break; 4013 } 4014 } 4015 while (++l < count) { 4016 if (base[l].type) { 4017 if (base[l].key <= key_next) 4018 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 4019 key_next = base[l].key + 4020 ((hammer2_key_t)1 << base[l].keybits) - 1; 4021 4022 } 4023 } 4024 4025 } 4026 4027 #if 0 4028 4029 /* 4030 * Sort the blockref array for the chain. Used by the flush code to 4031 * sort the blockref[] array. 4032 * 4033 * The chain must be exclusively locked AND spin-locked. 4034 */ 4035 typedef hammer2_blockref_t *hammer2_blockref_p; 4036 4037 static 4038 int 4039 hammer2_base_sort_callback(const void *v1, const void *v2) 4040 { 4041 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 4042 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 4043 4044 /* 4045 * Make sure empty elements are placed at the end of the array 4046 */ 4047 if (bref1->type == 0) { 4048 if (bref2->type == 0) 4049 return(0); 4050 return(1); 4051 } else if (bref2->type == 0) { 4052 return(-1); 4053 } 4054 4055 /* 4056 * Sort by key 4057 */ 4058 if (bref1->key < bref2->key) 4059 return(-1); 4060 if (bref1->key > bref2->key) 4061 return(1); 4062 return(0); 4063 } 4064 4065 void 4066 hammer2_base_sort(hammer2_chain_t *chain) 4067 { 4068 hammer2_blockref_t *base; 4069 int count; 4070 4071 switch(chain->bref.type) { 4072 case HAMMER2_BREF_TYPE_INODE: 4073 /* 4074 * Special shortcut for embedded data returns the inode 4075 * itself. Callers must detect this condition and access 4076 * the embedded data (the strategy code does this for us). 4077 * 4078 * This is only applicable to regular files and softlinks. 4079 */ 4080 if (chain->data->ipdata.meta.op_flags & 4081 HAMMER2_OPFLAG_DIRECTDATA) { 4082 return; 4083 } 4084 base = &chain->data->ipdata.u.blockset.blockref[0]; 4085 count = HAMMER2_SET_COUNT; 4086 break; 4087 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4088 case HAMMER2_BREF_TYPE_INDIRECT: 4089 /* 4090 * Optimize indirect blocks in the INITIAL state to avoid 4091 * I/O. 4092 */ 4093 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 4094 base = &chain->data->npdata[0]; 4095 count = chain->bytes / sizeof(hammer2_blockref_t); 4096 break; 4097 case HAMMER2_BREF_TYPE_VOLUME: 4098 base = &chain->data->voldata.sroot_blockset.blockref[0]; 4099 count = HAMMER2_SET_COUNT; 4100 break; 4101 case HAMMER2_BREF_TYPE_FREEMAP: 4102 base = &chain->data->blkset.blockref[0]; 4103 count = HAMMER2_SET_COUNT; 4104 break; 4105 default: 4106 kprintf("hammer2_chain_lookup: unrecognized " 4107 "blockref(A) type: %d", 4108 chain->bref.type); 4109 while (1) 4110 tsleep(&base, 0, "dead", 0); 4111 panic("hammer2_chain_lookup: unrecognized " 4112 "blockref(A) type: %d", 4113 chain->bref.type); 4114 base = NULL; /* safety */ 4115 count = 0; /* safety */ 4116 } 4117 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 4118 } 4119 4120 #endif 4121 4122 /* 4123 * Chain memory management 4124 */ 4125 void 4126 hammer2_chain_wait(hammer2_chain_t *chain) 4127 { 4128 tsleep(chain, 0, "chnflw", 1); 4129 } 4130 4131 const hammer2_media_data_t * 4132 hammer2_chain_rdata(hammer2_chain_t *chain) 4133 { 4134 KKASSERT(chain->data != NULL); 4135 return (chain->data); 4136 } 4137 4138 hammer2_media_data_t * 4139 hammer2_chain_wdata(hammer2_chain_t *chain) 4140 { 4141 KKASSERT(chain->data != NULL); 4142 return (chain->data); 4143 } 4144 4145 /* 4146 * Set the check data for a chain. This can be a heavy-weight operation 4147 * and typically only runs on-flush. For file data check data is calculated 4148 * when the logical buffers are flushed. 4149 */ 4150 void 4151 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 4152 { 4153 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 4154 4155 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4156 case HAMMER2_CHECK_NONE: 4157 break; 4158 case HAMMER2_CHECK_DISABLED: 4159 break; 4160 case HAMMER2_CHECK_ISCSI32: 4161 chain->bref.check.iscsi32.value = 4162 hammer2_icrc32(bdata, chain->bytes); 4163 break; 4164 case HAMMER2_CHECK_CRC64: 4165 chain->bref.check.crc64.value = 0; 4166 /* XXX */ 4167 break; 4168 case HAMMER2_CHECK_SHA192: 4169 { 4170 SHA256_CTX hash_ctx; 4171 union { 4172 uint8_t digest[SHA256_DIGEST_LENGTH]; 4173 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4174 } u; 4175 4176 SHA256_Init(&hash_ctx); 4177 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4178 SHA256_Final(u.digest, &hash_ctx); 4179 u.digest64[2] ^= u.digest64[3]; 4180 bcopy(u.digest, 4181 chain->bref.check.sha192.data, 4182 sizeof(chain->bref.check.sha192.data)); 4183 } 4184 break; 4185 case HAMMER2_CHECK_FREEMAP: 4186 chain->bref.check.freemap.icrc32 = 4187 hammer2_icrc32(bdata, chain->bytes); 4188 break; 4189 default: 4190 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4191 chain->bref.methods); 4192 break; 4193 } 4194 } 4195 4196 int 4197 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 4198 { 4199 int r; 4200 4201 if (chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 4202 return 1; 4203 4204 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4205 case HAMMER2_CHECK_NONE: 4206 r = 1; 4207 break; 4208 case HAMMER2_CHECK_DISABLED: 4209 r = 1; 4210 break; 4211 case HAMMER2_CHECK_ISCSI32: 4212 r = (chain->bref.check.iscsi32.value == 4213 hammer2_icrc32(bdata, chain->bytes)); 4214 break; 4215 case HAMMER2_CHECK_CRC64: 4216 r = (chain->bref.check.crc64.value == 0); 4217 /* XXX */ 4218 break; 4219 case HAMMER2_CHECK_SHA192: 4220 { 4221 SHA256_CTX hash_ctx; 4222 union { 4223 uint8_t digest[SHA256_DIGEST_LENGTH]; 4224 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4225 } u; 4226 4227 SHA256_Init(&hash_ctx); 4228 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4229 SHA256_Final(u.digest, &hash_ctx); 4230 u.digest64[2] ^= u.digest64[3]; 4231 if (bcmp(u.digest, 4232 chain->bref.check.sha192.data, 4233 sizeof(chain->bref.check.sha192.data)) == 0) { 4234 r = 1; 4235 } else { 4236 r = 0; 4237 } 4238 } 4239 break; 4240 case HAMMER2_CHECK_FREEMAP: 4241 r = (chain->bref.check.freemap.icrc32 == 4242 hammer2_icrc32(bdata, chain->bytes)); 4243 if (r == 0) { 4244 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 4245 chain->bref.check.freemap.icrc32, 4246 hammer2_icrc32(bdata, chain->bytes), chain->bytes); 4247 if (chain->dio) 4248 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 4249 chain->dio, chain->dio->bp->b_loffset, chain->dio->bp->b_bufsize, bdata, chain->dio->bp->b_data); 4250 } 4251 4252 break; 4253 default: 4254 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4255 chain->bref.methods); 4256 r = 1; 4257 break; 4258 } 4259 return r; 4260 } 4261 4262 /* 4263 * The caller presents a shared-locked (parent, chain) where the chain 4264 * is of type HAMMER2_OBJTYPE_HARDLINK. The caller must hold the ip 4265 * structure representing the inode locked to prevent 4266 * consolidation/deconsolidation races. 4267 * 4268 * The flags passed in are LOOKUP flags, not RESOLVE flags. Only 4269 * HAMMER2_LOOKUP_SHARED is supported. 4270 * 4271 * We locate the hardlink in the current or a common parent directory. 4272 * 4273 * If we are unable to locate the hardlink, EIO is returned and 4274 * (*chainp) is unlocked and dropped. 4275 */ 4276 int 4277 hammer2_chain_hardlink_find(hammer2_inode_t *dip, 4278 hammer2_chain_t **parentp, 4279 hammer2_chain_t **chainp, 4280 int flags) 4281 { 4282 hammer2_chain_t *parent; 4283 hammer2_chain_t *rchain; 4284 hammer2_key_t key_dummy; 4285 hammer2_key_t lhc; 4286 int cache_index = -1; 4287 int resolve_flags; 4288 4289 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 4290 HAMMER2_RESOLVE_SHARED : 0; 4291 4292 /* 4293 * Obtain the key for the hardlink from *chainp. 4294 */ 4295 rchain = *chainp; 4296 lhc = rchain->data->ipdata.meta.inum; 4297 hammer2_chain_unlock(rchain); 4298 hammer2_chain_drop(rchain); 4299 rchain = NULL; 4300 4301 for (;;) { 4302 int again; 4303 4304 rchain = hammer2_chain_lookup(parentp, &key_dummy, 4305 lhc, lhc, 4306 &cache_index, flags); 4307 if (rchain) 4308 break; 4309 4310 /* 4311 * Iterate parents, handle parent rename races by retrying 4312 * the operation. 4313 */ 4314 again = 0; 4315 for (;;) { 4316 parent = *parentp; 4317 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4318 if (again == 1) 4319 break; 4320 ++again; 4321 } 4322 if (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) 4323 goto done; 4324 for (;;) { 4325 if (parent->parent == NULL) 4326 goto done; 4327 parent = parent->parent; 4328 hammer2_chain_ref(parent); 4329 hammer2_chain_unlock(*parentp); 4330 hammer2_chain_lock(parent, 4331 HAMMER2_RESOLVE_ALWAYS | 4332 resolve_flags); 4333 if ((*parentp)->parent == parent) { 4334 hammer2_chain_drop(*parentp); 4335 *parentp = parent; 4336 break; 4337 } 4338 hammer2_chain_unlock(parent); 4339 hammer2_chain_drop(parent); 4340 hammer2_chain_lock(*parentp, 4341 HAMMER2_RESOLVE_ALWAYS | 4342 resolve_flags); 4343 parent = *parentp; 4344 } 4345 } 4346 } 4347 done: 4348 4349 *chainp = rchain; 4350 return (rchain ? 0 : EINVAL); 4351 } 4352 4353 /* 4354 * Used by the bulkscan code to snapshot the synchronized storage for 4355 * a volume, allowing it to be scanned concurrently against normal 4356 * operation. 4357 */ 4358 hammer2_chain_t * 4359 hammer2_chain_bulksnap(hammer2_chain_t *chain) 4360 { 4361 hammer2_chain_t *copy; 4362 4363 copy = hammer2_chain_alloc(chain->hmp, chain->pmp, &chain->bref); 4364 switch(chain->bref.type) { 4365 case HAMMER2_BREF_TYPE_VOLUME: 4366 copy->data = kmalloc(sizeof(copy->data->voldata), 4367 chain->hmp->mchain, 4368 M_WAITOK | M_ZERO); 4369 hammer2_spin_ex(&chain->core.spin); 4370 copy->data->voldata = chain->data->voldata; 4371 hammer2_spin_unex(&chain->core.spin); 4372 break; 4373 case HAMMER2_BREF_TYPE_FREEMAP: 4374 copy->data = kmalloc(sizeof(hammer2_blockset_t), 4375 chain->hmp->mchain, 4376 M_WAITOK | M_ZERO); 4377 hammer2_spin_ex(&chain->core.spin); 4378 copy->data->blkset = chain->data->blkset; 4379 hammer2_spin_unex(&chain->core.spin); 4380 break; 4381 default: 4382 break; 4383 } 4384 return copy; 4385 } 4386 4387 void 4388 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 4389 { 4390 switch(copy->bref.type) { 4391 case HAMMER2_BREF_TYPE_VOLUME: 4392 case HAMMER2_BREF_TYPE_FREEMAP: 4393 KKASSERT(copy->data); 4394 kfree(copy->data, copy->hmp->mchain); 4395 copy->data = NULL; 4396 default: 4397 break; 4398 } 4399 hammer2_chain_drop(copy); 4400 } 4401 4402 /* 4403 * Create a snapshot of the specified {parent, ochain} with the specified 4404 * label. The originating hammer2_inode must be exclusively locked for 4405 * safety. 4406 * 4407 * The ioctl code has already synced the filesystem. 4408 */ 4409 int 4410 hammer2_chain_snapshot(hammer2_chain_t *chain, hammer2_ioc_pfs_t *pmp, 4411 hammer2_tid_t mtid) 4412 { 4413 hammer2_dev_t *hmp; 4414 const hammer2_inode_data_t *ripdata; 4415 hammer2_inode_data_t *wipdata; 4416 hammer2_chain_t *nchain; 4417 hammer2_inode_t *nip; 4418 size_t name_len; 4419 hammer2_key_t lhc; 4420 struct vattr vat; 4421 #if 0 4422 uuid_t opfs_clid; 4423 #endif 4424 int error; 4425 4426 kprintf("snapshot %s\n", pmp->name); 4427 4428 name_len = strlen(pmp->name); 4429 lhc = hammer2_dirhash(pmp->name, name_len); 4430 4431 /* 4432 * Get the clid 4433 */ 4434 ripdata = &chain->data->ipdata; 4435 #if 0 4436 opfs_clid = ripdata->meta.pfs_clid; 4437 #endif 4438 hmp = chain->hmp; 4439 4440 /* 4441 * Create the snapshot directory under the super-root 4442 * 4443 * Set PFS type, generate a unique filesystem id, and generate 4444 * a cluster id. Use the same clid when snapshotting a PFS root, 4445 * which theoretically allows the snapshot to be used as part of 4446 * the same cluster (perhaps as a cache). 4447 * 4448 * Copy the (flushed) blockref array. Theoretically we could use 4449 * chain_duplicate() but it becomes difficult to disentangle 4450 * the shared core so for now just brute-force it. 4451 */ 4452 VATTR_NULL(&vat); 4453 vat.va_type = VDIR; 4454 vat.va_mode = 0755; 4455 nip = hammer2_inode_create(hmp->spmp->iroot, &vat, proc0.p_ucred, 4456 pmp->name, name_len, 0, 4457 1, 0, 0, 4458 HAMMER2_INSERT_PFSROOT, &error); 4459 4460 if (nip) { 4461 hammer2_inode_modify(nip); 4462 nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS); 4463 hammer2_chain_modify(nchain, mtid, 0, 0); 4464 wipdata = &nchain->data->ipdata; 4465 4466 nip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER; 4467 nip->meta.pfs_subtype = HAMMER2_PFSSUBTYPE_SNAPSHOT; 4468 nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT; 4469 kern_uuidgen(&nip->meta.pfs_fsid, 1); 4470 4471 /* 4472 * Give the snapshot its own private cluster id. As a 4473 * snapshot no further synchronization with the original 4474 * cluster will be done. 4475 */ 4476 #if 0 4477 if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) 4478 nip->meta.pfs_clid = opfs_clid; 4479 else 4480 kern_uuidgen(&nip->meta.pfs_clid, 1); 4481 #endif 4482 kern_uuidgen(&nip->meta.pfs_clid, 1); 4483 nchain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 4484 4485 /* XXX hack blockset copy */ 4486 /* XXX doesn't work with real cluster */ 4487 wipdata->meta = nip->meta; 4488 wipdata->u.blockset = ripdata->u.blockset; 4489 hammer2_flush(nchain, 1); 4490 hammer2_chain_unlock(nchain); 4491 hammer2_chain_drop(nchain); 4492 hammer2_inode_unlock(nip); 4493 } 4494 return (error); 4495 } 4496