1 /* 2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 38 * 39 * Chains are the in-memory version on media objects (volume header, inodes, 40 * indirect blocks, data blocks, etc). Chains represent a portion of the 41 * HAMMER2 topology. 42 * 43 * Chains are no-longer delete-duplicated. Instead, the original in-memory 44 * chain will be moved along with its block reference (e.g. for things like 45 * renames, hardlink operations, modifications, etc), and will be indexed 46 * on a secondary list for flush handling instead of propagating a flag 47 * upward to the root. 48 * 49 * Concurrent front-end operations can still run against backend flushes 50 * as long as they do not cross the current flush boundary. An operation 51 * running above the current flush (in areas not yet flushed) can become 52 * part of the current flush while ano peration running below the current 53 * flush can become part of the next flush. 54 */ 55 #include <sys/cdefs.h> 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/types.h> 59 #include <sys/lock.h> 60 #include <sys/kern_syscall.h> 61 #include <sys/uuid.h> 62 63 #include <crypto/sha2/sha2.h> 64 65 #include "hammer2.h" 66 67 static int hammer2_indirect_optimize; /* XXX SYSCTL */ 68 69 static hammer2_chain_t *hammer2_chain_create_indirect( 70 hammer2_chain_t *parent, 71 hammer2_key_t key, int keybits, 72 hammer2_tid_t mtid, int for_type, int *errorp); 73 static void hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop); 74 static hammer2_chain_t *hammer2_combined_find( 75 hammer2_chain_t *parent, 76 hammer2_blockref_t *base, int count, 77 int *cache_indexp, hammer2_key_t *key_nextp, 78 hammer2_key_t key_beg, hammer2_key_t key_end, 79 hammer2_blockref_t **bresp); 80 81 /* 82 * Basic RBTree for chains (core->rbtree and core->dbtree). Chains cannot 83 * overlap in the RB trees. Deleted chains are moved from rbtree to either 84 * dbtree or to dbq. 85 * 86 * Chains in delete-duplicate sequences can always iterate through core_entry 87 * to locate the live version of the chain. 88 */ 89 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 90 91 int 92 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 93 { 94 hammer2_key_t c1_beg; 95 hammer2_key_t c1_end; 96 hammer2_key_t c2_beg; 97 hammer2_key_t c2_end; 98 99 /* 100 * Compare chains. Overlaps are not supposed to happen and catch 101 * any software issues early we count overlaps as a match. 102 */ 103 c1_beg = chain1->bref.key; 104 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 105 c2_beg = chain2->bref.key; 106 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 107 108 if (c1_end < c2_beg) /* fully to the left */ 109 return(-1); 110 if (c1_beg > c2_end) /* fully to the right */ 111 return(1); 112 return(0); /* overlap (must not cross edge boundary) */ 113 } 114 115 static __inline 116 int 117 hammer2_isclusterable(hammer2_chain_t *chain) 118 { 119 if (hammer2_cluster_enable) { 120 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 121 chain->bref.type == HAMMER2_BREF_TYPE_INODE || 122 chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 123 return(1); 124 } 125 } 126 return(0); 127 } 128 129 /* 130 * Make a chain visible to the flusher. The flusher needs to be able to 131 * do flushes of subdirectory chains or single files so it does a top-down 132 * recursion using the ONFLUSH flag for the recursion. It locates MODIFIED 133 * or UPDATE chains and flushes back up the chain to the volume root. 134 * 135 * This routine sets ONFLUSH upward until it hits the volume root. For 136 * simplicity we ignore PFSROOT boundaries whos rules can be complex. 137 * Extra ONFLUSH flagging doesn't hurt the filesystem. 138 */ 139 void 140 hammer2_chain_setflush(hammer2_chain_t *chain) 141 { 142 hammer2_chain_t *parent; 143 144 if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 145 hammer2_spin_sh(&chain->core.spin); 146 while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 147 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 148 if ((parent = chain->parent) == NULL) 149 break; 150 hammer2_spin_sh(&parent->core.spin); 151 hammer2_spin_unsh(&chain->core.spin); 152 chain = parent; 153 } 154 hammer2_spin_unsh(&chain->core.spin); 155 } 156 } 157 158 /* 159 * Allocate a new disconnected chain element representing the specified 160 * bref. chain->refs is set to 1 and the passed bref is copied to 161 * chain->bref. chain->bytes is derived from the bref. 162 * 163 * chain->pmp inherits pmp unless the chain is an inode (other than the 164 * super-root inode). 165 * 166 * NOTE: Returns a referenced but unlocked (because there is no core) chain. 167 */ 168 hammer2_chain_t * 169 hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp, 170 hammer2_blockref_t *bref) 171 { 172 hammer2_chain_t *chain; 173 u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 174 175 /* 176 * Construct the appropriate system structure. 177 */ 178 switch(bref->type) { 179 case HAMMER2_BREF_TYPE_INODE: 180 case HAMMER2_BREF_TYPE_INDIRECT: 181 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 182 case HAMMER2_BREF_TYPE_DATA: 183 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 184 /* 185 * Chain's are really only associated with the hmp but we 186 * maintain a pmp association for per-mount memory tracking 187 * purposes. The pmp can be NULL. 188 */ 189 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 190 break; 191 case HAMMER2_BREF_TYPE_VOLUME: 192 case HAMMER2_BREF_TYPE_FREEMAP: 193 /* 194 * Only hammer2_chain_bulksnap() calls this function with these 195 * types. 196 */ 197 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 198 break; 199 default: 200 chain = NULL; 201 panic("hammer2_chain_alloc: unrecognized blockref type: %d", 202 bref->type); 203 } 204 205 /* 206 * Initialize the new chain structure. pmp must be set to NULL for 207 * chains belonging to the super-root topology of a device mount. 208 */ 209 if (pmp == hmp->spmp) 210 chain->pmp = NULL; 211 else 212 chain->pmp = pmp; 213 chain->hmp = hmp; 214 chain->bref = *bref; 215 chain->bytes = bytes; 216 chain->refs = 1; 217 chain->flags = HAMMER2_CHAIN_ALLOCATED; 218 219 /* 220 * Set the PFS boundary flag if this chain represents a PFS root. 221 */ 222 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 223 chain->flags |= HAMMER2_CHAIN_PFSBOUNDARY; 224 hammer2_chain_core_init(chain); 225 226 return (chain); 227 } 228 229 /* 230 * Initialize a chain's core structure. This structure used to be allocated 231 * but is now embedded. 232 * 233 * The core is not locked. No additional refs on the chain are made. 234 * (trans) must not be NULL if (core) is not NULL. 235 */ 236 void 237 hammer2_chain_core_init(hammer2_chain_t *chain) 238 { 239 /* 240 * Fresh core under nchain (no multi-homing of ochain's 241 * sub-tree). 242 */ 243 RB_INIT(&chain->core.rbtree); /* live chains */ 244 hammer2_mtx_init(&chain->lock, "h2chain"); 245 } 246 247 /* 248 * Add a reference to a chain element, preventing its destruction. 249 * 250 * (can be called with spinlock held) 251 */ 252 void 253 hammer2_chain_ref(hammer2_chain_t *chain) 254 { 255 atomic_add_int(&chain->refs, 1); 256 #if 0 257 kprintf("REFC %p %d %08x\n", chain, chain->refs - 1, chain->flags); 258 print_backtrace(8); 259 #endif 260 } 261 262 /* 263 * Insert the chain in the core rbtree. 264 * 265 * Normal insertions are placed in the live rbtree. Insertion of a deleted 266 * chain is a special case used by the flush code that is placed on the 267 * unstaged deleted list to avoid confusing the live view. 268 */ 269 #define HAMMER2_CHAIN_INSERT_SPIN 0x0001 270 #define HAMMER2_CHAIN_INSERT_LIVE 0x0002 271 #define HAMMER2_CHAIN_INSERT_RACE 0x0004 272 273 static 274 int 275 hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain, 276 int flags, int generation) 277 { 278 hammer2_chain_t *xchain; 279 int error = 0; 280 281 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 282 hammer2_spin_ex(&parent->core.spin); 283 284 /* 285 * Interlocked by spinlock, check for race 286 */ 287 if ((flags & HAMMER2_CHAIN_INSERT_RACE) && 288 parent->core.generation != generation) { 289 error = EAGAIN; 290 goto failed; 291 } 292 293 /* 294 * Insert chain 295 */ 296 xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain); 297 KASSERT(xchain == NULL, 298 ("hammer2_chain_insert: collision %p %p", chain, xchain)); 299 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 300 chain->parent = parent; 301 ++parent->core.chain_count; 302 ++parent->core.generation; /* XXX incs for _get() too, XXX */ 303 304 /* 305 * We have to keep track of the effective live-view blockref count 306 * so the create code knows when to push an indirect block. 307 */ 308 if (flags & HAMMER2_CHAIN_INSERT_LIVE) 309 atomic_add_int(&parent->core.live_count, 1); 310 failed: 311 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 312 hammer2_spin_unex(&parent->core.spin); 313 return error; 314 } 315 316 /* 317 * Drop the caller's reference to the chain. When the ref count drops to 318 * zero this function will try to disassociate the chain from its parent and 319 * deallocate it, then recursely drop the parent using the implied ref 320 * from the chain's chain->parent. 321 */ 322 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain); 323 324 void 325 hammer2_chain_drop(hammer2_chain_t *chain) 326 { 327 u_int refs; 328 329 if (hammer2_debug & 0x200000) 330 Debugger("drop"); 331 #if 0 332 kprintf("DROP %p %d %08x\n", chain, chain->refs - 1, chain->flags); 333 print_backtrace(8); 334 #endif 335 336 KKASSERT(chain->refs > 0); 337 338 while (chain) { 339 refs = chain->refs; 340 cpu_ccfence(); 341 KKASSERT(refs > 0); 342 343 if (refs == 1) { 344 chain = hammer2_chain_lastdrop(chain); 345 } else { 346 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 347 break; 348 /* retry the same chain */ 349 } 350 } 351 } 352 353 /* 354 * Safe handling of the 1->0 transition on chain. Returns a chain for 355 * recursive drop or NULL, possibly returning the same chain if the atomic 356 * op fails. 357 * 358 * Whem two chains need to be recursively dropped we use the chain 359 * we would otherwise free to placehold the additional chain. It's a bit 360 * convoluted but we can't just recurse without potentially blowing out 361 * the kernel stack. 362 * 363 * The chain cannot be freed if it has any children. 364 * The chain cannot be freed if flagged MODIFIED unless we can dispose of that. 365 * The chain cannot be freed if flagged UPDATE unless we can dispose of that. 366 * 367 * The core spinlock is allowed nest child-to-parent (not parent-to-child). 368 */ 369 static 370 hammer2_chain_t * 371 hammer2_chain_lastdrop(hammer2_chain_t *chain) 372 { 373 hammer2_pfs_t *pmp; 374 hammer2_dev_t *hmp; 375 hammer2_chain_t *parent; 376 hammer2_chain_t *rdrop; 377 378 /* 379 * Critical field access. 380 */ 381 hammer2_spin_ex(&chain->core.spin); 382 383 if (chain->parent) { 384 /* 385 * If the chain has a parent the UPDATE bit prevents scrapping 386 * as the chain is needed to properly flush the parent. Try 387 * to complete the 1->0 transition and return NULL. Retry 388 * (return chain) if we are unable to complete the 1->0 389 * transition, else return NULL (nothing more to do). 390 * 391 * If the chain has a parent the MODIFIED bit prevents 392 * scrapping. 393 */ 394 if (chain->flags & (HAMMER2_CHAIN_UPDATE | 395 HAMMER2_CHAIN_MODIFIED)) { 396 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 397 hammer2_spin_unex(&chain->core.spin); 398 chain = NULL; 399 } else { 400 hammer2_spin_unex(&chain->core.spin); 401 } 402 return (chain); 403 } 404 /* spinlock still held */ 405 } else { 406 /* 407 * The chain has no parent and can be flagged for destruction. 408 * Since it has no parent, UPDATE can also be cleared. 409 */ 410 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 411 if (chain->flags & HAMMER2_CHAIN_UPDATE) 412 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 413 414 /* 415 * If the chain has children or if it has been MODIFIED and 416 * also recorded for DEDUP, we must still flush the chain. 417 * 418 * In the case where it has children, the DESTROY flag test 419 * in the flush code will prevent unnecessary flushes of 420 * MODIFIED chains that are not flagged DEDUP so don't worry 421 * about that here. 422 */ 423 if (chain->core.chain_count || 424 (chain->flags & (HAMMER2_CHAIN_MODIFIED | 425 HAMMER2_CHAIN_DEDUP)) == 426 (HAMMER2_CHAIN_MODIFIED | 427 HAMMER2_CHAIN_DEDUP)) { 428 /* 429 * Put on flushq (should ensure refs > 1), retry 430 * the drop. 431 */ 432 hammer2_spin_unex(&chain->core.spin); 433 hammer2_delayed_flush(chain); 434 return(chain); /* retry drop */ 435 } 436 437 /* 438 * Otherwise we can scrap the MODIFIED bit if it is set, 439 * and continue along the freeing path. 440 */ 441 if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 442 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 443 atomic_add_long(&hammer2_count_modified_chains, -1); 444 } 445 /* spinlock still held */ 446 } 447 448 /* 449 * If any children exist we must leave the chain intact with refs == 0. 450 * They exist because chains are retained below us which have refs or 451 * may require flushing. This case can occur when parent != NULL. 452 * 453 * Retry (return chain) if we fail to transition the refs to 0, else 454 * return NULL indication nothing more to do. 455 */ 456 if (chain->core.chain_count) { 457 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 458 hammer2_spin_unex(&chain->core.spin); 459 chain = NULL; 460 } else { 461 hammer2_spin_unex(&chain->core.spin); 462 } 463 return (chain); 464 } 465 /* spinlock still held */ 466 /* no chains left under us */ 467 468 /* 469 * chain->core has no children left so no accessors can get to our 470 * chain from there. Now we have to lock the parent core to interlock 471 * remaining possible accessors that might bump chain's refs before 472 * we can safely drop chain's refs with intent to free the chain. 473 */ 474 hmp = chain->hmp; 475 pmp = chain->pmp; /* can be NULL */ 476 rdrop = NULL; 477 478 /* 479 * Spinlock the parent and try to drop the last ref on chain. 480 * On success remove chain from its parent, otherwise return NULL. 481 * 482 * (normal core locks are top-down recursive but we define core 483 * spinlocks as bottom-up recursive, so this is safe). 484 */ 485 if ((parent = chain->parent) != NULL) { 486 hammer2_spin_ex(&parent->core.spin); 487 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 488 /* 1->0 transition failed, retry */ 489 hammer2_spin_unex(&parent->core.spin); 490 hammer2_spin_unex(&chain->core.spin); 491 return(chain); 492 } 493 494 /* 495 * 1->0 transition successful, remove chain from the 496 * parent. 497 */ 498 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 499 RB_REMOVE(hammer2_chain_tree, 500 &parent->core.rbtree, chain); 501 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 502 --parent->core.chain_count; 503 chain->parent = NULL; 504 } 505 506 /* 507 * If our chain was the last chain in the parent's core the 508 * core is now empty and its parent might have to be 509 * re-dropped if it has 0 refs. 510 */ 511 if (parent->core.chain_count == 0) { 512 rdrop = parent; 513 atomic_add_int(&rdrop->refs, 1); 514 /* 515 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) 516 rdrop = NULL; 517 */ 518 } 519 hammer2_spin_unex(&parent->core.spin); 520 parent = NULL; /* safety */ 521 } 522 523 /* 524 * Successful 1->0 transition and the chain can be destroyed now. 525 * 526 * We still have the core spinlock, and core's chain_count is 0. 527 * Any parent spinlock is gone. 528 */ 529 hammer2_spin_unex(&chain->core.spin); 530 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 531 chain->core.chain_count == 0); 532 533 /* 534 * All spin locks are gone, no pointers remain to the chain, finish 535 * freeing it. 536 */ 537 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 538 HAMMER2_CHAIN_MODIFIED)) == 0); 539 hammer2_chain_drop_data(chain, 1); 540 541 KKASSERT(chain->dio == NULL); 542 543 /* 544 * Once chain resources are gone we can use the now dead chain 545 * structure to placehold what might otherwise require a recursive 546 * drop, because we have potentially two things to drop and can only 547 * return one directly. 548 */ 549 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 550 chain->flags &= ~HAMMER2_CHAIN_ALLOCATED; 551 chain->hmp = NULL; 552 kfree(chain, hmp->mchain); 553 } 554 555 /* 556 * Possible chaining loop when parent re-drop needed. 557 */ 558 return(rdrop); 559 } 560 561 /* 562 * On either last lock release or last drop 563 */ 564 static void 565 hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop) 566 { 567 /*hammer2_dev_t *hmp = chain->hmp;*/ 568 569 switch(chain->bref.type) { 570 case HAMMER2_BREF_TYPE_VOLUME: 571 case HAMMER2_BREF_TYPE_FREEMAP: 572 if (lastdrop) 573 chain->data = NULL; 574 break; 575 default: 576 KKASSERT(chain->data == NULL); 577 break; 578 } 579 } 580 581 /* 582 * Lock a referenced chain element, acquiring its data with I/O if necessary, 583 * and specify how you would like the data to be resolved. 584 * 585 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 586 * 587 * The lock is allowed to recurse, multiple locking ops will aggregate 588 * the requested resolve types. Once data is assigned it will not be 589 * removed until the last unlock. 590 * 591 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 592 * (typically used to avoid device/logical buffer 593 * aliasing for data) 594 * 595 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 596 * the INITIAL-create state (indirect blocks only). 597 * 598 * Do not resolve data elements for DATA chains. 599 * (typically used to avoid device/logical buffer 600 * aliasing for data) 601 * 602 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 603 * 604 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 605 * it will be locked exclusive. 606 * 607 * NOTE: Embedded elements (volume header, inodes) are always resolved 608 * regardless. 609 * 610 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 611 * element will instantiate and zero its buffer, and flush it on 612 * release. 613 * 614 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 615 * so as not to instantiate a device buffer, which could alias against 616 * a logical file buffer. However, if ALWAYS is specified the 617 * device buffer will be instantiated anyway. 618 * 619 * WARNING! This function blocks on I/O if data needs to be fetched. This 620 * blocking can run concurrent with other compatible lock holders 621 * who do not need data returning. The lock is not upgraded to 622 * exclusive during a data fetch, a separate bit is used to 623 * interlock I/O. However, an exclusive lock holder can still count 624 * on being interlocked against an I/O fetch managed by a shared 625 * lock holder. 626 */ 627 void 628 hammer2_chain_lock(hammer2_chain_t *chain, int how) 629 { 630 /* 631 * Ref and lock the element. Recursive locks are allowed. 632 */ 633 KKASSERT(chain->refs > 0); 634 atomic_add_int(&chain->lockcnt, 1); 635 636 /* 637 * Get the appropriate lock. 638 */ 639 if (how & HAMMER2_RESOLVE_SHARED) 640 hammer2_mtx_sh(&chain->lock); 641 else 642 hammer2_mtx_ex(&chain->lock); 643 ++curthread->td_tracker; 644 645 /* 646 * If we already have a valid data pointer no further action is 647 * necessary. 648 */ 649 if (chain->data) 650 return; 651 652 /* 653 * Do we have to resolve the data? 654 */ 655 switch(how & HAMMER2_RESOLVE_MASK) { 656 case HAMMER2_RESOLVE_NEVER: 657 return; 658 case HAMMER2_RESOLVE_MAYBE: 659 if (chain->flags & HAMMER2_CHAIN_INITIAL) 660 return; 661 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 662 return; 663 #if 0 664 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 665 return; 666 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 667 return; 668 #endif 669 /* fall through */ 670 case HAMMER2_RESOLVE_ALWAYS: 671 default: 672 break; 673 } 674 675 /* 676 * Caller requires data 677 */ 678 hammer2_chain_load_data(chain); 679 } 680 681 /* 682 * Obtains a second shared lock on the chain, does not account the second 683 * shared lock as being owned by the current thread. 684 * 685 * Caller must already own a shared lock on this chain. 686 */ 687 void 688 hammer2_chain_push_shared_lock(hammer2_chain_t *chain) 689 { 690 hammer2_mtx_sh(&chain->lock); 691 atomic_add_int(&chain->lockcnt, 1); 692 /* do not count in td_tracker for this thread */ 693 } 694 695 /* 696 * Accounts for a shared lock that was pushed to us as being owned by our 697 * thread. 698 */ 699 void 700 hammer2_chain_pull_shared_lock(hammer2_chain_t *chain) 701 { 702 ++curthread->td_tracker; 703 } 704 705 /* 706 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 707 * may be of any type. 708 * 709 * Once chain->data is set it cannot be disposed of until all locks are 710 * released. 711 */ 712 void 713 hammer2_chain_load_data(hammer2_chain_t *chain) 714 { 715 hammer2_blockref_t *bref; 716 hammer2_dev_t *hmp; 717 char *bdata; 718 int error; 719 720 /* 721 * Degenerate case, data already present. 722 */ 723 if (chain->data) 724 return; 725 726 hmp = chain->hmp; 727 KKASSERT(hmp != NULL); 728 729 /* 730 * Gain the IOINPROG bit, interlocked block. 731 */ 732 for (;;) { 733 u_int oflags; 734 u_int nflags; 735 736 oflags = chain->flags; 737 cpu_ccfence(); 738 if (oflags & HAMMER2_CHAIN_IOINPROG) { 739 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 740 tsleep_interlock(&chain->flags, 0); 741 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 742 tsleep(&chain->flags, PINTERLOCKED, 743 "h2iocw", 0); 744 } 745 /* retry */ 746 } else { 747 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 748 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 749 break; 750 } 751 /* retry */ 752 } 753 } 754 755 /* 756 * We own CHAIN_IOINPROG 757 * 758 * Degenerate case if we raced another load. 759 */ 760 if (chain->data) 761 goto done; 762 763 /* 764 * We must resolve to a device buffer, either by issuing I/O or 765 * by creating a zero-fill element. We do not mark the buffer 766 * dirty when creating a zero-fill element (the hammer2_chain_modify() 767 * API must still be used to do that). 768 * 769 * The device buffer is variable-sized in powers of 2 down 770 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 771 * chunk always contains buffers of the same size. (XXX) 772 * 773 * The minimum physical IO size may be larger than the variable 774 * block size. 775 */ 776 bref = &chain->bref; 777 778 /* 779 * The getblk() optimization can only be used on newly created 780 * elements if the physical block size matches the request. 781 */ 782 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 783 error = hammer2_io_new(hmp, bref->data_off, chain->bytes, 784 &chain->dio); 785 } else { 786 error = hammer2_io_bread(hmp, bref->data_off, chain->bytes, 787 &chain->dio); 788 hammer2_adjreadcounter(&chain->bref, chain->bytes); 789 } 790 if (error) { 791 chain->error = HAMMER2_ERROR_IO; 792 kprintf("hammer2_chain_lock: I/O error %016jx: %d\n", 793 (intmax_t)bref->data_off, error); 794 hammer2_io_bqrelse(&chain->dio); 795 goto done; 796 } 797 chain->error = 0; 798 799 /* 800 * NOTE: A locked chain's data cannot be modified without first 801 * calling hammer2_chain_modify(). 802 */ 803 804 /* 805 * Clear INITIAL. In this case we used io_new() and the buffer has 806 * been zero'd and marked dirty. 807 */ 808 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 809 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 810 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 811 chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO; 812 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 813 /* 814 * check data not currently synchronized due to 815 * modification. XXX assumes data stays in the buffer 816 * cache, which might not be true (need biodep on flush 817 * to calculate crc? or simple crc?). 818 */ 819 } else { 820 if (hammer2_chain_testcheck(chain, bdata) == 0) { 821 kprintf("chain %016jx.%02x meth=%02x " 822 "CHECK FAIL %08x (flags=%08x)\n", 823 chain->bref.data_off, 824 chain->bref.type, 825 chain->bref.methods, 826 hammer2_icrc32(bdata, chain->bytes), 827 chain->flags); 828 chain->error = HAMMER2_ERROR_CHECK; 829 } 830 } 831 832 /* 833 * Setup the data pointer, either pointing it to an embedded data 834 * structure and copying the data from the buffer, or pointing it 835 * into the buffer. 836 * 837 * The buffer is not retained when copying to an embedded data 838 * structure in order to avoid potential deadlocks or recursions 839 * on the same physical buffer. 840 * 841 * WARNING! Other threads can start using the data the instant we 842 * set chain->data non-NULL. 843 */ 844 switch (bref->type) { 845 case HAMMER2_BREF_TYPE_VOLUME: 846 case HAMMER2_BREF_TYPE_FREEMAP: 847 /* 848 * Copy data from bp to embedded buffer 849 */ 850 panic("hammer2_chain_lock: called on unresolved volume header"); 851 break; 852 case HAMMER2_BREF_TYPE_INODE: 853 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 854 case HAMMER2_BREF_TYPE_INDIRECT: 855 case HAMMER2_BREF_TYPE_DATA: 856 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 857 default: 858 /* 859 * Point data at the device buffer and leave dio intact. 860 */ 861 chain->data = (void *)bdata; 862 break; 863 } 864 865 /* 866 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested. 867 */ 868 done: 869 for (;;) { 870 u_int oflags; 871 u_int nflags; 872 873 oflags = chain->flags; 874 nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG | 875 HAMMER2_CHAIN_IOSIGNAL); 876 KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG); 877 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 878 if (oflags & HAMMER2_CHAIN_IOSIGNAL) 879 wakeup(&chain->flags); 880 break; 881 } 882 } 883 } 884 885 /* 886 * Unlock and deref a chain element. 887 * 888 * On the last lock release any non-embedded data (chain->dio) will be 889 * retired. 890 */ 891 void 892 hammer2_chain_unlock(hammer2_chain_t *chain) 893 { 894 hammer2_mtx_state_t ostate; 895 long *counterp; 896 u_int lockcnt; 897 898 --curthread->td_tracker; 899 /* 900 * If multiple locks are present (or being attempted) on this 901 * particular chain we can just unlock, drop refs, and return. 902 * 903 * Otherwise fall-through on the 1->0 transition. 904 */ 905 for (;;) { 906 lockcnt = chain->lockcnt; 907 KKASSERT(lockcnt > 0); 908 cpu_ccfence(); 909 if (lockcnt > 1) { 910 if (atomic_cmpset_int(&chain->lockcnt, 911 lockcnt, lockcnt - 1)) { 912 hammer2_mtx_unlock(&chain->lock); 913 return; 914 } 915 } else { 916 if (atomic_cmpset_int(&chain->lockcnt, 1, 0)) 917 break; 918 } 919 /* retry */ 920 } 921 922 /* 923 * On the 1->0 transition we upgrade the core lock (if necessary) 924 * to exclusive for terminal processing. If after upgrading we find 925 * that lockcnt is non-zero, another thread is racing us and will 926 * handle the unload for us later on, so just cleanup and return 927 * leaving the data/io intact 928 * 929 * Otherwise if lockcnt is still 0 it is possible for it to become 930 * non-zero and race, but since we hold the core->lock exclusively 931 * all that will happen is that the chain will be reloaded after we 932 * unload it. 933 */ 934 ostate = hammer2_mtx_upgrade(&chain->lock); 935 if (chain->lockcnt) { 936 hammer2_mtx_unlock(&chain->lock); 937 return; 938 } 939 940 /* 941 * Shortcut the case if the data is embedded or not resolved. 942 * Only drop non-DIO-based data if the chain is not modified. 943 * 944 * Do NOT NULL out chain->data (e.g. inode data), it might be 945 * dirty. 946 */ 947 if (chain->dio == NULL) { 948 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) 949 hammer2_chain_drop_data(chain, 0); 950 hammer2_mtx_unlock(&chain->lock); 951 return; 952 } 953 954 /* 955 * Statistics 956 */ 957 if (hammer2_io_isdirty(chain->dio)) { 958 switch(chain->bref.type) { 959 case HAMMER2_BREF_TYPE_DATA: 960 counterp = &hammer2_iod_file_write; 961 break; 962 case HAMMER2_BREF_TYPE_INODE: 963 counterp = &hammer2_iod_meta_write; 964 break; 965 case HAMMER2_BREF_TYPE_INDIRECT: 966 counterp = &hammer2_iod_indr_write; 967 break; 968 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 969 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 970 counterp = &hammer2_iod_fmap_write; 971 break; 972 default: 973 counterp = &hammer2_iod_volu_write; 974 break; 975 } 976 *counterp += chain->bytes; 977 } 978 979 /* 980 * Clean out the dio. 981 * 982 * NOTE: Freemap leaf's use reserved blocks and thus no aliasing 983 * is possible. 984 */ 985 chain->data = NULL; 986 hammer2_io_bqrelse(&chain->dio); 987 hammer2_mtx_unlock(&chain->lock); 988 } 989 990 /* 991 * Helper to obtain the blockref[] array base and count for a chain. 992 * 993 * XXX Not widely used yet, various use cases need to be validated and 994 * converted to use this function. 995 */ 996 static 997 hammer2_blockref_t * 998 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 999 { 1000 hammer2_blockref_t *base; 1001 int count; 1002 1003 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1004 base = NULL; 1005 1006 switch(parent->bref.type) { 1007 case HAMMER2_BREF_TYPE_INODE: 1008 count = HAMMER2_SET_COUNT; 1009 break; 1010 case HAMMER2_BREF_TYPE_INDIRECT: 1011 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1012 count = parent->bytes / sizeof(hammer2_blockref_t); 1013 break; 1014 case HAMMER2_BREF_TYPE_VOLUME: 1015 count = HAMMER2_SET_COUNT; 1016 break; 1017 case HAMMER2_BREF_TYPE_FREEMAP: 1018 count = HAMMER2_SET_COUNT; 1019 break; 1020 default: 1021 panic("hammer2_chain_create_indirect: " 1022 "unrecognized blockref type: %d", 1023 parent->bref.type); 1024 count = 0; 1025 break; 1026 } 1027 } else { 1028 switch(parent->bref.type) { 1029 case HAMMER2_BREF_TYPE_INODE: 1030 base = &parent->data->ipdata.u.blockset.blockref[0]; 1031 count = HAMMER2_SET_COUNT; 1032 break; 1033 case HAMMER2_BREF_TYPE_INDIRECT: 1034 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1035 base = &parent->data->npdata[0]; 1036 count = parent->bytes / sizeof(hammer2_blockref_t); 1037 break; 1038 case HAMMER2_BREF_TYPE_VOLUME: 1039 base = &parent->data->voldata. 1040 sroot_blockset.blockref[0]; 1041 count = HAMMER2_SET_COUNT; 1042 break; 1043 case HAMMER2_BREF_TYPE_FREEMAP: 1044 base = &parent->data->blkset.blockref[0]; 1045 count = HAMMER2_SET_COUNT; 1046 break; 1047 default: 1048 panic("hammer2_chain_create_indirect: " 1049 "unrecognized blockref type: %d", 1050 parent->bref.type); 1051 count = 0; 1052 break; 1053 } 1054 } 1055 *countp = count; 1056 1057 return base; 1058 } 1059 1060 /* 1061 * This counts the number of live blockrefs in a block array and 1062 * also calculates the point at which all remaining blockrefs are empty. 1063 * This routine can only be called on a live chain (DUPLICATED flag not set). 1064 * 1065 * NOTE: Flag is not set until after the count is complete, allowing 1066 * callers to test the flag without holding the spinlock. 1067 * 1068 * NOTE: If base is NULL the related chain is still in the INITIAL 1069 * state and there are no blockrefs to count. 1070 * 1071 * NOTE: live_count may already have some counts accumulated due to 1072 * creation and deletion and could even be initially negative. 1073 */ 1074 void 1075 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1076 hammer2_blockref_t *base, int count) 1077 { 1078 hammer2_spin_ex(&chain->core.spin); 1079 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 1080 if (base) { 1081 while (--count >= 0) { 1082 if (base[count].type) 1083 break; 1084 } 1085 chain->core.live_zero = count + 1; 1086 while (count >= 0) { 1087 if (base[count].type) 1088 atomic_add_int(&chain->core.live_count, 1089 1); 1090 --count; 1091 } 1092 } else { 1093 chain->core.live_zero = 0; 1094 } 1095 /* else do not modify live_count */ 1096 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 1097 } 1098 hammer2_spin_unex(&chain->core.spin); 1099 } 1100 1101 /* 1102 * Resize the chain's physical storage allocation in-place. This function does 1103 * not adjust the data pointer and must be followed by (typically) a 1104 * hammer2_chain_modify() call to copy any old data over and adjust the 1105 * data pointer. 1106 * 1107 * Chains can be resized smaller without reallocating the storage. Resizing 1108 * larger will reallocate the storage. Excess or prior storage is reclaimed 1109 * asynchronously at a later time. 1110 * 1111 * Must be passed an exclusively locked parent and chain. 1112 * 1113 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 1114 * to avoid instantiating a device buffer that conflicts with the vnode data 1115 * buffer. However, because H2 can compress or encrypt data, the chain may 1116 * have a dio assigned to it in those situations, and they do not conflict. 1117 * 1118 * XXX return error if cannot resize. 1119 */ 1120 void 1121 hammer2_chain_resize(hammer2_inode_t *ip, 1122 hammer2_chain_t *parent, hammer2_chain_t *chain, 1123 hammer2_tid_t mtid, hammer2_off_t dedup_off, 1124 int nradix, int flags) 1125 { 1126 hammer2_dev_t *hmp; 1127 size_t obytes; 1128 size_t nbytes; 1129 1130 hmp = chain->hmp; 1131 1132 /* 1133 * Only data and indirect blocks can be resized for now. 1134 * (The volu root, inodes, and freemap elements use a fixed size). 1135 */ 1136 KKASSERT(chain != &hmp->vchain); 1137 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1138 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT); 1139 KKASSERT(chain->parent == parent); 1140 1141 /* 1142 * Nothing to do if the element is already the proper size 1143 */ 1144 obytes = chain->bytes; 1145 nbytes = 1U << nradix; 1146 if (obytes == nbytes) 1147 return; 1148 1149 /* 1150 * Make sure the old data is instantiated so we can copy it. If this 1151 * is a data block, the device data may be superfluous since the data 1152 * might be in a logical block, but compressed or encrypted data is 1153 * another matter. 1154 * 1155 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set. 1156 */ 1157 hammer2_chain_modify(chain, mtid, dedup_off, 0); 1158 1159 /* 1160 * Relocate the block, even if making it smaller (because different 1161 * block sizes may be in different regions). 1162 * 1163 * (data blocks only, we aren't copying the storage here). 1164 */ 1165 hammer2_freemap_alloc(chain, nbytes); 1166 chain->bytes = nbytes; 1167 /*ip->delta_dcount += (ssize_t)(nbytes - obytes);*/ /* XXX atomic */ 1168 1169 /* 1170 * We don't want the followup chain_modify() to try to copy data 1171 * from the old (wrong-sized) buffer. It won't know how much to 1172 * copy. This case should only occur during writes when the 1173 * originator already has the data to write in-hand. 1174 */ 1175 if (chain->dio) { 1176 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA); 1177 hammer2_io_brelse(&chain->dio); 1178 chain->data = NULL; 1179 } 1180 } 1181 1182 /* 1183 * Set the chain modified so its data can be changed by the caller. 1184 * 1185 * Sets bref.modify_tid to mtid only if mtid != 0. Note that bref.modify_tid 1186 * is a CLC (cluster level change) field and is not updated by parent 1187 * propagation during a flush. 1188 * 1189 * If the caller passes a non-zero dedup_off we assign data_off to that 1190 * instead of allocating a ne block. Caller must not modify the data already 1191 * present at the target offset. 1192 */ 1193 void 1194 hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid, 1195 hammer2_off_t dedup_off, int flags) 1196 { 1197 hammer2_blockref_t obref; 1198 hammer2_dev_t *hmp; 1199 hammer2_io_t *dio; 1200 int error; 1201 int wasinitial; 1202 int newmod; 1203 char *bdata; 1204 1205 hmp = chain->hmp; 1206 obref = chain->bref; 1207 KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0); 1208 1209 /* 1210 * Data is not optional for freemap chains (we must always be sure 1211 * to copy the data on COW storage allocations). 1212 */ 1213 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 1214 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 1215 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) || 1216 (flags & HAMMER2_MODIFY_OPTDATA) == 0); 1217 } 1218 1219 /* 1220 * Data must be resolved if already assigned, unless explicitly 1221 * flagged otherwise. 1222 */ 1223 if (chain->data == NULL && (flags & HAMMER2_MODIFY_OPTDATA) == 0 && 1224 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 1225 hammer2_chain_load_data(chain); 1226 } 1227 1228 /* 1229 * Set MODIFIED to indicate that the chain has been modified. 1230 * Set UPDATE to ensure that the blockref is updated in the parent. 1231 */ 1232 if ((chain->flags & (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) == 1233 (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) { 1234 /* 1235 * Modified already set but a new allocation is needed 1236 * anyway because we recorded this data_off for possible 1237 * dedup operation. 1238 */ 1239 newmod = 1; 1240 } else if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1241 /* 1242 * Must set modified bit. 1243 */ 1244 atomic_add_long(&hammer2_count_modified_chains, 1); 1245 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1246 hammer2_pfs_memory_inc(chain->pmp); /* can be NULL */ 1247 newmod = 1; 1248 } else { 1249 /* 1250 * Already flagged modified, no new allocation is needed. 1251 */ 1252 newmod = 0; 1253 } 1254 1255 /* 1256 * Flag parent update required, clear DEDUP flag (already processed 1257 * above). 1258 */ 1259 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 1260 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1261 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DEDUP); 1262 1263 /* 1264 * The modification or re-modification requires an allocation and 1265 * possible COW. 1266 * 1267 * If dedup_off is non-zero, caller already has a data offset 1268 * containing the caller's desired data. The dedup offset is 1269 * allowed to be in a partially free state and we must be sure 1270 * to reset it to a fully allocated state to force two bulkfree 1271 * passes to free it again. 1272 * 1273 * XXX can a chain already be marked MODIFIED without a data 1274 * assignment? If not, assert here instead of testing the case. 1275 */ 1276 if (chain != &hmp->vchain && chain != &hmp->fchain) { 1277 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 || 1278 newmod 1279 ) { 1280 if (dedup_off) { 1281 chain->bref.data_off = dedup_off; 1282 chain->bytes = 1 << (dedup_off & 1283 HAMMER2_OFF_MASK_RADIX); 1284 atomic_set_int(&chain->flags, 1285 HAMMER2_CHAIN_DEDUP); 1286 hammer2_freemap_adjust(hmp, &chain->bref, 1287 HAMMER2_FREEMAP_DORECOVER); 1288 } else { 1289 hammer2_freemap_alloc(chain, chain->bytes); 1290 } 1291 /* XXX failed allocation */ 1292 } 1293 } 1294 1295 /* 1296 * Update mirror_tid and modify_tid. modify_tid is only updated 1297 * if not passed as zero (during flushes, parent propagation passes 1298 * the value 0). 1299 * 1300 * NOTE: chain->pmp could be the device spmp. 1301 */ 1302 chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1; 1303 if (mtid) 1304 chain->bref.modify_tid = mtid; 1305 1306 /* 1307 * Set BMAPUPD to tell the flush code that an existing blockmap entry 1308 * requires updating as well as to tell the delete code that the 1309 * chain's blockref might not exactly match (in terms of physical size 1310 * or block offset) the one in the parent's blocktable. The base key 1311 * of course will still match. 1312 */ 1313 if (chain->flags & HAMMER2_CHAIN_BMAPPED) 1314 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD); 1315 1316 /* 1317 * Short-cut data blocks which the caller does not need an actual 1318 * data reference to (aka OPTDATA), as long as the chain does not 1319 * already have a data pointer to the data. This generally means 1320 * that the modifications are being done via the logical buffer cache. 1321 * The INITIAL flag relates only to the device data buffer and thus 1322 * remains unchange in this situation. 1323 */ 1324 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA && 1325 (flags & HAMMER2_MODIFY_OPTDATA) && 1326 chain->data == NULL) { 1327 goto skip2; 1328 } 1329 1330 /* 1331 * Clearing the INITIAL flag (for indirect blocks) indicates that 1332 * we've processed the uninitialized storage allocation. 1333 * 1334 * If this flag is already clear we are likely in a copy-on-write 1335 * situation but we have to be sure NOT to bzero the storage if 1336 * no data is present. 1337 */ 1338 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1339 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1340 wasinitial = 1; 1341 } else { 1342 wasinitial = 0; 1343 } 1344 1345 /* 1346 * Instantiate data buffer and possibly execute COW operation 1347 */ 1348 switch(chain->bref.type) { 1349 case HAMMER2_BREF_TYPE_VOLUME: 1350 case HAMMER2_BREF_TYPE_FREEMAP: 1351 /* 1352 * The data is embedded, no copy-on-write operation is 1353 * needed. 1354 */ 1355 KKASSERT(chain->dio == NULL); 1356 break; 1357 case HAMMER2_BREF_TYPE_INODE: 1358 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1359 case HAMMER2_BREF_TYPE_DATA: 1360 case HAMMER2_BREF_TYPE_INDIRECT: 1361 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1362 /* 1363 * Perform the copy-on-write operation 1364 * 1365 * zero-fill or copy-on-write depending on whether 1366 * chain->data exists or not and set the dirty state for 1367 * the new buffer. hammer2_io_new() will handle the 1368 * zero-fill. 1369 * 1370 * If a dedup_off was supplied this is an existing block 1371 * and no COW, copy, or further modification is required. 1372 */ 1373 KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain); 1374 1375 if (wasinitial && dedup_off == 0) { 1376 error = hammer2_io_new(hmp, chain->bref.data_off, 1377 chain->bytes, &dio); 1378 } else { 1379 error = hammer2_io_bread(hmp, chain->bref.data_off, 1380 chain->bytes, &dio); 1381 } 1382 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1383 1384 /* 1385 * If an I/O error occurs make sure callers cannot accidently 1386 * modify the old buffer's contents and corrupt the filesystem. 1387 */ 1388 if (error) { 1389 kprintf("hammer2_chain_modify: hmp=%p I/O error\n", 1390 hmp); 1391 chain->error = HAMMER2_ERROR_IO; 1392 hammer2_io_brelse(&dio); 1393 hammer2_io_brelse(&chain->dio); 1394 chain->data = NULL; 1395 break; 1396 } 1397 chain->error = 0; 1398 bdata = hammer2_io_data(dio, chain->bref.data_off); 1399 1400 if (chain->data) { 1401 /* 1402 * COW (unless a dedup). 1403 */ 1404 KKASSERT(chain->dio != NULL); 1405 if (chain->data != (void *)bdata && dedup_off == 0) { 1406 bcopy(chain->data, bdata, chain->bytes); 1407 } 1408 } else if (wasinitial == 0) { 1409 /* 1410 * We have a problem. We were asked to COW but 1411 * we don't have any data to COW with! 1412 */ 1413 panic("hammer2_chain_modify: having a COW %p\n", 1414 chain); 1415 } 1416 1417 /* 1418 * Retire the old buffer, replace with the new. Dirty or 1419 * redirty the new buffer. 1420 * 1421 * WARNING! The system buffer cache may have already flushed 1422 * the buffer, so we must be sure to [re]dirty it 1423 * for further modification. 1424 * 1425 * If dedup_off was supplied, the caller is not 1426 * expected to make any further modification to the 1427 * buffer. 1428 */ 1429 if (chain->dio) 1430 hammer2_io_bqrelse(&chain->dio); 1431 chain->data = (void *)bdata; 1432 chain->dio = dio; 1433 if (dedup_off == 0) 1434 hammer2_io_setdirty(dio); 1435 break; 1436 default: 1437 panic("hammer2_chain_modify: illegal non-embedded type %d", 1438 chain->bref.type); 1439 break; 1440 1441 } 1442 skip2: 1443 /* 1444 * setflush on parent indicating that the parent must recurse down 1445 * to us. Do not call on chain itself which might already have it 1446 * set. 1447 */ 1448 if (chain->parent) 1449 hammer2_chain_setflush(chain->parent); 1450 } 1451 1452 /* 1453 * Modify the chain associated with an inode. 1454 */ 1455 void 1456 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 1457 hammer2_tid_t mtid, int flags) 1458 { 1459 hammer2_inode_modify(ip); 1460 hammer2_chain_modify(chain, mtid, 0, flags); 1461 } 1462 1463 /* 1464 * Volume header data locks 1465 */ 1466 void 1467 hammer2_voldata_lock(hammer2_dev_t *hmp) 1468 { 1469 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 1470 } 1471 1472 void 1473 hammer2_voldata_unlock(hammer2_dev_t *hmp) 1474 { 1475 lockmgr(&hmp->vollk, LK_RELEASE); 1476 } 1477 1478 void 1479 hammer2_voldata_modify(hammer2_dev_t *hmp) 1480 { 1481 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1482 atomic_add_long(&hammer2_count_modified_chains, 1); 1483 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 1484 hammer2_pfs_memory_inc(hmp->vchain.pmp); 1485 } 1486 } 1487 1488 /* 1489 * This function returns the chain at the nearest key within the specified 1490 * range. The returned chain will be referenced but not locked. 1491 * 1492 * This function will recurse through chain->rbtree as necessary and will 1493 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1494 * the iteration value is less than the current value of *key_nextp. 1495 * 1496 * The caller should use (*key_nextp) to calculate the actual range of 1497 * the returned element, which will be (key_beg to *key_nextp - 1), because 1498 * there might be another element which is superior to the returned element 1499 * and overlaps it. 1500 * 1501 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1502 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1503 * it will wind up being (key_end + 1). 1504 * 1505 * WARNING! Must be called with child's spinlock held. Spinlock remains 1506 * held through the operation. 1507 */ 1508 struct hammer2_chain_find_info { 1509 hammer2_chain_t *best; 1510 hammer2_key_t key_beg; 1511 hammer2_key_t key_end; 1512 hammer2_key_t key_next; 1513 }; 1514 1515 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 1516 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 1517 1518 static 1519 hammer2_chain_t * 1520 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 1521 hammer2_key_t key_beg, hammer2_key_t key_end) 1522 { 1523 struct hammer2_chain_find_info info; 1524 1525 info.best = NULL; 1526 info.key_beg = key_beg; 1527 info.key_end = key_end; 1528 info.key_next = *key_nextp; 1529 1530 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 1531 hammer2_chain_find_cmp, hammer2_chain_find_callback, 1532 &info); 1533 *key_nextp = info.key_next; 1534 #if 0 1535 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 1536 parent, key_beg, key_end, *key_nextp); 1537 #endif 1538 1539 return (info.best); 1540 } 1541 1542 static 1543 int 1544 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 1545 { 1546 struct hammer2_chain_find_info *info = data; 1547 hammer2_key_t child_beg; 1548 hammer2_key_t child_end; 1549 1550 child_beg = child->bref.key; 1551 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 1552 1553 if (child_end < info->key_beg) 1554 return(-1); 1555 if (child_beg > info->key_end) 1556 return(1); 1557 return(0); 1558 } 1559 1560 static 1561 int 1562 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 1563 { 1564 struct hammer2_chain_find_info *info = data; 1565 hammer2_chain_t *best; 1566 hammer2_key_t child_end; 1567 1568 /* 1569 * WARNING! Do not discard DUPLICATED chains, it is possible that 1570 * we are catching an insertion half-way done. If a 1571 * duplicated chain turns out to be the best choice the 1572 * caller will re-check its flags after locking it. 1573 * 1574 * WARNING! Layerq is scanned forwards, exact matches should keep 1575 * the existing info->best. 1576 */ 1577 if ((best = info->best) == NULL) { 1578 /* 1579 * No previous best. Assign best 1580 */ 1581 info->best = child; 1582 } else if (best->bref.key <= info->key_beg && 1583 child->bref.key <= info->key_beg) { 1584 /* 1585 * Illegal overlap. 1586 */ 1587 KKASSERT(0); 1588 /*info->best = child;*/ 1589 } else if (child->bref.key < best->bref.key) { 1590 /* 1591 * Child has a nearer key and best is not flush with key_beg. 1592 * Set best to child. Truncate key_next to the old best key. 1593 */ 1594 info->best = child; 1595 if (info->key_next > best->bref.key || info->key_next == 0) 1596 info->key_next = best->bref.key; 1597 } else if (child->bref.key == best->bref.key) { 1598 /* 1599 * If our current best is flush with the child then this 1600 * is an illegal overlap. 1601 * 1602 * key_next will automatically be limited to the smaller of 1603 * the two end-points. 1604 */ 1605 KKASSERT(0); 1606 info->best = child; 1607 } else { 1608 /* 1609 * Keep the current best but truncate key_next to the child's 1610 * base. 1611 * 1612 * key_next will also automatically be limited to the smaller 1613 * of the two end-points (probably not necessary for this case 1614 * but we do it anyway). 1615 */ 1616 if (info->key_next > child->bref.key || info->key_next == 0) 1617 info->key_next = child->bref.key; 1618 } 1619 1620 /* 1621 * Always truncate key_next based on child's end-of-range. 1622 */ 1623 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 1624 if (child_end && (info->key_next > child_end || info->key_next == 0)) 1625 info->key_next = child_end; 1626 1627 return(0); 1628 } 1629 1630 /* 1631 * Retrieve the specified chain from a media blockref, creating the 1632 * in-memory chain structure which reflects it. 1633 * 1634 * To handle insertion races pass the INSERT_RACE flag along with the 1635 * generation number of the core. NULL will be returned if the generation 1636 * number changes before we have a chance to insert the chain. Insert 1637 * races can occur because the parent might be held shared. 1638 * 1639 * Caller must hold the parent locked shared or exclusive since we may 1640 * need the parent's bref array to find our block. 1641 * 1642 * WARNING! chain->pmp is always set to NULL for any chain representing 1643 * part of the super-root topology. 1644 */ 1645 hammer2_chain_t * 1646 hammer2_chain_get(hammer2_chain_t *parent, int generation, 1647 hammer2_blockref_t *bref) 1648 { 1649 hammer2_dev_t *hmp = parent->hmp; 1650 hammer2_chain_t *chain; 1651 int error; 1652 1653 /* 1654 * Allocate a chain structure representing the existing media 1655 * entry. Resulting chain has one ref and is not locked. 1656 */ 1657 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 1658 chain = hammer2_chain_alloc(hmp, NULL, bref); 1659 else 1660 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 1661 /* ref'd chain returned */ 1662 1663 /* 1664 * Flag that the chain is in the parent's blockmap so delete/flush 1665 * knows what to do with it. 1666 */ 1667 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 1668 1669 /* 1670 * Link the chain into its parent. A spinlock is required to safely 1671 * access the RBTREE, and it is possible to collide with another 1672 * hammer2_chain_get() operation because the caller might only hold 1673 * a shared lock on the parent. 1674 */ 1675 KKASSERT(parent->refs > 0); 1676 error = hammer2_chain_insert(parent, chain, 1677 HAMMER2_CHAIN_INSERT_SPIN | 1678 HAMMER2_CHAIN_INSERT_RACE, 1679 generation); 1680 if (error) { 1681 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 1682 kprintf("chain %p get race\n", chain); 1683 hammer2_chain_drop(chain); 1684 chain = NULL; 1685 } else { 1686 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 1687 } 1688 1689 /* 1690 * Return our new chain referenced but not locked, or NULL if 1691 * a race occurred. 1692 */ 1693 return (chain); 1694 } 1695 1696 /* 1697 * Lookup initialization/completion API 1698 */ 1699 hammer2_chain_t * 1700 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 1701 { 1702 hammer2_chain_ref(parent); 1703 if (flags & HAMMER2_LOOKUP_SHARED) { 1704 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 1705 HAMMER2_RESOLVE_SHARED); 1706 } else { 1707 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1708 } 1709 return (parent); 1710 } 1711 1712 void 1713 hammer2_chain_lookup_done(hammer2_chain_t *parent) 1714 { 1715 if (parent) { 1716 hammer2_chain_unlock(parent); 1717 hammer2_chain_drop(parent); 1718 } 1719 } 1720 1721 hammer2_chain_t * 1722 hammer2_chain_getparent(hammer2_chain_t **parentp, int how) 1723 { 1724 hammer2_chain_t *oparent; 1725 hammer2_chain_t *nparent; 1726 1727 /* 1728 * Be careful of order, oparent must be unlocked before nparent 1729 * is locked below to avoid a deadlock. 1730 */ 1731 oparent = *parentp; 1732 hammer2_spin_ex(&oparent->core.spin); 1733 nparent = oparent->parent; 1734 hammer2_chain_ref(nparent); 1735 hammer2_spin_unex(&oparent->core.spin); 1736 if (oparent) { 1737 hammer2_chain_unlock(oparent); 1738 hammer2_chain_drop(oparent); 1739 oparent = NULL; 1740 } 1741 1742 hammer2_chain_lock(nparent, how); 1743 *parentp = nparent; 1744 1745 return (nparent); 1746 } 1747 1748 /* 1749 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 1750 * (*parentp) typically points to an inode but can also point to a related 1751 * indirect block and this function will recurse upwards and find the inode 1752 * again. 1753 * 1754 * (*parentp) must be exclusively locked and referenced and can be an inode 1755 * or an existing indirect block within the inode. 1756 * 1757 * On return (*parentp) will be modified to point at the deepest parent chain 1758 * element encountered during the search, as a helper for an insertion or 1759 * deletion. The new (*parentp) will be locked and referenced and the old 1760 * will be unlocked and dereferenced (no change if they are both the same). 1761 * 1762 * The matching chain will be returned exclusively locked. If NOLOCK is 1763 * requested the chain will be returned only referenced. Note that the 1764 * parent chain must always be locked shared or exclusive, matching the 1765 * HAMMER2_LOOKUP_SHARED flag. We can conceivably lock it SHARED temporarily 1766 * when NOLOCK is specified but that complicates matters if *parentp must 1767 * inherit the chain. 1768 * 1769 * NOLOCK also implies NODATA, since an unlocked chain usually has a NULL 1770 * data pointer or can otherwise be in flux. 1771 * 1772 * NULL is returned if no match was found, but (*parentp) will still 1773 * potentially be adjusted. 1774 * 1775 * If a fatal error occurs (typically an I/O error), a dummy chain is 1776 * returned with chain->error and error-identifying information set. This 1777 * chain will assert if you try to do anything fancy with it. 1778 * 1779 * XXX Depending on where the error occurs we should allow continued iteration. 1780 * 1781 * On return (*key_nextp) will point to an iterative value for key_beg. 1782 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 1783 * 1784 * This function will also recurse up the chain if the key is not within the 1785 * current parent's range. (*parentp) can never be set to NULL. An iteration 1786 * can simply allow (*parentp) to float inside the loop. 1787 * 1788 * NOTE! chain->data is not always resolved. By default it will not be 1789 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 1790 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 1791 * BREF_TYPE_DATA as the device buffer can alias the logical file 1792 * buffer). 1793 */ 1794 hammer2_chain_t * 1795 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 1796 hammer2_key_t key_beg, hammer2_key_t key_end, 1797 int *cache_indexp, int flags) 1798 { 1799 hammer2_dev_t *hmp; 1800 hammer2_chain_t *parent; 1801 hammer2_chain_t *chain; 1802 hammer2_blockref_t *base; 1803 hammer2_blockref_t *bref; 1804 hammer2_blockref_t bcopy; 1805 hammer2_key_t scan_beg; 1806 hammer2_key_t scan_end; 1807 int count = 0; 1808 int how_always = HAMMER2_RESOLVE_ALWAYS; 1809 int how_maybe = HAMMER2_RESOLVE_MAYBE; 1810 int how; 1811 int generation; 1812 int maxloops = 300000; 1813 1814 if (flags & HAMMER2_LOOKUP_ALWAYS) { 1815 how_maybe = how_always; 1816 how = HAMMER2_RESOLVE_ALWAYS; 1817 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 1818 how = HAMMER2_RESOLVE_NEVER; 1819 } else { 1820 how = HAMMER2_RESOLVE_MAYBE; 1821 } 1822 if (flags & HAMMER2_LOOKUP_SHARED) { 1823 how_maybe |= HAMMER2_RESOLVE_SHARED; 1824 how_always |= HAMMER2_RESOLVE_SHARED; 1825 how |= HAMMER2_RESOLVE_SHARED; 1826 } 1827 1828 /* 1829 * Recurse (*parentp) upward if necessary until the parent completely 1830 * encloses the key range or we hit the inode. 1831 * 1832 * This function handles races against the flusher doing a delete- 1833 * duplicate above us and re-homes the parent to the duplicate in 1834 * that case, otherwise we'd wind up recursing down a stale chain. 1835 */ 1836 parent = *parentp; 1837 hmp = parent->hmp; 1838 1839 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1840 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1841 scan_beg = parent->bref.key; 1842 scan_end = scan_beg + 1843 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 1844 if (key_beg >= scan_beg && key_end <= scan_end) 1845 break; 1846 parent = hammer2_chain_getparent(parentp, how_maybe); 1847 } 1848 1849 again: 1850 if (--maxloops == 0) 1851 panic("hammer2_chain_lookup: maxloops"); 1852 /* 1853 * Locate the blockref array. Currently we do a fully associative 1854 * search through the array. 1855 */ 1856 switch(parent->bref.type) { 1857 case HAMMER2_BREF_TYPE_INODE: 1858 /* 1859 * Special shortcut for embedded data returns the inode 1860 * itself. Callers must detect this condition and access 1861 * the embedded data (the strategy code does this for us). 1862 * 1863 * This is only applicable to regular files and softlinks. 1864 */ 1865 if (parent->data->ipdata.meta.op_flags & 1866 HAMMER2_OPFLAG_DIRECTDATA) { 1867 if (flags & HAMMER2_LOOKUP_NODIRECT) { 1868 chain = NULL; 1869 *key_nextp = key_end + 1; 1870 goto done; 1871 } 1872 hammer2_chain_ref(parent); 1873 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) 1874 hammer2_chain_lock(parent, how_always); 1875 *key_nextp = key_end + 1; 1876 return (parent); 1877 } 1878 base = &parent->data->ipdata.u.blockset.blockref[0]; 1879 count = HAMMER2_SET_COUNT; 1880 break; 1881 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1882 case HAMMER2_BREF_TYPE_INDIRECT: 1883 /* 1884 * Handle MATCHIND on the parent 1885 */ 1886 if (flags & HAMMER2_LOOKUP_MATCHIND) { 1887 scan_beg = parent->bref.key; 1888 scan_end = scan_beg + 1889 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 1890 if (key_beg == scan_beg && key_end == scan_end) { 1891 chain = parent; 1892 hammer2_chain_ref(chain); 1893 hammer2_chain_lock(chain, how_maybe); 1894 *key_nextp = scan_end + 1; 1895 goto done; 1896 } 1897 } 1898 /* 1899 * Optimize indirect blocks in the INITIAL state to avoid 1900 * I/O. 1901 */ 1902 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1903 base = NULL; 1904 } else { 1905 if (parent->data == NULL) 1906 panic("parent->data is NULL"); 1907 base = &parent->data->npdata[0]; 1908 } 1909 count = parent->bytes / sizeof(hammer2_blockref_t); 1910 break; 1911 case HAMMER2_BREF_TYPE_VOLUME: 1912 base = &parent->data->voldata.sroot_blockset.blockref[0]; 1913 count = HAMMER2_SET_COUNT; 1914 break; 1915 case HAMMER2_BREF_TYPE_FREEMAP: 1916 base = &parent->data->blkset.blockref[0]; 1917 count = HAMMER2_SET_COUNT; 1918 break; 1919 default: 1920 kprintf("hammer2_chain_lookup: unrecognized " 1921 "blockref(B) type: %d", 1922 parent->bref.type); 1923 while (1) 1924 tsleep(&base, 0, "dead", 0); 1925 panic("hammer2_chain_lookup: unrecognized " 1926 "blockref(B) type: %d", 1927 parent->bref.type); 1928 base = NULL; /* safety */ 1929 count = 0; /* safety */ 1930 } 1931 1932 /* 1933 * Merged scan to find next candidate. 1934 * 1935 * hammer2_base_*() functions require the parent->core.live_* fields 1936 * to be synchronized. 1937 * 1938 * We need to hold the spinlock to access the block array and RB tree 1939 * and to interlock chain creation. 1940 */ 1941 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 1942 hammer2_chain_countbrefs(parent, base, count); 1943 1944 /* 1945 * Combined search 1946 */ 1947 hammer2_spin_ex(&parent->core.spin); 1948 chain = hammer2_combined_find(parent, base, count, 1949 cache_indexp, key_nextp, 1950 key_beg, key_end, 1951 &bref); 1952 generation = parent->core.generation; 1953 1954 /* 1955 * Exhausted parent chain, iterate. 1956 */ 1957 if (bref == NULL) { 1958 hammer2_spin_unex(&parent->core.spin); 1959 if (key_beg == key_end) /* short cut single-key case */ 1960 return (NULL); 1961 1962 /* 1963 * Stop if we reached the end of the iteration. 1964 */ 1965 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 1966 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1967 return (NULL); 1968 } 1969 1970 /* 1971 * Calculate next key, stop if we reached the end of the 1972 * iteration, otherwise go up one level and loop. 1973 */ 1974 key_beg = parent->bref.key + 1975 ((hammer2_key_t)1 << parent->bref.keybits); 1976 if (key_beg == 0 || key_beg > key_end) 1977 return (NULL); 1978 parent = hammer2_chain_getparent(parentp, how_maybe); 1979 goto again; 1980 } 1981 1982 /* 1983 * Selected from blockref or in-memory chain. 1984 */ 1985 if (chain == NULL) { 1986 bcopy = *bref; 1987 hammer2_spin_unex(&parent->core.spin); 1988 chain = hammer2_chain_get(parent, generation, 1989 &bcopy); 1990 if (chain == NULL) { 1991 kprintf("retry lookup parent %p keys %016jx:%016jx\n", 1992 parent, key_beg, key_end); 1993 goto again; 1994 } 1995 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 1996 hammer2_chain_drop(chain); 1997 goto again; 1998 } 1999 } else { 2000 hammer2_chain_ref(chain); 2001 hammer2_spin_unex(&parent->core.spin); 2002 } 2003 2004 /* 2005 * chain is referenced but not locked. We must lock the chain 2006 * to obtain definitive DUPLICATED/DELETED state 2007 */ 2008 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2009 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2010 hammer2_chain_lock(chain, how_maybe); 2011 } else { 2012 hammer2_chain_lock(chain, how); 2013 } 2014 2015 /* 2016 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2017 * 2018 * NOTE: Chain's key range is not relevant as there might be 2019 * one-offs within the range that are not deleted. 2020 * 2021 * NOTE: Lookups can race delete-duplicate because 2022 * delete-duplicate does not lock the parent's core 2023 * (they just use the spinlock on the core). We must 2024 * check for races by comparing the DUPLICATED flag before 2025 * releasing the spinlock with the flag after locking the 2026 * chain. 2027 */ 2028 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2029 hammer2_chain_unlock(chain); 2030 hammer2_chain_drop(chain); 2031 key_beg = *key_nextp; 2032 if (key_beg == 0 || key_beg > key_end) 2033 return(NULL); 2034 goto again; 2035 } 2036 2037 /* 2038 * If the chain element is an indirect block it becomes the new 2039 * parent and we loop on it. We must maintain our top-down locks 2040 * to prevent the flusher from interfering (i.e. doing a 2041 * delete-duplicate and leaving us recursing down a deleted chain). 2042 * 2043 * The parent always has to be locked with at least RESOLVE_MAYBE 2044 * so we can access its data. It might need a fixup if the caller 2045 * passed incompatible flags. Be careful not to cause a deadlock 2046 * as a data-load requires an exclusive lock. 2047 * 2048 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2049 * range is within the requested key range we return the indirect 2050 * block and do NOT loop. This is usually only used to acquire 2051 * freemap nodes. 2052 */ 2053 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2054 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2055 hammer2_chain_unlock(parent); 2056 hammer2_chain_drop(parent); 2057 *parentp = parent = chain; 2058 goto again; 2059 } 2060 done: 2061 /* 2062 * All done, return the chain. 2063 * 2064 * If the caller does not want a locked chain, replace the lock with 2065 * a ref. Perhaps this can eventually be optimized to not obtain the 2066 * lock in the first place for situations where the data does not 2067 * need to be resolved. 2068 */ 2069 if (chain) { 2070 if (flags & HAMMER2_LOOKUP_NOLOCK) 2071 hammer2_chain_unlock(chain); 2072 } 2073 2074 return (chain); 2075 } 2076 2077 /* 2078 * After having issued a lookup we can iterate all matching keys. 2079 * 2080 * If chain is non-NULL we continue the iteration from just after it's index. 2081 * 2082 * If chain is NULL we assume the parent was exhausted and continue the 2083 * iteration at the next parent. 2084 * 2085 * If a fatal error occurs (typically an I/O error), a dummy chain is 2086 * returned with chain->error and error-identifying information set. This 2087 * chain will assert if you try to do anything fancy with it. 2088 * 2089 * XXX Depending on where the error occurs we should allow continued iteration. 2090 * 2091 * parent must be locked on entry and remains locked throughout. chain's 2092 * lock status must match flags. Chain is always at least referenced. 2093 * 2094 * WARNING! The MATCHIND flag does not apply to this function. 2095 */ 2096 hammer2_chain_t * 2097 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2098 hammer2_key_t *key_nextp, 2099 hammer2_key_t key_beg, hammer2_key_t key_end, 2100 int *cache_indexp, int flags) 2101 { 2102 hammer2_chain_t *parent; 2103 int how_maybe; 2104 2105 /* 2106 * Calculate locking flags for upward recursion. 2107 */ 2108 how_maybe = HAMMER2_RESOLVE_MAYBE; 2109 if (flags & HAMMER2_LOOKUP_SHARED) 2110 how_maybe |= HAMMER2_RESOLVE_SHARED; 2111 2112 parent = *parentp; 2113 2114 /* 2115 * Calculate the next index and recalculate the parent if necessary. 2116 */ 2117 if (chain) { 2118 key_beg = chain->bref.key + 2119 ((hammer2_key_t)1 << chain->bref.keybits); 2120 if ((flags & (HAMMER2_LOOKUP_NOLOCK | 2121 HAMMER2_LOOKUP_NOUNLOCK)) == 0) { 2122 hammer2_chain_unlock(chain); 2123 } 2124 hammer2_chain_drop(chain); 2125 2126 /* 2127 * chain invalid past this point, but we can still do a 2128 * pointer comparison w/parent. 2129 * 2130 * Any scan where the lookup returned degenerate data embedded 2131 * in the inode has an invalid index and must terminate. 2132 */ 2133 if (chain == parent) 2134 return(NULL); 2135 if (key_beg == 0 || key_beg > key_end) 2136 return(NULL); 2137 chain = NULL; 2138 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2139 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2140 /* 2141 * We reached the end of the iteration. 2142 */ 2143 return (NULL); 2144 } else { 2145 /* 2146 * Continue iteration with next parent unless the current 2147 * parent covers the range. 2148 */ 2149 key_beg = parent->bref.key + 2150 ((hammer2_key_t)1 << parent->bref.keybits); 2151 if (key_beg == 0 || key_beg > key_end) 2152 return (NULL); 2153 parent = hammer2_chain_getparent(parentp, how_maybe); 2154 } 2155 2156 /* 2157 * And execute 2158 */ 2159 return (hammer2_chain_lookup(parentp, key_nextp, 2160 key_beg, key_end, 2161 cache_indexp, flags)); 2162 } 2163 2164 /* 2165 * The raw scan function is similar to lookup/next but does not seek to a key. 2166 * Blockrefs are iterated via first_bref = (parent, NULL) and 2167 * next_chain = (parent, bref). 2168 * 2169 * The passed-in parent must be locked and its data resolved. The function 2170 * nominally returns a locked and referenced *chainp != NULL for chains 2171 * the caller might need to recurse on (and will dipose of any *chainp passed 2172 * in). The caller must check the chain->bref.type either way. 2173 * 2174 * *chainp is not set for leaf elements. 2175 * 2176 * This function takes a pointer to a stack-based bref structure whos 2177 * contents is updated for each iteration. The same pointer is returned, 2178 * or NULL when the iteration is complete. *firstp must be set to 1 for 2179 * the first ieration. This function will set it to 0. 2180 */ 2181 hammer2_blockref_t * 2182 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 2183 hammer2_blockref_t *bref, int *firstp, 2184 int *cache_indexp, int flags) 2185 { 2186 hammer2_dev_t *hmp; 2187 hammer2_blockref_t *base; 2188 hammer2_blockref_t *bref_ptr; 2189 hammer2_key_t key; 2190 hammer2_key_t next_key; 2191 hammer2_chain_t *chain = NULL; 2192 int count = 0; 2193 int how_always = HAMMER2_RESOLVE_ALWAYS; 2194 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2195 int how; 2196 int generation; 2197 int maxloops = 300000; 2198 2199 hmp = parent->hmp; 2200 2201 /* 2202 * Scan flags borrowed from lookup. 2203 */ 2204 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2205 how_maybe = how_always; 2206 how = HAMMER2_RESOLVE_ALWAYS; 2207 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 2208 how = HAMMER2_RESOLVE_NEVER; 2209 } else { 2210 how = HAMMER2_RESOLVE_MAYBE; 2211 } 2212 if (flags & HAMMER2_LOOKUP_SHARED) { 2213 how_maybe |= HAMMER2_RESOLVE_SHARED; 2214 how_always |= HAMMER2_RESOLVE_SHARED; 2215 how |= HAMMER2_RESOLVE_SHARED; 2216 } 2217 2218 /* 2219 * Calculate key to locate first/next element, unlocking the previous 2220 * element as we go. Be careful, the key calculation can overflow. 2221 * 2222 * (also reset bref to NULL) 2223 */ 2224 if (*firstp) { 2225 key = 0; 2226 *firstp = 0; 2227 } else { 2228 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 2229 if ((chain = *chainp) != NULL) { 2230 *chainp = NULL; 2231 hammer2_chain_unlock(chain); 2232 hammer2_chain_drop(chain); 2233 chain = NULL; 2234 } 2235 if (key == 0) { 2236 bref = NULL; 2237 goto done; 2238 } 2239 } 2240 2241 again: 2242 KKASSERT(parent->error == 0); /* XXX case not handled yet */ 2243 if (--maxloops == 0) 2244 panic("hammer2_chain_scan: maxloops"); 2245 /* 2246 * Locate the blockref array. Currently we do a fully associative 2247 * search through the array. 2248 */ 2249 switch(parent->bref.type) { 2250 case HAMMER2_BREF_TYPE_INODE: 2251 /* 2252 * An inode with embedded data has no sub-chains. 2253 * 2254 * WARNING! Bulk scan code may pass a static chain marked 2255 * as BREF_TYPE_INODE with a copy of the volume 2256 * root blockset to snapshot the volume. 2257 */ 2258 if (parent->data->ipdata.meta.op_flags & 2259 HAMMER2_OPFLAG_DIRECTDATA) { 2260 bref = NULL; 2261 goto done; 2262 } 2263 base = &parent->data->ipdata.u.blockset.blockref[0]; 2264 count = HAMMER2_SET_COUNT; 2265 break; 2266 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2267 case HAMMER2_BREF_TYPE_INDIRECT: 2268 /* 2269 * Optimize indirect blocks in the INITIAL state to avoid 2270 * I/O. 2271 */ 2272 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2273 base = NULL; 2274 } else { 2275 if (parent->data == NULL) 2276 panic("parent->data is NULL"); 2277 base = &parent->data->npdata[0]; 2278 } 2279 count = parent->bytes / sizeof(hammer2_blockref_t); 2280 break; 2281 case HAMMER2_BREF_TYPE_VOLUME: 2282 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2283 count = HAMMER2_SET_COUNT; 2284 break; 2285 case HAMMER2_BREF_TYPE_FREEMAP: 2286 base = &parent->data->blkset.blockref[0]; 2287 count = HAMMER2_SET_COUNT; 2288 break; 2289 default: 2290 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 2291 parent->bref.type); 2292 base = NULL; /* safety */ 2293 count = 0; /* safety */ 2294 } 2295 2296 /* 2297 * Merged scan to find next candidate. 2298 * 2299 * hammer2_base_*() functions require the parent->core.live_* fields 2300 * to be synchronized. 2301 * 2302 * We need to hold the spinlock to access the block array and RB tree 2303 * and to interlock chain creation. 2304 */ 2305 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2306 hammer2_chain_countbrefs(parent, base, count); 2307 2308 next_key = 0; 2309 bref_ptr = NULL; 2310 hammer2_spin_ex(&parent->core.spin); 2311 chain = hammer2_combined_find(parent, base, count, 2312 cache_indexp, &next_key, 2313 key, HAMMER2_KEY_MAX, 2314 &bref_ptr); 2315 generation = parent->core.generation; 2316 2317 /* 2318 * Exhausted parent chain, we're done. 2319 */ 2320 if (bref_ptr == NULL) { 2321 hammer2_spin_unex(&parent->core.spin); 2322 KKASSERT(chain == NULL); 2323 bref = NULL; 2324 goto done; 2325 } 2326 2327 /* 2328 * Copy into the supplied stack-based blockref. 2329 */ 2330 *bref = *bref_ptr; 2331 2332 /* 2333 * Selected from blockref or in-memory chain. 2334 */ 2335 if (chain == NULL) { 2336 switch(bref->type) { 2337 case HAMMER2_BREF_TYPE_INODE: 2338 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2339 case HAMMER2_BREF_TYPE_INDIRECT: 2340 case HAMMER2_BREF_TYPE_VOLUME: 2341 case HAMMER2_BREF_TYPE_FREEMAP: 2342 /* 2343 * Recursion, always get the chain 2344 */ 2345 hammer2_spin_unex(&parent->core.spin); 2346 chain = hammer2_chain_get(parent, generation, bref); 2347 if (chain == NULL) { 2348 kprintf("retry scan parent %p keys %016jx\n", 2349 parent, key); 2350 goto again; 2351 } 2352 if (bcmp(bref, bref_ptr, sizeof(*bref))) { 2353 hammer2_chain_drop(chain); 2354 chain = NULL; 2355 goto again; 2356 } 2357 break; 2358 default: 2359 /* 2360 * No recursion, do not waste time instantiating 2361 * a chain, just iterate using the bref. 2362 */ 2363 hammer2_spin_unex(&parent->core.spin); 2364 break; 2365 } 2366 } else { 2367 /* 2368 * Recursion or not we need the chain in order to supply 2369 * the bref. 2370 */ 2371 hammer2_chain_ref(chain); 2372 hammer2_spin_unex(&parent->core.spin); 2373 } 2374 2375 /* 2376 * chain is referenced but not locked. We must lock the chain 2377 * to obtain definitive DUPLICATED/DELETED state 2378 */ 2379 if (chain) 2380 hammer2_chain_lock(chain, how); 2381 2382 /* 2383 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2384 * 2385 * NOTE: chain's key range is not relevant as there might be 2386 * one-offs within the range that are not deleted. 2387 * 2388 * NOTE: XXX this could create problems with scans used in 2389 * situations other than mount-time recovery. 2390 * 2391 * NOTE: Lookups can race delete-duplicate because 2392 * delete-duplicate does not lock the parent's core 2393 * (they just use the spinlock on the core). We must 2394 * check for races by comparing the DUPLICATED flag before 2395 * releasing the spinlock with the flag after locking the 2396 * chain. 2397 */ 2398 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 2399 hammer2_chain_unlock(chain); 2400 hammer2_chain_drop(chain); 2401 chain = NULL; 2402 2403 key = next_key; 2404 if (key == 0) { 2405 bref = NULL; 2406 goto done; 2407 } 2408 goto again; 2409 } 2410 2411 done: 2412 /* 2413 * All done, return the bref or NULL, supply chain if necessary. 2414 */ 2415 if (chain) 2416 *chainp = chain; 2417 return (bref); 2418 } 2419 2420 /* 2421 * Create and return a new hammer2 system memory structure of the specified 2422 * key, type and size and insert it under (*parentp). This is a full 2423 * insertion, based on the supplied key/keybits, and may involve creating 2424 * indirect blocks and moving other chains around via delete/duplicate. 2425 * 2426 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2427 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2428 * FULL. This typically means that the caller is creating the chain after 2429 * doing a hammer2_chain_lookup(). 2430 * 2431 * (*parentp) must be exclusive locked and may be replaced on return 2432 * depending on how much work the function had to do. 2433 * 2434 * (*parentp) must not be errored or this function will assert. 2435 * 2436 * (*chainp) usually starts out NULL and returns the newly created chain, 2437 * but if the caller desires the caller may allocate a disconnected chain 2438 * and pass it in instead. 2439 * 2440 * This function should NOT be used to insert INDIRECT blocks. It is 2441 * typically used to create/insert inodes and data blocks. 2442 * 2443 * Caller must pass-in an exclusively locked parent the new chain is to 2444 * be inserted under, and optionally pass-in a disconnected, exclusively 2445 * locked chain to insert (else we create a new chain). The function will 2446 * adjust (*parentp) as necessary, create or connect the chain, and 2447 * return an exclusively locked chain in *chainp. 2448 * 2449 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 2450 * and will be reassigned. 2451 */ 2452 int 2453 hammer2_chain_create(hammer2_chain_t **parentp, 2454 hammer2_chain_t **chainp, hammer2_pfs_t *pmp, 2455 hammer2_key_t key, int keybits, int type, size_t bytes, 2456 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 2457 { 2458 hammer2_dev_t *hmp; 2459 hammer2_chain_t *chain; 2460 hammer2_chain_t *parent; 2461 hammer2_blockref_t *base; 2462 hammer2_blockref_t dummy; 2463 int allocated = 0; 2464 int error = 0; 2465 int count; 2466 int maxloops = 300000; 2467 2468 /* 2469 * Topology may be crossing a PFS boundary. 2470 */ 2471 parent = *parentp; 2472 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2473 KKASSERT(parent->error == 0); 2474 hmp = parent->hmp; 2475 chain = *chainp; 2476 2477 if (chain == NULL) { 2478 /* 2479 * First allocate media space and construct the dummy bref, 2480 * then allocate the in-memory chain structure. Set the 2481 * INITIAL flag for fresh chains which do not have embedded 2482 * data. 2483 * 2484 * XXX for now set the check mode of the child based on 2485 * the parent or, if the parent is an inode, the 2486 * specification in the inode. 2487 */ 2488 bzero(&dummy, sizeof(dummy)); 2489 dummy.type = type; 2490 dummy.key = key; 2491 dummy.keybits = keybits; 2492 dummy.data_off = hammer2_getradix(bytes); 2493 dummy.methods = parent->bref.methods; 2494 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && 2495 parent->data) { 2496 dummy.methods &= ~HAMMER2_ENC_CHECK(-1); 2497 dummy.methods |= HAMMER2_ENC_CHECK( 2498 parent->data->ipdata.meta.check_algo); 2499 } 2500 2501 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 2502 2503 /* 2504 * Lock the chain manually, chain_lock will load the chain 2505 * which we do NOT want to do. (note: chain->refs is set 2506 * to 1 by chain_alloc() for us, but lockcnt is not). 2507 */ 2508 chain->lockcnt = 1; 2509 hammer2_mtx_ex(&chain->lock); 2510 allocated = 1; 2511 ++curthread->td_tracker; 2512 2513 /* 2514 * Set INITIAL to optimize I/O. The flag will generally be 2515 * processed when we call hammer2_chain_modify(). 2516 * 2517 * Recalculate bytes to reflect the actual media block 2518 * allocation. 2519 */ 2520 bytes = (hammer2_off_t)1 << 2521 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 2522 chain->bytes = bytes; 2523 2524 switch(type) { 2525 case HAMMER2_BREF_TYPE_VOLUME: 2526 case HAMMER2_BREF_TYPE_FREEMAP: 2527 panic("hammer2_chain_create: called with volume type"); 2528 break; 2529 case HAMMER2_BREF_TYPE_INDIRECT: 2530 panic("hammer2_chain_create: cannot be used to" 2531 "create indirect block"); 2532 break; 2533 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2534 panic("hammer2_chain_create: cannot be used to" 2535 "create freemap root or node"); 2536 break; 2537 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2538 KKASSERT(bytes == sizeof(chain->data->bmdata)); 2539 /* fall through */ 2540 case HAMMER2_BREF_TYPE_INODE: 2541 case HAMMER2_BREF_TYPE_DATA: 2542 default: 2543 /* 2544 * leave chain->data NULL, set INITIAL 2545 */ 2546 KKASSERT(chain->data == NULL); 2547 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 2548 break; 2549 } 2550 } else { 2551 /* 2552 * We are reattaching a previously deleted chain, possibly 2553 * under a new parent and possibly with a new key/keybits. 2554 * The chain does not have to be in a modified state. The 2555 * UPDATE flag will be set later on in this routine. 2556 * 2557 * Do NOT mess with the current state of the INITIAL flag. 2558 */ 2559 chain->bref.key = key; 2560 chain->bref.keybits = keybits; 2561 if (chain->flags & HAMMER2_CHAIN_DELETED) 2562 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2563 KKASSERT(chain->parent == NULL); 2564 } 2565 if (flags & HAMMER2_INSERT_PFSROOT) 2566 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 2567 else 2568 chain->bref.flags &= ~HAMMER2_BREF_FLAG_PFSROOT; 2569 2570 /* 2571 * Calculate how many entries we have in the blockref array and 2572 * determine if an indirect block is required. 2573 */ 2574 again: 2575 if (--maxloops == 0) 2576 panic("hammer2_chain_create: maxloops"); 2577 2578 switch(parent->bref.type) { 2579 case HAMMER2_BREF_TYPE_INODE: 2580 KKASSERT((parent->data->ipdata.meta.op_flags & 2581 HAMMER2_OPFLAG_DIRECTDATA) == 0); 2582 KKASSERT(parent->data != NULL); 2583 base = &parent->data->ipdata.u.blockset.blockref[0]; 2584 count = HAMMER2_SET_COUNT; 2585 break; 2586 case HAMMER2_BREF_TYPE_INDIRECT: 2587 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2588 if (parent->flags & HAMMER2_CHAIN_INITIAL) 2589 base = NULL; 2590 else 2591 base = &parent->data->npdata[0]; 2592 count = parent->bytes / sizeof(hammer2_blockref_t); 2593 break; 2594 case HAMMER2_BREF_TYPE_VOLUME: 2595 KKASSERT(parent->data != NULL); 2596 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2597 count = HAMMER2_SET_COUNT; 2598 break; 2599 case HAMMER2_BREF_TYPE_FREEMAP: 2600 KKASSERT(parent->data != NULL); 2601 base = &parent->data->blkset.blockref[0]; 2602 count = HAMMER2_SET_COUNT; 2603 break; 2604 default: 2605 panic("hammer2_chain_create: unrecognized blockref type: %d", 2606 parent->bref.type); 2607 base = NULL; 2608 count = 0; 2609 break; 2610 } 2611 2612 /* 2613 * Make sure we've counted the brefs 2614 */ 2615 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2616 hammer2_chain_countbrefs(parent, base, count); 2617 2618 KKASSERT(parent->core.live_count >= 0 && 2619 parent->core.live_count <= count); 2620 2621 /* 2622 * If no free blockref could be found we must create an indirect 2623 * block and move a number of blockrefs into it. With the parent 2624 * locked we can safely lock each child in order to delete+duplicate 2625 * it without causing a deadlock. 2626 * 2627 * This may return the new indirect block or the old parent depending 2628 * on where the key falls. NULL is returned on error. 2629 */ 2630 if (parent->core.live_count == count) { 2631 hammer2_chain_t *nparent; 2632 2633 nparent = hammer2_chain_create_indirect(parent, key, keybits, 2634 mtid, type, &error); 2635 if (nparent == NULL) { 2636 if (allocated) 2637 hammer2_chain_drop(chain); 2638 chain = NULL; 2639 goto done; 2640 } 2641 if (parent != nparent) { 2642 hammer2_chain_unlock(parent); 2643 hammer2_chain_drop(parent); 2644 parent = *parentp = nparent; 2645 } 2646 goto again; 2647 } 2648 2649 /* 2650 * Link the chain into its parent. 2651 */ 2652 if (chain->parent != NULL) 2653 panic("hammer2: hammer2_chain_create: chain already connected"); 2654 KKASSERT(chain->parent == NULL); 2655 hammer2_chain_insert(parent, chain, 2656 HAMMER2_CHAIN_INSERT_SPIN | 2657 HAMMER2_CHAIN_INSERT_LIVE, 2658 0); 2659 2660 if (allocated) { 2661 /* 2662 * Mark the newly created chain modified. This will cause 2663 * UPDATE to be set and process the INITIAL flag. 2664 * 2665 * Device buffers are not instantiated for DATA elements 2666 * as these are handled by logical buffers. 2667 * 2668 * Indirect and freemap node indirect blocks are handled 2669 * by hammer2_chain_create_indirect() and not by this 2670 * function. 2671 * 2672 * Data for all other bref types is expected to be 2673 * instantiated (INODE, LEAF). 2674 */ 2675 switch(chain->bref.type) { 2676 case HAMMER2_BREF_TYPE_DATA: 2677 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2678 case HAMMER2_BREF_TYPE_INODE: 2679 hammer2_chain_modify(chain, mtid, dedup_off, 2680 HAMMER2_MODIFY_OPTDATA); 2681 break; 2682 default: 2683 /* 2684 * Remaining types are not supported by this function. 2685 * In particular, INDIRECT and LEAF_NODE types are 2686 * handled by create_indirect(). 2687 */ 2688 panic("hammer2_chain_create: bad type: %d", 2689 chain->bref.type); 2690 /* NOT REACHED */ 2691 break; 2692 } 2693 } else { 2694 /* 2695 * When reconnecting a chain we must set UPDATE and 2696 * setflush so the flush recognizes that it must update 2697 * the bref in the parent. 2698 */ 2699 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 2700 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 2701 } 2702 2703 /* 2704 * We must setflush(parent) to ensure that it recurses through to 2705 * chain. setflush(chain) might not work because ONFLUSH is possibly 2706 * already set in the chain (so it won't recurse up to set it in the 2707 * parent). 2708 */ 2709 hammer2_chain_setflush(parent); 2710 2711 done: 2712 *chainp = chain; 2713 2714 return (error); 2715 } 2716 2717 /* 2718 * Move the chain from its old parent to a new parent. The chain must have 2719 * already been deleted or already disconnected (or never associated) with 2720 * a parent. The chain is reassociated with the new parent and the deleted 2721 * flag will be cleared (no longer deleted). The chain's modification state 2722 * is not altered. 2723 * 2724 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 2725 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2726 * FULL. This typically means that the caller is creating the chain after 2727 * doing a hammer2_chain_lookup(). 2728 * 2729 * A non-NULL bref is typically passed when key and keybits must be overridden. 2730 * Note that hammer2_cluster_duplicate() *ONLY* uses the key and keybits fields 2731 * from a passed-in bref and uses the old chain's bref for everything else. 2732 * 2733 * Neither (parent) or (chain) can be errored. 2734 * 2735 * If (parent) is non-NULL then the new duplicated chain is inserted under 2736 * the parent. 2737 * 2738 * If (parent) is NULL then the newly duplicated chain is not inserted 2739 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 2740 * passing into hammer2_chain_create() after this function returns). 2741 * 2742 * WARNING! This function calls create which means it can insert indirect 2743 * blocks. This can cause other unrelated chains in the parent to 2744 * be moved to a newly inserted indirect block in addition to the 2745 * specific chain. 2746 */ 2747 void 2748 hammer2_chain_rename(hammer2_blockref_t *bref, 2749 hammer2_chain_t **parentp, hammer2_chain_t *chain, 2750 hammer2_tid_t mtid, int flags) 2751 { 2752 hammer2_dev_t *hmp; 2753 hammer2_chain_t *parent; 2754 size_t bytes; 2755 2756 /* 2757 * WARNING! We should never resolve DATA to device buffers 2758 * (XXX allow it if the caller did?), and since 2759 * we currently do not have the logical buffer cache 2760 * buffer in-hand to fix its cached physical offset 2761 * we also force the modify code to not COW it. XXX 2762 */ 2763 hmp = chain->hmp; 2764 KKASSERT(chain->parent == NULL); 2765 KKASSERT(chain->error == 0); 2766 2767 /* 2768 * Now create a duplicate of the chain structure, associating 2769 * it with the same core, making it the same size, pointing it 2770 * to the same bref (the same media block). 2771 */ 2772 if (bref == NULL) 2773 bref = &chain->bref; 2774 bytes = (hammer2_off_t)1 << 2775 (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 2776 2777 /* 2778 * If parent is not NULL the duplicated chain will be entered under 2779 * the parent and the UPDATE bit set to tell flush to update 2780 * the blockref. 2781 * 2782 * We must setflush(parent) to ensure that it recurses through to 2783 * chain. setflush(chain) might not work because ONFLUSH is possibly 2784 * already set in the chain (so it won't recurse up to set it in the 2785 * parent). 2786 * 2787 * Having both chains locked is extremely important for atomicy. 2788 */ 2789 if (parentp && (parent = *parentp) != NULL) { 2790 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2791 KKASSERT(parent->refs > 0); 2792 KKASSERT(parent->error == 0); 2793 2794 hammer2_chain_create(parentp, &chain, chain->pmp, 2795 bref->key, bref->keybits, bref->type, 2796 chain->bytes, mtid, 0, flags); 2797 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 2798 hammer2_chain_setflush(*parentp); 2799 } 2800 } 2801 2802 /* 2803 * Helper function for deleting chains. 2804 * 2805 * The chain is removed from the live view (the RBTREE) as well as the parent's 2806 * blockmap. Both chain and its parent must be locked. 2807 * 2808 * parent may not be errored. chain can be errored. 2809 */ 2810 static void 2811 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 2812 hammer2_tid_t mtid, int flags) 2813 { 2814 hammer2_dev_t *hmp; 2815 2816 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 2817 HAMMER2_CHAIN_FICTITIOUS)) == 0); 2818 KKASSERT(chain->parent == parent); 2819 hmp = chain->hmp; 2820 2821 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 2822 /* 2823 * Chain is blockmapped, so there must be a parent. 2824 * Atomically remove the chain from the parent and remove 2825 * the blockmap entry. The parent must be set modified 2826 * to remove the blockmap entry. 2827 */ 2828 hammer2_blockref_t *base; 2829 int count; 2830 2831 KKASSERT(parent != NULL); 2832 KKASSERT(parent->error == 0); 2833 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 2834 hammer2_chain_modify(parent, mtid, 0, HAMMER2_MODIFY_OPTDATA); 2835 2836 /* 2837 * Calculate blockmap pointer 2838 */ 2839 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2840 hammer2_spin_ex(&parent->core.spin); 2841 2842 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2843 atomic_add_int(&parent->core.live_count, -1); 2844 ++parent->core.generation; 2845 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 2846 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2847 --parent->core.chain_count; 2848 chain->parent = NULL; 2849 2850 switch(parent->bref.type) { 2851 case HAMMER2_BREF_TYPE_INODE: 2852 /* 2853 * Access the inode's block array. However, there 2854 * is no block array if the inode is flagged 2855 * DIRECTDATA. The DIRECTDATA case typicaly only 2856 * occurs when a hardlink has been shifted up the 2857 * tree and the original inode gets replaced with 2858 * an OBJTYPE_HARDLINK placeholding inode. 2859 */ 2860 if (parent->data && 2861 (parent->data->ipdata.meta.op_flags & 2862 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 2863 base = 2864 &parent->data->ipdata.u.blockset.blockref[0]; 2865 } else { 2866 base = NULL; 2867 } 2868 count = HAMMER2_SET_COUNT; 2869 break; 2870 case HAMMER2_BREF_TYPE_INDIRECT: 2871 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2872 if (parent->data) 2873 base = &parent->data->npdata[0]; 2874 else 2875 base = NULL; 2876 count = parent->bytes / sizeof(hammer2_blockref_t); 2877 break; 2878 case HAMMER2_BREF_TYPE_VOLUME: 2879 base = &parent->data->voldata. 2880 sroot_blockset.blockref[0]; 2881 count = HAMMER2_SET_COUNT; 2882 break; 2883 case HAMMER2_BREF_TYPE_FREEMAP: 2884 base = &parent->data->blkset.blockref[0]; 2885 count = HAMMER2_SET_COUNT; 2886 break; 2887 default: 2888 base = NULL; 2889 count = 0; 2890 panic("hammer2_flush_pass2: " 2891 "unrecognized blockref type: %d", 2892 parent->bref.type); 2893 } 2894 2895 /* 2896 * delete blockmapped chain from its parent. 2897 * 2898 * The parent is not affected by any statistics in chain 2899 * which are pending synchronization. That is, there is 2900 * nothing to undo in the parent since they have not yet 2901 * been incorporated into the parent. 2902 * 2903 * The parent is affected by statistics stored in inodes. 2904 * Those have already been synchronized, so they must be 2905 * undone. XXX split update possible w/delete in middle? 2906 */ 2907 if (base) { 2908 int cache_index = -1; 2909 hammer2_base_delete(parent, base, count, 2910 &cache_index, chain); 2911 } 2912 hammer2_spin_unex(&parent->core.spin); 2913 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 2914 /* 2915 * Chain is not blockmapped but a parent is present. 2916 * Atomically remove the chain from the parent. There is 2917 * no blockmap entry to remove. 2918 * 2919 * Because chain was associated with a parent but not 2920 * synchronized, the chain's *_count_up fields contain 2921 * inode adjustment statistics which must be undone. 2922 */ 2923 hammer2_spin_ex(&parent->core.spin); 2924 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2925 atomic_add_int(&parent->core.live_count, -1); 2926 ++parent->core.generation; 2927 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 2928 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2929 --parent->core.chain_count; 2930 chain->parent = NULL; 2931 hammer2_spin_unex(&parent->core.spin); 2932 } else { 2933 /* 2934 * Chain is not blockmapped and has no parent. This 2935 * is a degenerate case. 2936 */ 2937 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2938 } 2939 } 2940 2941 /* 2942 * Create an indirect block that covers one or more of the elements in the 2943 * current parent. Either returns the existing parent with no locking or 2944 * ref changes or returns the new indirect block locked and referenced 2945 * and leaving the original parent lock/ref intact as well. 2946 * 2947 * If an error occurs, NULL is returned and *errorp is set to the error. 2948 * 2949 * The returned chain depends on where the specified key falls. 2950 * 2951 * The key/keybits for the indirect mode only needs to follow three rules: 2952 * 2953 * (1) That all elements underneath it fit within its key space and 2954 * 2955 * (2) That all elements outside it are outside its key space. 2956 * 2957 * (3) When creating the new indirect block any elements in the current 2958 * parent that fit within the new indirect block's keyspace must be 2959 * moved into the new indirect block. 2960 * 2961 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 2962 * keyspace the the current parent, but lookup/iteration rules will 2963 * ensure (and must ensure) that rule (2) for all parents leading up 2964 * to the nearest inode or the root volume header is adhered to. This 2965 * is accomplished by always recursing through matching keyspaces in 2966 * the hammer2_chain_lookup() and hammer2_chain_next() API. 2967 * 2968 * The current implementation calculates the current worst-case keyspace by 2969 * iterating the current parent and then divides it into two halves, choosing 2970 * whichever half has the most elements (not necessarily the half containing 2971 * the requested key). 2972 * 2973 * We can also opt to use the half with the least number of elements. This 2974 * causes lower-numbered keys (aka logical file offsets) to recurse through 2975 * fewer indirect blocks and higher-numbered keys to recurse through more. 2976 * This also has the risk of not moving enough elements to the new indirect 2977 * block and being forced to create several indirect blocks before the element 2978 * can be inserted. 2979 * 2980 * Must be called with an exclusively locked parent. 2981 */ 2982 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 2983 hammer2_key_t *keyp, int keybits, 2984 hammer2_blockref_t *base, int count); 2985 static int hammer2_chain_indkey_normal(hammer2_chain_t *parent, 2986 hammer2_key_t *keyp, int keybits, 2987 hammer2_blockref_t *base, int count); 2988 static 2989 hammer2_chain_t * 2990 hammer2_chain_create_indirect(hammer2_chain_t *parent, 2991 hammer2_key_t create_key, int create_bits, 2992 hammer2_tid_t mtid, int for_type, int *errorp) 2993 { 2994 hammer2_dev_t *hmp; 2995 hammer2_blockref_t *base; 2996 hammer2_blockref_t *bref; 2997 hammer2_blockref_t bcopy; 2998 hammer2_chain_t *chain; 2999 hammer2_chain_t *ichain; 3000 hammer2_chain_t dummy; 3001 hammer2_key_t key = create_key; 3002 hammer2_key_t key_beg; 3003 hammer2_key_t key_end; 3004 hammer2_key_t key_next; 3005 int keybits = create_bits; 3006 int count; 3007 int nbytes; 3008 int cache_index; 3009 int loops; 3010 int reason; 3011 int generation; 3012 int maxloops = 300000; 3013 3014 /* 3015 * Calculate the base blockref pointer or NULL if the chain 3016 * is known to be empty. We need to calculate the array count 3017 * for RB lookups either way. 3018 */ 3019 hmp = parent->hmp; 3020 *errorp = 0; 3021 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3022 3023 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3024 base = hammer2_chain_base_and_count(parent, &count); 3025 3026 /* 3027 * dummy used in later chain allocation (no longer used for lookups). 3028 */ 3029 bzero(&dummy, sizeof(dummy)); 3030 3031 /* 3032 * When creating an indirect block for a freemap node or leaf 3033 * the key/keybits must be fitted to static radix levels because 3034 * particular radix levels use particular reserved blocks in the 3035 * related zone. 3036 * 3037 * This routine calculates the key/radix of the indirect block 3038 * we need to create, and whether it is on the high-side or the 3039 * low-side. 3040 */ 3041 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3042 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3043 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3044 base, count); 3045 } else { 3046 keybits = hammer2_chain_indkey_normal(parent, &key, keybits, 3047 base, count); 3048 } 3049 3050 /* 3051 * Normalize the key for the radix being represented, keeping the 3052 * high bits and throwing away the low bits. 3053 */ 3054 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3055 3056 /* 3057 * How big should our new indirect block be? It has to be at least 3058 * as large as its parent. 3059 * 3060 * The freemap uses a specific indirect block size. 3061 * 3062 * The first indirect block level down from an inode typically 3063 * uses LBUFSIZE (16384), else it uses PBUFSIZE (65536). 3064 */ 3065 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3066 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3067 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3068 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3069 nbytes = HAMMER2_IND_BYTES_MIN; 3070 } else { 3071 nbytes = HAMMER2_IND_BYTES_MAX; 3072 } 3073 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3074 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3075 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3076 nbytes = count * sizeof(hammer2_blockref_t); 3077 } 3078 3079 /* 3080 * Ok, create our new indirect block 3081 */ 3082 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3083 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3084 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3085 } else { 3086 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 3087 } 3088 dummy.bref.key = key; 3089 dummy.bref.keybits = keybits; 3090 dummy.bref.data_off = hammer2_getradix(nbytes); 3091 dummy.bref.methods = parent->bref.methods; 3092 3093 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref); 3094 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3095 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3096 /* ichain has one ref at this point */ 3097 3098 /* 3099 * We have to mark it modified to allocate its block, but use 3100 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3101 * it won't be acted upon by the flush code. 3102 */ 3103 hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA); 3104 3105 /* 3106 * Iterate the original parent and move the matching brefs into 3107 * the new indirect block. 3108 * 3109 * XXX handle flushes. 3110 */ 3111 key_beg = 0; 3112 key_end = HAMMER2_KEY_MAX; 3113 cache_index = 0; 3114 hammer2_spin_ex(&parent->core.spin); 3115 loops = 0; 3116 reason = 0; 3117 3118 for (;;) { 3119 /* 3120 * Parent may have been modified, relocating its block array. 3121 * Reload the base pointer. 3122 */ 3123 base = hammer2_chain_base_and_count(parent, &count); 3124 3125 if (++loops > 100000) { 3126 hammer2_spin_unex(&parent->core.spin); 3127 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3128 reason, parent, base, count, key_next); 3129 } 3130 3131 /* 3132 * NOTE: spinlock stays intact, returned chain (if not NULL) 3133 * is not referenced or locked which means that we 3134 * cannot safely check its flagged / deletion status 3135 * until we lock it. 3136 */ 3137 chain = hammer2_combined_find(parent, base, count, 3138 &cache_index, &key_next, 3139 key_beg, key_end, 3140 &bref); 3141 generation = parent->core.generation; 3142 if (bref == NULL) 3143 break; 3144 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3145 3146 /* 3147 * Skip keys that are not within the key/radix of the new 3148 * indirect block. They stay in the parent. 3149 */ 3150 if ((~(((hammer2_key_t)1 << keybits) - 1) & 3151 (key ^ bref->key)) != 0) { 3152 goto next_key_spinlocked; 3153 } 3154 3155 /* 3156 * Load the new indirect block by acquiring the related 3157 * chains (potentially from media as it might not be 3158 * in-memory). Then move it to the new parent (ichain) 3159 * via DELETE-DUPLICATE. 3160 * 3161 * chain is referenced but not locked. We must lock the 3162 * chain to obtain definitive DUPLICATED/DELETED state 3163 */ 3164 if (chain) { 3165 /* 3166 * Use chain already present in the RBTREE 3167 */ 3168 hammer2_chain_ref(chain); 3169 hammer2_spin_unex(&parent->core.spin); 3170 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3171 } else { 3172 /* 3173 * Get chain for blockref element. _get returns NULL 3174 * on insertion race. 3175 */ 3176 bcopy = *bref; 3177 hammer2_spin_unex(&parent->core.spin); 3178 chain = hammer2_chain_get(parent, generation, &bcopy); 3179 if (chain == NULL) { 3180 reason = 1; 3181 hammer2_spin_ex(&parent->core.spin); 3182 continue; 3183 } 3184 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 3185 kprintf("REASON 2\n"); 3186 reason = 2; 3187 hammer2_chain_drop(chain); 3188 hammer2_spin_ex(&parent->core.spin); 3189 continue; 3190 } 3191 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3192 } 3193 3194 /* 3195 * This is always live so if the chain has been deleted 3196 * we raced someone and we have to retry. 3197 * 3198 * NOTE: Lookups can race delete-duplicate because 3199 * delete-duplicate does not lock the parent's core 3200 * (they just use the spinlock on the core). We must 3201 * check for races by comparing the DUPLICATED flag before 3202 * releasing the spinlock with the flag after locking the 3203 * chain. 3204 * 3205 * (note reversed logic for this one) 3206 */ 3207 if (chain->flags & HAMMER2_CHAIN_DELETED) { 3208 hammer2_chain_unlock(chain); 3209 hammer2_chain_drop(chain); 3210 goto next_key; 3211 } 3212 3213 /* 3214 * Shift the chain to the indirect block. 3215 * 3216 * WARNING! No reason for us to load chain data, pass NOSTATS 3217 * to prevent delete/insert from trying to access 3218 * inode stats (and thus asserting if there is no 3219 * chain->data loaded). 3220 * 3221 * WARNING! The (parent, chain) deletion may modify the parent 3222 * and invalidate the base pointer. 3223 */ 3224 hammer2_chain_delete(parent, chain, mtid, 0); 3225 hammer2_chain_rename(NULL, &ichain, chain, mtid, 0); 3226 hammer2_chain_unlock(chain); 3227 hammer2_chain_drop(chain); 3228 KKASSERT(parent->refs > 0); 3229 chain = NULL; 3230 base = NULL; /* safety */ 3231 next_key: 3232 hammer2_spin_ex(&parent->core.spin); 3233 next_key_spinlocked: 3234 if (--maxloops == 0) 3235 panic("hammer2_chain_create_indirect: maxloops"); 3236 reason = 4; 3237 if (key_next == 0 || key_next > key_end) 3238 break; 3239 key_beg = key_next; 3240 /* loop */ 3241 } 3242 hammer2_spin_unex(&parent->core.spin); 3243 3244 /* 3245 * Insert the new indirect block into the parent now that we've 3246 * cleared out some entries in the parent. We calculated a good 3247 * insertion index in the loop above (ichain->index). 3248 * 3249 * We don't have to set UPDATE here because we mark ichain 3250 * modified down below (so the normal modified -> flush -> set-moved 3251 * sequence applies). 3252 * 3253 * The insertion shouldn't race as this is a completely new block 3254 * and the parent is locked. 3255 */ 3256 base = NULL; /* safety, parent modify may change address */ 3257 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 3258 hammer2_chain_insert(parent, ichain, 3259 HAMMER2_CHAIN_INSERT_SPIN | 3260 HAMMER2_CHAIN_INSERT_LIVE, 3261 0); 3262 3263 /* 3264 * Make sure flushes propogate after our manual insertion. 3265 */ 3266 hammer2_chain_setflush(ichain); 3267 hammer2_chain_setflush(parent); 3268 3269 /* 3270 * Figure out what to return. 3271 */ 3272 if (~(((hammer2_key_t)1 << keybits) - 1) & 3273 (create_key ^ key)) { 3274 /* 3275 * Key being created is outside the key range, 3276 * return the original parent. 3277 */ 3278 hammer2_chain_unlock(ichain); 3279 hammer2_chain_drop(ichain); 3280 } else { 3281 /* 3282 * Otherwise its in the range, return the new parent. 3283 * (leave both the new and old parent locked). 3284 */ 3285 parent = ichain; 3286 } 3287 3288 return(parent); 3289 } 3290 3291 /* 3292 * Calculate the keybits and highside/lowside of the freemap node the 3293 * caller is creating. 3294 * 3295 * This routine will specify the next higher-level freemap key/radix 3296 * representing the lowest-ordered set. By doing so, eventually all 3297 * low-ordered sets will be moved one level down. 3298 * 3299 * We have to be careful here because the freemap reserves a limited 3300 * number of blocks for a limited number of levels. So we can't just 3301 * push indiscriminately. 3302 */ 3303 int 3304 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 3305 int keybits, hammer2_blockref_t *base, int count) 3306 { 3307 hammer2_chain_t *chain; 3308 hammer2_blockref_t *bref; 3309 hammer2_key_t key; 3310 hammer2_key_t key_beg; 3311 hammer2_key_t key_end; 3312 hammer2_key_t key_next; 3313 int cache_index; 3314 int locount; 3315 int hicount; 3316 int maxloops = 300000; 3317 3318 key = *keyp; 3319 locount = 0; 3320 hicount = 0; 3321 keybits = 64; 3322 3323 /* 3324 * Calculate the range of keys in the array being careful to skip 3325 * slots which are overridden with a deletion. 3326 */ 3327 key_beg = 0; 3328 key_end = HAMMER2_KEY_MAX; 3329 cache_index = 0; 3330 hammer2_spin_ex(&parent->core.spin); 3331 3332 for (;;) { 3333 if (--maxloops == 0) { 3334 panic("indkey_freemap shit %p %p:%d\n", 3335 parent, base, count); 3336 } 3337 chain = hammer2_combined_find(parent, base, count, 3338 &cache_index, &key_next, 3339 key_beg, key_end, 3340 &bref); 3341 3342 /* 3343 * Exhausted search 3344 */ 3345 if (bref == NULL) 3346 break; 3347 3348 /* 3349 * Skip deleted chains. 3350 */ 3351 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3352 if (key_next == 0 || key_next > key_end) 3353 break; 3354 key_beg = key_next; 3355 continue; 3356 } 3357 3358 /* 3359 * Use the full live (not deleted) element for the scan 3360 * iteration. HAMMER2 does not allow partial replacements. 3361 * 3362 * XXX should be built into hammer2_combined_find(). 3363 */ 3364 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3365 3366 if (keybits > bref->keybits) { 3367 key = bref->key; 3368 keybits = bref->keybits; 3369 } else if (keybits == bref->keybits && bref->key < key) { 3370 key = bref->key; 3371 } 3372 if (key_next == 0) 3373 break; 3374 key_beg = key_next; 3375 } 3376 hammer2_spin_unex(&parent->core.spin); 3377 3378 /* 3379 * Return the keybits for a higher-level FREEMAP_NODE covering 3380 * this node. 3381 */ 3382 switch(keybits) { 3383 case HAMMER2_FREEMAP_LEVEL0_RADIX: 3384 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 3385 break; 3386 case HAMMER2_FREEMAP_LEVEL1_RADIX: 3387 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 3388 break; 3389 case HAMMER2_FREEMAP_LEVEL2_RADIX: 3390 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 3391 break; 3392 case HAMMER2_FREEMAP_LEVEL3_RADIX: 3393 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 3394 break; 3395 case HAMMER2_FREEMAP_LEVEL4_RADIX: 3396 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 3397 break; 3398 case HAMMER2_FREEMAP_LEVEL5_RADIX: 3399 panic("hammer2_chain_indkey_freemap: level too high"); 3400 break; 3401 default: 3402 panic("hammer2_chain_indkey_freemap: bad radix"); 3403 break; 3404 } 3405 *keyp = key; 3406 3407 return (keybits); 3408 } 3409 3410 /* 3411 * Calculate the keybits and highside/lowside of the indirect block the 3412 * caller is creating. 3413 */ 3414 static int 3415 hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, 3416 int keybits, hammer2_blockref_t *base, int count) 3417 { 3418 hammer2_blockref_t *bref; 3419 hammer2_chain_t *chain; 3420 hammer2_key_t key_beg; 3421 hammer2_key_t key_end; 3422 hammer2_key_t key_next; 3423 hammer2_key_t key; 3424 int nkeybits; 3425 int locount; 3426 int hicount; 3427 int cache_index; 3428 int maxloops = 300000; 3429 3430 key = *keyp; 3431 locount = 0; 3432 hicount = 0; 3433 3434 /* 3435 * Calculate the range of keys in the array being careful to skip 3436 * slots which are overridden with a deletion. Once the scan 3437 * completes we will cut the key range in half and shift half the 3438 * range into the new indirect block. 3439 */ 3440 key_beg = 0; 3441 key_end = HAMMER2_KEY_MAX; 3442 cache_index = 0; 3443 hammer2_spin_ex(&parent->core.spin); 3444 3445 for (;;) { 3446 if (--maxloops == 0) { 3447 panic("indkey_freemap shit %p %p:%d\n", 3448 parent, base, count); 3449 } 3450 chain = hammer2_combined_find(parent, base, count, 3451 &cache_index, &key_next, 3452 key_beg, key_end, 3453 &bref); 3454 3455 /* 3456 * Exhausted search 3457 */ 3458 if (bref == NULL) 3459 break; 3460 3461 /* 3462 * NOTE: No need to check DUPLICATED here because we do 3463 * not release the spinlock. 3464 */ 3465 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3466 if (key_next == 0 || key_next > key_end) 3467 break; 3468 key_beg = key_next; 3469 continue; 3470 } 3471 3472 /* 3473 * Use the full live (not deleted) element for the scan 3474 * iteration. HAMMER2 does not allow partial replacements. 3475 * 3476 * XXX should be built into hammer2_combined_find(). 3477 */ 3478 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3479 3480 /* 3481 * Expand our calculated key range (key, keybits) to fit 3482 * the scanned key. nkeybits represents the full range 3483 * that we will later cut in half (two halves @ nkeybits - 1). 3484 */ 3485 nkeybits = keybits; 3486 if (nkeybits < bref->keybits) { 3487 if (bref->keybits > 64) { 3488 kprintf("bad bref chain %p bref %p\n", 3489 chain, bref); 3490 Debugger("fubar"); 3491 } 3492 nkeybits = bref->keybits; 3493 } 3494 while (nkeybits < 64 && 3495 (~(((hammer2_key_t)1 << nkeybits) - 1) & 3496 (key ^ bref->key)) != 0) { 3497 ++nkeybits; 3498 } 3499 3500 /* 3501 * If the new key range is larger we have to determine 3502 * which side of the new key range the existing keys fall 3503 * under by checking the high bit, then collapsing the 3504 * locount into the hicount or vise-versa. 3505 */ 3506 if (keybits != nkeybits) { 3507 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 3508 hicount += locount; 3509 locount = 0; 3510 } else { 3511 locount += hicount; 3512 hicount = 0; 3513 } 3514 keybits = nkeybits; 3515 } 3516 3517 /* 3518 * The newly scanned key will be in the lower half or the 3519 * upper half of the (new) key range. 3520 */ 3521 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 3522 ++hicount; 3523 else 3524 ++locount; 3525 3526 if (key_next == 0) 3527 break; 3528 key_beg = key_next; 3529 } 3530 hammer2_spin_unex(&parent->core.spin); 3531 bref = NULL; /* now invalid (safety) */ 3532 3533 /* 3534 * Adjust keybits to represent half of the full range calculated 3535 * above (radix 63 max) 3536 */ 3537 --keybits; 3538 3539 /* 3540 * Select whichever half contains the most elements. Theoretically 3541 * we can select either side as long as it contains at least one 3542 * element (in order to ensure that a free slot is present to hold 3543 * the indirect block). 3544 */ 3545 if (hammer2_indirect_optimize) { 3546 /* 3547 * Insert node for least number of keys, this will arrange 3548 * the first few blocks of a large file or the first few 3549 * inodes in a directory with fewer indirect blocks when 3550 * created linearly. 3551 */ 3552 if (hicount < locount && hicount != 0) 3553 key |= (hammer2_key_t)1 << keybits; 3554 else 3555 key &= ~(hammer2_key_t)1 << keybits; 3556 } else { 3557 /* 3558 * Insert node for most number of keys, best for heavily 3559 * fragmented files. 3560 */ 3561 if (hicount > locount) 3562 key |= (hammer2_key_t)1 << keybits; 3563 else 3564 key &= ~(hammer2_key_t)1 << keybits; 3565 } 3566 *keyp = key; 3567 3568 return (keybits); 3569 } 3570 3571 /* 3572 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 3573 * it exists. 3574 * 3575 * Both parent and chain must be locked exclusively. 3576 * 3577 * This function will modify the parent if the blockref requires removal 3578 * from the parent's block table. 3579 * 3580 * This function is NOT recursive. Any entity already pushed into the 3581 * chain (such as an inode) may still need visibility into its contents, 3582 * as well as the ability to read and modify the contents. For example, 3583 * for an unlinked file which is still open. 3584 */ 3585 void 3586 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 3587 hammer2_tid_t mtid, int flags) 3588 { 3589 KKASSERT(hammer2_mtx_owned(&chain->lock)); 3590 3591 /* 3592 * Nothing to do if already marked. 3593 * 3594 * We need the spinlock on the core whos RBTREE contains chain 3595 * to protect against races. 3596 */ 3597 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 3598 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 3599 chain->parent == parent); 3600 _hammer2_chain_delete_helper(parent, chain, mtid, flags); 3601 } 3602 3603 /* 3604 * Permanent deletions mark the chain as destroyed. H 3605 */ 3606 if (flags & HAMMER2_DELETE_PERMANENT) { 3607 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 3608 } else { 3609 /* XXX might not be needed */ 3610 hammer2_chain_setflush(chain); 3611 } 3612 } 3613 3614 /* 3615 * Returns the index of the nearest element in the blockref array >= elm. 3616 * Returns (count) if no element could be found. 3617 * 3618 * Sets *key_nextp to the next key for loop purposes but does not modify 3619 * it if the next key would be higher than the current value of *key_nextp. 3620 * Note that *key_nexp can overflow to 0, which should be tested by the 3621 * caller. 3622 * 3623 * (*cache_indexp) is a heuristic and can be any value without effecting 3624 * the result. 3625 * 3626 * WARNING! Must be called with parent's spinlock held. Spinlock remains 3627 * held through the operation. 3628 */ 3629 static int 3630 hammer2_base_find(hammer2_chain_t *parent, 3631 hammer2_blockref_t *base, int count, 3632 int *cache_indexp, hammer2_key_t *key_nextp, 3633 hammer2_key_t key_beg, hammer2_key_t key_end) 3634 { 3635 hammer2_blockref_t *scan; 3636 hammer2_key_t scan_end; 3637 int i; 3638 int limit; 3639 3640 /* 3641 * Require the live chain's already have their core's counted 3642 * so we can optimize operations. 3643 */ 3644 KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS); 3645 3646 /* 3647 * Degenerate case 3648 */ 3649 if (count == 0 || base == NULL) 3650 return(count); 3651 3652 /* 3653 * Sequential optimization using *cache_indexp. This is the most 3654 * likely scenario. 3655 * 3656 * We can avoid trailing empty entries on live chains, otherwise 3657 * we might have to check the whole block array. 3658 */ 3659 i = *cache_indexp; 3660 cpu_ccfence(); 3661 limit = parent->core.live_zero; 3662 if (i >= limit) 3663 i = limit - 1; 3664 if (i < 0) 3665 i = 0; 3666 KKASSERT(i < count); 3667 3668 /* 3669 * Search backwards 3670 */ 3671 scan = &base[i]; 3672 while (i > 0 && (scan->type == 0 || scan->key > key_beg)) { 3673 --scan; 3674 --i; 3675 } 3676 *cache_indexp = i; 3677 3678 /* 3679 * Search forwards, stop when we find a scan element which 3680 * encloses the key or until we know that there are no further 3681 * elements. 3682 */ 3683 while (i < count) { 3684 if (scan->type != 0) { 3685 scan_end = scan->key + 3686 ((hammer2_key_t)1 << scan->keybits) - 1; 3687 if (scan->key > key_beg || scan_end >= key_beg) 3688 break; 3689 } 3690 if (i >= limit) 3691 return (count); 3692 ++scan; 3693 ++i; 3694 } 3695 if (i != count) { 3696 *cache_indexp = i; 3697 if (i >= limit) { 3698 i = count; 3699 } else { 3700 scan_end = scan->key + 3701 ((hammer2_key_t)1 << scan->keybits); 3702 if (scan_end && (*key_nextp > scan_end || 3703 *key_nextp == 0)) { 3704 *key_nextp = scan_end; 3705 } 3706 } 3707 } 3708 return (i); 3709 } 3710 3711 /* 3712 * Do a combined search and return the next match either from the blockref 3713 * array or from the in-memory chain. Sets *bresp to the returned bref in 3714 * both cases, or sets it to NULL if the search exhausted. Only returns 3715 * a non-NULL chain if the search matched from the in-memory chain. 3716 * 3717 * When no in-memory chain has been found and a non-NULL bref is returned 3718 * in *bresp. 3719 * 3720 * 3721 * The returned chain is not locked or referenced. Use the returned bref 3722 * to determine if the search exhausted or not. Iterate if the base find 3723 * is chosen but matches a deleted chain. 3724 * 3725 * WARNING! Must be called with parent's spinlock held. Spinlock remains 3726 * held through the operation. 3727 */ 3728 static hammer2_chain_t * 3729 hammer2_combined_find(hammer2_chain_t *parent, 3730 hammer2_blockref_t *base, int count, 3731 int *cache_indexp, hammer2_key_t *key_nextp, 3732 hammer2_key_t key_beg, hammer2_key_t key_end, 3733 hammer2_blockref_t **bresp) 3734 { 3735 hammer2_blockref_t *bref; 3736 hammer2_chain_t *chain; 3737 int i; 3738 3739 /* 3740 * Lookup in block array and in rbtree. 3741 */ 3742 *key_nextp = key_end + 1; 3743 i = hammer2_base_find(parent, base, count, cache_indexp, 3744 key_nextp, key_beg, key_end); 3745 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 3746 3747 /* 3748 * Neither matched 3749 */ 3750 if (i == count && chain == NULL) { 3751 *bresp = NULL; 3752 return(NULL); 3753 } 3754 3755 /* 3756 * Only chain matched. 3757 */ 3758 if (i == count) { 3759 bref = &chain->bref; 3760 goto found; 3761 } 3762 3763 /* 3764 * Only blockref matched. 3765 */ 3766 if (chain == NULL) { 3767 bref = &base[i]; 3768 goto found; 3769 } 3770 3771 /* 3772 * Both in-memory and blockref matched, select the nearer element. 3773 * 3774 * If both are flush with the left-hand side or both are the 3775 * same distance away, select the chain. In this situation the 3776 * chain must have been loaded from the matching blockmap. 3777 */ 3778 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 3779 chain->bref.key == base[i].key) { 3780 KKASSERT(chain->bref.key == base[i].key); 3781 bref = &chain->bref; 3782 goto found; 3783 } 3784 3785 /* 3786 * Select the nearer key 3787 */ 3788 if (chain->bref.key < base[i].key) { 3789 bref = &chain->bref; 3790 } else { 3791 bref = &base[i]; 3792 chain = NULL; 3793 } 3794 3795 /* 3796 * If the bref is out of bounds we've exhausted our search. 3797 */ 3798 found: 3799 if (bref->key > key_end) { 3800 *bresp = NULL; 3801 chain = NULL; 3802 } else { 3803 *bresp = bref; 3804 } 3805 return(chain); 3806 } 3807 3808 /* 3809 * Locate the specified block array element and delete it. The element 3810 * must exist. 3811 * 3812 * The spin lock on the related chain must be held. 3813 * 3814 * NOTE: live_count was adjusted when the chain was deleted, so it does not 3815 * need to be adjusted when we commit the media change. 3816 */ 3817 void 3818 hammer2_base_delete(hammer2_chain_t *parent, 3819 hammer2_blockref_t *base, int count, 3820 int *cache_indexp, hammer2_chain_t *chain) 3821 { 3822 hammer2_blockref_t *elm = &chain->bref; 3823 hammer2_key_t key_next; 3824 int i; 3825 3826 /* 3827 * Delete element. Expect the element to exist. 3828 * 3829 * XXX see caller, flush code not yet sophisticated enough to prevent 3830 * re-flushed in some cases. 3831 */ 3832 key_next = 0; /* max range */ 3833 i = hammer2_base_find(parent, base, count, cache_indexp, 3834 &key_next, elm->key, elm->key); 3835 if (i == count || base[i].type == 0 || 3836 base[i].key != elm->key || 3837 ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 && 3838 base[i].keybits != elm->keybits)) { 3839 hammer2_spin_unex(&parent->core.spin); 3840 panic("delete base %p element not found at %d/%d elm %p\n", 3841 base, i, count, elm); 3842 return; 3843 } 3844 3845 /* 3846 * Update stats and zero the entry 3847 */ 3848 parent->bref.data_count -= base[i].data_count; 3849 parent->bref.data_count -= (hammer2_off_t)1 << 3850 (int)(base[i].data_off & HAMMER2_OFF_MASK_RADIX); 3851 parent->bref.inode_count -= base[i].inode_count; 3852 if (base[i].type == HAMMER2_BREF_TYPE_INODE) 3853 parent->bref.inode_count -= 1; 3854 3855 bzero(&base[i], sizeof(*base)); 3856 3857 /* 3858 * We can only optimize parent->core.live_zero for live chains. 3859 */ 3860 if (parent->core.live_zero == i + 1) { 3861 while (--i >= 0 && base[i].type == 0) 3862 ; 3863 parent->core.live_zero = i + 1; 3864 } 3865 3866 /* 3867 * Clear appropriate blockmap flags in chain. 3868 */ 3869 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 3870 HAMMER2_CHAIN_BMAPUPD); 3871 } 3872 3873 /* 3874 * Insert the specified element. The block array must not already have the 3875 * element and must have space available for the insertion. 3876 * 3877 * The spin lock on the related chain must be held. 3878 * 3879 * NOTE: live_count was adjusted when the chain was deleted, so it does not 3880 * need to be adjusted when we commit the media change. 3881 */ 3882 void 3883 hammer2_base_insert(hammer2_chain_t *parent, 3884 hammer2_blockref_t *base, int count, 3885 int *cache_indexp, hammer2_chain_t *chain) 3886 { 3887 hammer2_blockref_t *elm = &chain->bref; 3888 hammer2_key_t key_next; 3889 hammer2_key_t xkey; 3890 int i; 3891 int j; 3892 int k; 3893 int l; 3894 int u = 1; 3895 3896 /* 3897 * Insert new element. Expect the element to not already exist 3898 * unless we are replacing it. 3899 * 3900 * XXX see caller, flush code not yet sophisticated enough to prevent 3901 * re-flushed in some cases. 3902 */ 3903 key_next = 0; /* max range */ 3904 i = hammer2_base_find(parent, base, count, cache_indexp, 3905 &key_next, elm->key, elm->key); 3906 3907 /* 3908 * Shortcut fill optimization, typical ordered insertion(s) may not 3909 * require a search. 3910 */ 3911 KKASSERT(i >= 0 && i <= count); 3912 3913 /* 3914 * Set appropriate blockmap flags in chain. 3915 */ 3916 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 3917 3918 /* 3919 * Update stats and zero the entry 3920 */ 3921 parent->bref.data_count += elm->data_count; 3922 parent->bref.data_count += (hammer2_off_t)1 << 3923 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 3924 parent->bref.inode_count += elm->inode_count; 3925 if (elm->type == HAMMER2_BREF_TYPE_INODE) 3926 parent->bref.inode_count += 1; 3927 3928 3929 /* 3930 * We can only optimize parent->core.live_zero for live chains. 3931 */ 3932 if (i == count && parent->core.live_zero < count) { 3933 i = parent->core.live_zero++; 3934 base[i] = *elm; 3935 return; 3936 } 3937 3938 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 3939 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 3940 hammer2_spin_unex(&parent->core.spin); 3941 panic("insert base %p overlapping elements at %d elm %p\n", 3942 base, i, elm); 3943 } 3944 3945 /* 3946 * Try to find an empty slot before or after. 3947 */ 3948 j = i; 3949 k = i; 3950 while (j > 0 || k < count) { 3951 --j; 3952 if (j >= 0 && base[j].type == 0) { 3953 if (j == i - 1) { 3954 base[j] = *elm; 3955 } else { 3956 bcopy(&base[j+1], &base[j], 3957 (i - j - 1) * sizeof(*base)); 3958 base[i - 1] = *elm; 3959 } 3960 goto validate; 3961 } 3962 ++k; 3963 if (k < count && base[k].type == 0) { 3964 bcopy(&base[i], &base[i+1], 3965 (k - i) * sizeof(hammer2_blockref_t)); 3966 base[i] = *elm; 3967 3968 /* 3969 * We can only update parent->core.live_zero for live 3970 * chains. 3971 */ 3972 if (parent->core.live_zero <= k) 3973 parent->core.live_zero = k + 1; 3974 u = 2; 3975 goto validate; 3976 } 3977 } 3978 panic("hammer2_base_insert: no room!"); 3979 3980 /* 3981 * Debugging 3982 */ 3983 validate: 3984 key_next = 0; 3985 for (l = 0; l < count; ++l) { 3986 if (base[l].type) { 3987 key_next = base[l].key + 3988 ((hammer2_key_t)1 << base[l].keybits) - 1; 3989 break; 3990 } 3991 } 3992 while (++l < count) { 3993 if (base[l].type) { 3994 if (base[l].key <= key_next) 3995 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 3996 key_next = base[l].key + 3997 ((hammer2_key_t)1 << base[l].keybits) - 1; 3998 3999 } 4000 } 4001 4002 } 4003 4004 #if 0 4005 4006 /* 4007 * Sort the blockref array for the chain. Used by the flush code to 4008 * sort the blockref[] array. 4009 * 4010 * The chain must be exclusively locked AND spin-locked. 4011 */ 4012 typedef hammer2_blockref_t *hammer2_blockref_p; 4013 4014 static 4015 int 4016 hammer2_base_sort_callback(const void *v1, const void *v2) 4017 { 4018 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 4019 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 4020 4021 /* 4022 * Make sure empty elements are placed at the end of the array 4023 */ 4024 if (bref1->type == 0) { 4025 if (bref2->type == 0) 4026 return(0); 4027 return(1); 4028 } else if (bref2->type == 0) { 4029 return(-1); 4030 } 4031 4032 /* 4033 * Sort by key 4034 */ 4035 if (bref1->key < bref2->key) 4036 return(-1); 4037 if (bref1->key > bref2->key) 4038 return(1); 4039 return(0); 4040 } 4041 4042 void 4043 hammer2_base_sort(hammer2_chain_t *chain) 4044 { 4045 hammer2_blockref_t *base; 4046 int count; 4047 4048 switch(chain->bref.type) { 4049 case HAMMER2_BREF_TYPE_INODE: 4050 /* 4051 * Special shortcut for embedded data returns the inode 4052 * itself. Callers must detect this condition and access 4053 * the embedded data (the strategy code does this for us). 4054 * 4055 * This is only applicable to regular files and softlinks. 4056 */ 4057 if (chain->data->ipdata.meta.op_flags & 4058 HAMMER2_OPFLAG_DIRECTDATA) { 4059 return; 4060 } 4061 base = &chain->data->ipdata.u.blockset.blockref[0]; 4062 count = HAMMER2_SET_COUNT; 4063 break; 4064 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4065 case HAMMER2_BREF_TYPE_INDIRECT: 4066 /* 4067 * Optimize indirect blocks in the INITIAL state to avoid 4068 * I/O. 4069 */ 4070 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 4071 base = &chain->data->npdata[0]; 4072 count = chain->bytes / sizeof(hammer2_blockref_t); 4073 break; 4074 case HAMMER2_BREF_TYPE_VOLUME: 4075 base = &chain->data->voldata.sroot_blockset.blockref[0]; 4076 count = HAMMER2_SET_COUNT; 4077 break; 4078 case HAMMER2_BREF_TYPE_FREEMAP: 4079 base = &chain->data->blkset.blockref[0]; 4080 count = HAMMER2_SET_COUNT; 4081 break; 4082 default: 4083 kprintf("hammer2_chain_lookup: unrecognized " 4084 "blockref(A) type: %d", 4085 chain->bref.type); 4086 while (1) 4087 tsleep(&base, 0, "dead", 0); 4088 panic("hammer2_chain_lookup: unrecognized " 4089 "blockref(A) type: %d", 4090 chain->bref.type); 4091 base = NULL; /* safety */ 4092 count = 0; /* safety */ 4093 } 4094 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 4095 } 4096 4097 #endif 4098 4099 /* 4100 * Chain memory management 4101 */ 4102 void 4103 hammer2_chain_wait(hammer2_chain_t *chain) 4104 { 4105 tsleep(chain, 0, "chnflw", 1); 4106 } 4107 4108 const hammer2_media_data_t * 4109 hammer2_chain_rdata(hammer2_chain_t *chain) 4110 { 4111 KKASSERT(chain->data != NULL); 4112 return (chain->data); 4113 } 4114 4115 hammer2_media_data_t * 4116 hammer2_chain_wdata(hammer2_chain_t *chain) 4117 { 4118 KKASSERT(chain->data != NULL); 4119 return (chain->data); 4120 } 4121 4122 /* 4123 * Set the check data for a chain. This can be a heavy-weight operation 4124 * and typically only runs on-flush. For file data check data is calculated 4125 * when the logical buffers are flushed. 4126 */ 4127 void 4128 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 4129 { 4130 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 4131 4132 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4133 case HAMMER2_CHECK_NONE: 4134 break; 4135 case HAMMER2_CHECK_DISABLED: 4136 break; 4137 case HAMMER2_CHECK_ISCSI32: 4138 chain->bref.check.iscsi32.value = 4139 hammer2_icrc32(bdata, chain->bytes); 4140 break; 4141 case HAMMER2_CHECK_CRC64: 4142 chain->bref.check.crc64.value = 0; 4143 /* XXX */ 4144 break; 4145 case HAMMER2_CHECK_SHA192: 4146 { 4147 SHA256_CTX hash_ctx; 4148 union { 4149 uint8_t digest[SHA256_DIGEST_LENGTH]; 4150 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4151 } u; 4152 4153 SHA256_Init(&hash_ctx); 4154 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4155 SHA256_Final(u.digest, &hash_ctx); 4156 u.digest64[2] ^= u.digest64[3]; 4157 bcopy(u.digest, 4158 chain->bref.check.sha192.data, 4159 sizeof(chain->bref.check.sha192.data)); 4160 } 4161 break; 4162 case HAMMER2_CHECK_FREEMAP: 4163 chain->bref.check.freemap.icrc32 = 4164 hammer2_icrc32(bdata, chain->bytes); 4165 break; 4166 default: 4167 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4168 chain->bref.methods); 4169 break; 4170 } 4171 } 4172 4173 int 4174 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 4175 { 4176 int r; 4177 4178 if (chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 4179 return 1; 4180 4181 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4182 case HAMMER2_CHECK_NONE: 4183 r = 1; 4184 break; 4185 case HAMMER2_CHECK_DISABLED: 4186 r = 1; 4187 break; 4188 case HAMMER2_CHECK_ISCSI32: 4189 r = (chain->bref.check.iscsi32.value == 4190 hammer2_icrc32(bdata, chain->bytes)); 4191 break; 4192 case HAMMER2_CHECK_CRC64: 4193 r = (chain->bref.check.crc64.value == 0); 4194 /* XXX */ 4195 break; 4196 case HAMMER2_CHECK_SHA192: 4197 { 4198 SHA256_CTX hash_ctx; 4199 union { 4200 uint8_t digest[SHA256_DIGEST_LENGTH]; 4201 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4202 } u; 4203 4204 SHA256_Init(&hash_ctx); 4205 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4206 SHA256_Final(u.digest, &hash_ctx); 4207 u.digest64[2] ^= u.digest64[3]; 4208 if (bcmp(u.digest, 4209 chain->bref.check.sha192.data, 4210 sizeof(chain->bref.check.sha192.data)) == 0) { 4211 r = 1; 4212 } else { 4213 r = 0; 4214 } 4215 } 4216 break; 4217 case HAMMER2_CHECK_FREEMAP: 4218 r = (chain->bref.check.freemap.icrc32 == 4219 hammer2_icrc32(bdata, chain->bytes)); 4220 if (r == 0) { 4221 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 4222 chain->bref.check.freemap.icrc32, 4223 hammer2_icrc32(bdata, chain->bytes), chain->bytes); 4224 if (chain->dio) 4225 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 4226 chain->dio, chain->dio->bp->b_loffset, chain->dio->bp->b_bufsize, bdata, chain->dio->bp->b_data); 4227 } 4228 4229 break; 4230 default: 4231 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4232 chain->bref.methods); 4233 r = 1; 4234 break; 4235 } 4236 return r; 4237 } 4238 4239 /* 4240 * The caller presents a shared-locked (parent, chain) where the chain 4241 * is of type HAMMER2_OBJTYPE_HARDLINK. The caller must hold the ip 4242 * structure representing the inode locked to prevent 4243 * consolidation/deconsolidation races. 4244 * 4245 * The flags passed in are LOOKUP flags, not RESOLVE flags. Only 4246 * HAMMER2_LOOKUP_SHARED is supported. 4247 * 4248 * We locate the hardlink in the current or a common parent directory. 4249 * 4250 * If we are unable to locate the hardlink, EIO is returned and 4251 * (*chainp) is unlocked and dropped. 4252 */ 4253 int 4254 hammer2_chain_hardlink_find(hammer2_inode_t *dip, 4255 hammer2_chain_t **parentp, 4256 hammer2_chain_t **chainp, 4257 int flags) 4258 { 4259 hammer2_chain_t *parent; 4260 hammer2_chain_t *rchain; 4261 hammer2_key_t key_dummy; 4262 hammer2_key_t lhc; 4263 int cache_index = -1; 4264 int resolve_flags; 4265 4266 resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ? 4267 HAMMER2_RESOLVE_SHARED : 0; 4268 4269 /* 4270 * Obtain the key for the hardlink from *chainp. 4271 */ 4272 rchain = *chainp; 4273 lhc = rchain->data->ipdata.meta.inum; 4274 hammer2_chain_unlock(rchain); 4275 hammer2_chain_drop(rchain); 4276 rchain = NULL; 4277 4278 for (;;) { 4279 int nloops; 4280 rchain = hammer2_chain_lookup(parentp, &key_dummy, 4281 lhc, lhc, 4282 &cache_index, flags); 4283 if (rchain) 4284 break; 4285 4286 /* 4287 * Iterate parents, handle parent rename races by retrying 4288 * the operation. 4289 */ 4290 nloops = -1; 4291 while (nloops) { 4292 --nloops; 4293 parent = *parentp; 4294 if (nloops < 0 && 4295 parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4296 nloops = 1; 4297 } 4298 if (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) 4299 goto done; 4300 if (parent->parent == NULL) 4301 goto done; 4302 parent = parent->parent; 4303 hammer2_chain_ref(parent); 4304 hammer2_chain_unlock(*parentp); 4305 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 4306 resolve_flags); 4307 if ((*parentp)->parent == parent) { 4308 hammer2_chain_drop(*parentp); 4309 *parentp = parent; 4310 } else { 4311 hammer2_chain_unlock(parent); 4312 hammer2_chain_drop(parent); 4313 hammer2_chain_lock(*parentp, 4314 HAMMER2_RESOLVE_ALWAYS | 4315 resolve_flags); 4316 parent = NULL; /* safety */ 4317 /* retry */ 4318 } 4319 } 4320 } 4321 done: 4322 4323 *chainp = rchain; 4324 return (rchain ? EINVAL : 0); 4325 } 4326 4327 /* 4328 * Used by the bulkscan code to snapshot the synchronized storage for 4329 * a volume, allowing it to be scanned concurrently against normal 4330 * operation. 4331 */ 4332 hammer2_chain_t * 4333 hammer2_chain_bulksnap(hammer2_chain_t *chain) 4334 { 4335 hammer2_chain_t *copy; 4336 4337 copy = hammer2_chain_alloc(chain->hmp, chain->pmp, &chain->bref); 4338 switch(chain->bref.type) { 4339 case HAMMER2_BREF_TYPE_VOLUME: 4340 copy->data = kmalloc(sizeof(copy->data->voldata), 4341 chain->hmp->mchain, 4342 M_WAITOK | M_ZERO); 4343 hammer2_spin_ex(&chain->core.spin); 4344 copy->data->voldata = chain->data->voldata; 4345 hammer2_spin_unex(&chain->core.spin); 4346 break; 4347 case HAMMER2_BREF_TYPE_FREEMAP: 4348 copy->data = kmalloc(sizeof(hammer2_blockset_t), 4349 chain->hmp->mchain, 4350 M_WAITOK | M_ZERO); 4351 hammer2_spin_ex(&chain->core.spin); 4352 copy->data->blkset = chain->data->blkset; 4353 hammer2_spin_unex(&chain->core.spin); 4354 break; 4355 default: 4356 break; 4357 } 4358 return copy; 4359 } 4360 4361 void 4362 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 4363 { 4364 switch(copy->bref.type) { 4365 case HAMMER2_BREF_TYPE_VOLUME: 4366 case HAMMER2_BREF_TYPE_FREEMAP: 4367 KKASSERT(copy->data); 4368 kfree(copy->data, copy->hmp->mchain); 4369 copy->data = NULL; 4370 default: 4371 break; 4372 } 4373 hammer2_chain_drop(copy); 4374 } 4375 4376 /* 4377 * Create a snapshot of the specified {parent, ochain} with the specified 4378 * label. The originating hammer2_inode must be exclusively locked for 4379 * safety. 4380 * 4381 * The ioctl code has already synced the filesystem. 4382 */ 4383 int 4384 hammer2_chain_snapshot(hammer2_chain_t *chain, hammer2_ioc_pfs_t *pmp, 4385 hammer2_tid_t mtid) 4386 { 4387 hammer2_dev_t *hmp; 4388 const hammer2_inode_data_t *ripdata; 4389 hammer2_inode_data_t *wipdata; 4390 hammer2_chain_t *nchain; 4391 hammer2_inode_t *nip; 4392 size_t name_len; 4393 hammer2_key_t lhc; 4394 struct vattr vat; 4395 #if 0 4396 uuid_t opfs_clid; 4397 #endif 4398 int error; 4399 4400 kprintf("snapshot %s\n", pmp->name); 4401 4402 name_len = strlen(pmp->name); 4403 lhc = hammer2_dirhash(pmp->name, name_len); 4404 4405 /* 4406 * Get the clid 4407 */ 4408 ripdata = &chain->data->ipdata; 4409 #if 0 4410 opfs_clid = ripdata->meta.pfs_clid; 4411 #endif 4412 hmp = chain->hmp; 4413 4414 /* 4415 * Create the snapshot directory under the super-root 4416 * 4417 * Set PFS type, generate a unique filesystem id, and generate 4418 * a cluster id. Use the same clid when snapshotting a PFS root, 4419 * which theoretically allows the snapshot to be used as part of 4420 * the same cluster (perhaps as a cache). 4421 * 4422 * Copy the (flushed) blockref array. Theoretically we could use 4423 * chain_duplicate() but it becomes difficult to disentangle 4424 * the shared core so for now just brute-force it. 4425 */ 4426 VATTR_NULL(&vat); 4427 vat.va_type = VDIR; 4428 vat.va_mode = 0755; 4429 nip = hammer2_inode_create(hmp->spmp->iroot, &vat, proc0.p_ucred, 4430 pmp->name, name_len, 0, 4431 1, 0, 0, 4432 HAMMER2_INSERT_PFSROOT, &error); 4433 4434 if (nip) { 4435 hammer2_inode_modify(nip); 4436 nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS); 4437 hammer2_chain_modify(nchain, mtid, 0, 0); 4438 wipdata = &nchain->data->ipdata; 4439 4440 nip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER; 4441 nip->meta.pfs_subtype = HAMMER2_PFSSUBTYPE_SNAPSHOT; 4442 nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT; 4443 kern_uuidgen(&nip->meta.pfs_fsid, 1); 4444 4445 /* 4446 * Give the snapshot its own private cluster id. As a 4447 * snapshot no further synchronization with the original 4448 * cluster will be done. 4449 */ 4450 #if 0 4451 if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) 4452 nip->meta.pfs_clid = opfs_clid; 4453 else 4454 kern_uuidgen(&nip->meta.pfs_clid, 1); 4455 #endif 4456 kern_uuidgen(&nip->meta.pfs_clid, 1); 4457 nchain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 4458 4459 /* XXX hack blockset copy */ 4460 /* XXX doesn't work with real cluster */ 4461 wipdata->meta = nip->meta; 4462 wipdata->u.blockset = ripdata->u.blockset; 4463 hammer2_flush(nchain, 1); 4464 hammer2_chain_unlock(nchain); 4465 hammer2_chain_drop(nchain); 4466 hammer2_inode_unlock(nip); 4467 } 4468 return (error); 4469 } 4470