1 /* 2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
38 * 39 * Chains are the in-memory version on media objects (volume header, inodes, 40 * indirect blocks, data blocks, etc). Chains represent a portion of the 41 * HAMMER2 topology. 42 * 43 * Chains are no-longer delete-duplicated. Instead, the original in-memory 44 * chain will be moved along with its block reference (e.g. for things like 45 * renames, hardlink operations, modifications, etc), and will be indexed 46 * on a secondary list for flush handling instead of propagating a flag 47 * upward to the root. 48 * 49 * Concurrent front-end operations can still run against backend flushes 50 * as long as they do not cross the current flush boundary. An operation 51 * running above the current flush (in areas not yet flushed) can become 52 * part of the current flush while ano peration running below the current 53 * flush can become part of the next flush. 54 */ 55 #include <sys/cdefs.h> 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/types.h> 59 #include <sys/lock.h> 60 #include <sys/kern_syscall.h> 61 #include <sys/uuid.h> 62 63 #include <crypto/sha2/sha2.h> 64 65 #include "hammer2.h" 66 67 static int hammer2_indirect_optimize; /* XXX SYSCTL */ 68 69 static hammer2_chain_t *hammer2_chain_create_indirect( 70 hammer2_chain_t *parent, 71 hammer2_key_t key, int keybits, 72 hammer2_tid_t mtid, int for_type, int *errorp); 73 static hammer2_io_t *hammer2_chain_drop_data(hammer2_chain_t *chain, 74 int lastdrop); 75 static hammer2_chain_t *hammer2_combined_find( 76 hammer2_chain_t *parent, 77 hammer2_blockref_t *base, int count, 78 int *cache_indexp, hammer2_key_t *key_nextp, 79 hammer2_key_t key_beg, hammer2_key_t key_end, 80 hammer2_blockref_t **bresp); 81 82 /* 83 * Basic RBTree for chains (core->rbtree and core->dbtree). Chains cannot 84 * overlap in the RB trees. Deleted chains are moved from rbtree to either 85 * dbtree or to dbq. 
86 * 87 * Chains in delete-duplicate sequences can always iterate through core_entry 88 * to locate the live version of the chain. 89 */ 90 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 91 92 extern int h2timer[32]; 93 extern int h2last; 94 extern int h2lid; 95 96 #define TIMER(which) do { \ 97 if (h2last) \ 98 h2timer[h2lid] += (int)(ticks - h2last);\ 99 h2last = ticks; \ 100 h2lid = which; \ 101 } while(0) 102 103 int 104 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 105 { 106 hammer2_key_t c1_beg; 107 hammer2_key_t c1_end; 108 hammer2_key_t c2_beg; 109 hammer2_key_t c2_end; 110 111 /* 112 * Compare chains. Overlaps are not supposed to happen and catch 113 * any software issues early we count overlaps as a match. 114 */ 115 c1_beg = chain1->bref.key; 116 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 117 c2_beg = chain2->bref.key; 118 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 119 120 if (c1_end < c2_beg) /* fully to the left */ 121 return(-1); 122 if (c1_beg > c2_end) /* fully to the right */ 123 return(1); 124 return(0); /* overlap (must not cross edge boundary) */ 125 } 126 127 static __inline 128 int 129 hammer2_isclusterable(hammer2_chain_t *chain) 130 { 131 if (hammer2_cluster_enable) { 132 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 133 chain->bref.type == HAMMER2_BREF_TYPE_INODE || 134 chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 135 return(1); 136 } 137 } 138 return(0); 139 } 140 141 /* 142 * Make a chain visible to the flusher. The flusher needs to be able to 143 * do flushes of subdirectory chains or single files so it does a top-down 144 * recursion using the ONFLUSH flag for the recursion. It locates MODIFIED 145 * or UPDATE chains and flushes back up the chain to the volume root. 146 * 147 * This routine sets ONFLUSH upward until it hits the volume root. For 148 * simplicity we ignore PFSROOT boundaries whos rules can be complex. 
 * Extra ONFLUSH flagging doesn't hurt the filesystem.
 */
void
hammer2_chain_setflush(hammer2_chain_t *chain)
{
	hammer2_chain_t *parent;

	if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
		hammer2_spin_sh(&chain->core.spin);
		while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
			if ((parent = chain->parent) == NULL)
				break;
			/*
			 * Hand-over-hand: take the parent's spinlock before
			 * releasing the child's so chain->parent cannot be
			 * ripped out from under us mid-walk.
			 */
			hammer2_spin_sh(&parent->core.spin);
			hammer2_spin_unsh(&chain->core.spin);
			chain = parent;
		}
		hammer2_spin_unsh(&chain->core.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->pmp inherits pmp unless the chain is an inode (other than the
 * super-root inode).
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp,
		    hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);

	atomic_add_long(&hammer2_chain_allocs, 1);

	/*
	 * Construct the appropriate system structure.
	 */
	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/*
		 * Chain's are really only associated with the hmp but we
		 * maintain a pmp association for per-mount memory tracking
		 * purposes.  The pmp can be NULL.
		 */
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Only hammer2_chain_bulksnap() calls this function with these
		 * types.
210 */ 211 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 212 break; 213 default: 214 chain = NULL; 215 panic("hammer2_chain_alloc: unrecognized blockref type: %d", 216 bref->type); 217 } 218 219 /* 220 * Initialize the new chain structure. pmp must be set to NULL for 221 * chains belonging to the super-root topology of a device mount. 222 */ 223 if (pmp == hmp->spmp) 224 chain->pmp = NULL; 225 else 226 chain->pmp = pmp; 227 chain->hmp = hmp; 228 chain->bref = *bref; 229 chain->bytes = bytes; 230 chain->refs = 1; 231 chain->flags = HAMMER2_CHAIN_ALLOCATED; 232 233 /* 234 * Set the PFS boundary flag if this chain represents a PFS root. 235 */ 236 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 237 atomic_set_int(&chain->flags, HAMMER2_CHAIN_PFSBOUNDARY); 238 hammer2_chain_core_init(chain); 239 240 return (chain); 241 } 242 243 /* 244 * Initialize a chain's core structure. This structure used to be allocated 245 * but is now embedded. 246 * 247 * The core is not locked. No additional refs on the chain are made. 248 * (trans) must not be NULL if (core) is not NULL. 249 */ 250 void 251 hammer2_chain_core_init(hammer2_chain_t *chain) 252 { 253 /* 254 * Fresh core under nchain (no multi-homing of ochain's 255 * sub-tree). 256 */ 257 RB_INIT(&chain->core.rbtree); /* live chains */ 258 hammer2_mtx_init(&chain->lock, "h2chain"); 259 } 260 261 /* 262 * Add a reference to a chain element, preventing its destruction. 263 * 264 * (can be called with spinlock held) 265 */ 266 void 267 hammer2_chain_ref(hammer2_chain_t *chain) 268 { 269 if (atomic_fetchadd_int(&chain->refs, 1) == 0) { 270 /* 271 * 0->non-zero transition must ensure that chain is removed 272 * from the LRU list. 273 * 274 * NOTE: Already holding lru_spin here so we cannot call 275 * hammer2_chain_ref() to get it off lru_list, do 276 * it manually. 
		 */
		if (chain->flags & HAMMER2_CHAIN_ONLRU) {
			hammer2_pfs_t *pmp = chain->pmp;
			hammer2_spin_ex(&pmp->lru_spin);
			/*
			 * Re-test ONLRU after acquiring lru_spin in case
			 * another thread took the chain off the list in
			 * the interim.
			 */
			if (chain->flags & HAMMER2_CHAIN_ONLRU) {
				atomic_add_int(&pmp->lru_count, -1);
				atomic_clear_int(&chain->flags,
						 HAMMER2_CHAIN_ONLRU);
				TAILQ_REMOVE(&pmp->lru_list, chain, lru_node);
			}
			hammer2_spin_unex(&pmp->lru_spin);
		}
	}
#if 0
	kprintf("REFC %p %d %08x\n", chain, chain->refs - 1, chain->flags);
	print_backtrace(8);
#endif
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 *
 * Returns 0 on success, or EAGAIN when INSERT_RACE detects that the
 * parent's generation changed out from under the caller.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002
#define HAMMER2_CHAIN_INSERT_RACE	0x0004

static
int
hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		hammer2_spin_ex(&parent->core.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    parent->core.generation != generation) {
		error = EAGAIN;
		goto failed;
	}

	/*
	 * Insert chain.  A collision would indicate overlapping keys,
	 * which the topology must never produce.
	 */
	xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain);
	KASSERT(xchain == NULL,
		("hammer2_chain_insert: collision %p %p (key=%016jx)",
		chain, xchain, chain->bref.key));
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
	chain->parent = parent;
	++parent->core.chain_count;
	++parent->core.generation;	/* XXX incs for _get() too, XXX */

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
342 */ 343 if (flags & HAMMER2_CHAIN_INSERT_LIVE) 344 atomic_add_int(&parent->core.live_count, 1); 345 failed: 346 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 347 hammer2_spin_unex(&parent->core.spin); 348 return error; 349 } 350 351 /* 352 * Drop the caller's reference to the chain. When the ref count drops to 353 * zero this function will try to disassociate the chain from its parent and 354 * deallocate it, then recursely drop the parent using the implied ref 355 * from the chain's chain->parent. 356 */ 357 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain); 358 359 void 360 hammer2_chain_drop(hammer2_chain_t *chain) 361 { 362 u_int refs; 363 364 if (hammer2_debug & 0x200000) 365 Debugger("drop"); 366 #if 0 367 kprintf("DROP %p %d %08x\n", chain, chain->refs - 1, chain->flags); 368 print_backtrace(8); 369 #endif 370 371 KKASSERT(chain->refs > 0); 372 373 while (chain) { 374 refs = chain->refs; 375 cpu_ccfence(); 376 KKASSERT(refs > 0); 377 378 if (refs == 1) { 379 chain = hammer2_chain_lastdrop(chain); 380 } else { 381 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 382 break; 383 /* retry the same chain */ 384 } 385 } 386 } 387 388 /* 389 * Safe handling of the 1->0 transition on chain. Returns a chain for 390 * recursive drop or NULL, possibly returning the same chain if the atomic 391 * op fails. 392 * 393 * When two chains need to be recursively dropped we use the chain we 394 * would otherwise free to placehold the additional chain. It's a bit 395 * convoluted but we can't just recurse without potentially blowing out 396 * the kernel stack. 397 * 398 * The chain cannot be freed if it has any children. 399 * The chain cannot be freed if flagged MODIFIED unless we can dispose of that. 400 * The chain cannot be freed if flagged UPDATE unless we can dispose of that. 401 * 402 * The core spinlock is allowed nest child-to-parent (not parent-to-child). 
 */
static
hammer2_chain_t *
hammer2_chain_lastdrop(hammer2_chain_t *chain)
{
	hammer2_pfs_t *pmp;
	hammer2_dev_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *rdrop;
	hammer2_io_t *dio;

	/*
	 * Critical field access.
	 */
	hammer2_spin_ex(&chain->core.spin);

	if ((parent = chain->parent) != NULL) {
		/*
		 * If the chain has a parent the UPDATE bit prevents scrapping
		 * as the chain is needed to properly flush the parent.  Try
		 * to complete the 1->0 transition and return NULL.  Retry
		 * (return chain) if we are unable to complete the 1->0
		 * transition, else return NULL (nothing more to do).
		 *
		 * If the chain has a parent the MODIFIED bit prevents
		 * scrapping.
		 *
		 * Chains with UPDATE/MODIFIED are *not* put on the LRU list!
		 */
		if (chain->flags & (HAMMER2_CHAIN_UPDATE |
				    HAMMER2_CHAIN_MODIFIED)) {
			if (atomic_cmpset_int(&chain->refs, 1, 0)) {
				dio = hammer2_chain_drop_data(chain, 0);
				hammer2_spin_unex(&chain->core.spin);
				if (dio)
					hammer2_io_bqrelse(&dio);
				chain = NULL;
			} else {
				hammer2_spin_unex(&chain->core.spin);
			}
			return (chain);
		}
		/* spinlock still held */
	} else {
		/*
		 * The chain has no parent and can be flagged for destruction.
		 * Since it has no parent, UPDATE can also be cleared.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
		if (chain->flags & HAMMER2_CHAIN_UPDATE)
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);

		/*
		 * If the chain has children or if it has been MODIFIED and
		 * also recorded for DEDUP, we must still flush the chain.
		 *
		 * In the case where it has children, the DESTROY flag test
		 * in the flush code will prevent unnecessary flushes of
		 * MODIFIED chains that are not flagged DEDUP so don't worry
		 * about that here.
		 */
		if (chain->core.chain_count ||
		    (chain->flags & (HAMMER2_CHAIN_MODIFIED |
				     HAMMER2_CHAIN_DEDUP)) ==
		    (HAMMER2_CHAIN_MODIFIED |
		     HAMMER2_CHAIN_DEDUP)) {
			/*
			 * Put on flushq (should ensure refs > 1), retry
			 * the drop.
			 */
			hammer2_spin_unex(&chain->core.spin);
			hammer2_delayed_flush(chain);
			return(chain);	/* retry drop */
		}

		/*
		 * Otherwise we can scrap the MODIFIED bit if it is set,
		 * and continue along the freeing path.
		 */
		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			atomic_add_long(&hammer2_count_modified_chains, -1);
		}
		/* spinlock still held */
	}

	/* spinlock still held */
	dio = NULL;

	/*
	 * If any children exist we must leave the chain intact with refs == 0.
	 * They exist because chains are retained below us which have refs or
	 * may require flushing.  This case can occur when parent != NULL.
	 *
	 * Retry (return chain) if we fail to transition the refs to 0, else
	 * return NULL indicating nothing more to do.
	 *
	 * Chains with children are NOT put on the LRU list.
	 */
	if (chain->core.chain_count) {
		if (parent)
			hammer2_spin_ex(&parent->core.spin);
		if (atomic_cmpset_int(&chain->refs, 1, 0)) {
			dio = hammer2_chain_drop_data(chain, 1);
			hammer2_spin_unex(&chain->core.spin);
			if (parent)
				hammer2_spin_unex(&parent->core.spin);
			chain = NULL;
			if (dio)
				hammer2_io_bqrelse(&dio);
		} else {
			hammer2_spin_unex(&chain->core.spin);
			if (parent)
				hammer2_spin_unex(&parent->core.spin);
		}
		return (chain);
	}
	/* spinlock still held */
	/* no chains left under us */

	/*
	 * chain->core has no children left so no accessors can get to our
	 * chain from there.  Now we have to lock the parent core to interlock
	 * remaining possible accessors that might bump chain's refs before
	 * we can safely drop chain's refs with intent to free the chain.
	 */
	hmp = chain->hmp;
	pmp = chain->pmp;	/* can be NULL */
	rdrop = NULL;

	parent = chain->parent;

	/*
	 * WARNING! chain's spin lock is still held here, and other spinlocks
	 *	    will be acquired and released in the code below.  We
	 *	    cannot be making fancy procedure calls!
	 */

	/*
	 * We can cache the chain if it is associated with a pmp
	 * and not flagged as being destroyed or requesting a full
	 * release.  In this situation the chain is not removed
	 * from its parent, i.e. it can still be looked up.
	 *
	 * We intentionally do not cache DATA chains because these
	 * were likely used to load data into the logical buffer cache
	 * and will not be accessed again for some time.
	 */
	if ((chain->flags &
	     (HAMMER2_CHAIN_DESTROY | HAMMER2_CHAIN_RELEASE)) == 0 &&
	    chain->pmp &&
	    chain->bref.type != HAMMER2_BREF_TYPE_DATA) {
		if (parent)
			hammer2_spin_ex(&parent->core.spin);
		if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
			/*
			 * 1->0 transition failed, retry.  Do not drop
			 * the chain's data yet!
			 */
			if (parent)
				hammer2_spin_unex(&parent->core.spin);
			hammer2_spin_unex(&chain->core.spin);

			return(chain);
		}

		/*
		 * Success, be sure to clean out the chain's data
		 * before putting it on a queue that it might be
		 * reused from.
		 */
		dio = hammer2_chain_drop_data(chain, 1);

		KKASSERT((chain->flags & HAMMER2_CHAIN_ONLRU) == 0);
		hammer2_spin_ex(&pmp->lru_spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONLRU);
		TAILQ_INSERT_TAIL(&pmp->lru_list, chain, lru_node);

		/*
		 * If we are over the LRU limit we need to drop something.
		 * The victim gains a ref (manually, since we hold lru_spin)
		 * and RELEASE so it is actually freed on the chained drop.
		 */
		if (pmp->lru_count > HAMMER2_LRU_LIMIT) {
			rdrop = TAILQ_FIRST(&pmp->lru_list);
			atomic_clear_int(&rdrop->flags, HAMMER2_CHAIN_ONLRU);
			TAILQ_REMOVE(&pmp->lru_list, rdrop, lru_node);
			atomic_add_int(&rdrop->refs, 1);
			atomic_set_int(&rdrop->flags, HAMMER2_CHAIN_RELEASE);
		} else {
			atomic_add_int(&pmp->lru_count, 1);
		}
		hammer2_spin_unex(&pmp->lru_spin);
		if (parent) {
			hammer2_spin_unex(&parent->core.spin);
			parent = NULL;	/* safety */
		}
		hammer2_spin_unex(&chain->core.spin);
		if (dio)
			hammer2_io_bqrelse(&dio);

		return rdrop;
		/* NOT REACHED */
	}

	/*
	 * Spinlock the parent and try to drop the last ref on chain.
	 * On success determine if we should dispose of the chain
	 * (remove the chain from its parent, etc).
	 *
	 * (normal core locks are top-down recursive but we define
	 * core spinlocks as bottom-up recursive, so this is safe).
	 */
	if (parent) {
		hammer2_spin_ex(&parent->core.spin);
		if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
			/*
			 * 1->0 transition failed, retry.
			 */
			hammer2_spin_unex(&parent->core.spin);
			dio = hammer2_chain_drop_data(chain, 0);
			hammer2_spin_unex(&chain->core.spin);
			if (dio)
				hammer2_io_bqrelse(&dio);
			return(chain);
		}

		/*
		 * 1->0 transition successful, remove chain from the
		 * parent.
		 */
		if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
			RB_REMOVE(hammer2_chain_tree,
				  &parent->core.rbtree, chain);
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
			--parent->core.chain_count;
			chain->parent = NULL;
		}

		/*
		 * If our chain was the last chain in the parent's core the
		 * core is now empty and its parent might have to be
		 * re-dropped if it has 0 refs.
		 */
		if (parent->core.chain_count == 0) {
			rdrop = parent;
			atomic_add_int(&rdrop->refs, 1);
			/*
			if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0)
				rdrop = NULL;
			*/
		}
		hammer2_spin_unex(&parent->core.spin);
		parent = NULL;	/* safety */
		/* FALL THROUGH */
	}

	/*
	 * Successful 1->0 transition and the chain can be destroyed now.
	 *
	 * We still have the core spinlock, and core's chain_count is 0.
	 * Any parent spinlock is gone.
	 */
	hammer2_spin_unex(&chain->core.spin);
	KKASSERT(RB_EMPTY(&chain->core.rbtree) &&
		 chain->core.chain_count == 0);

	/*
	 * All spin locks are gone, no pointers remain to the chain, finish
	 * freeing it.
	 */
	KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE |
				  HAMMER2_CHAIN_MODIFIED)) == 0);
	dio = hammer2_chain_drop_data(chain, 1);
	if (dio)
		hammer2_io_bqrelse(&dio);

	/*
	 * Once chain resources are gone we can use the now dead chain
	 * structure to placehold what might otherwise require a recursive
	 * drop, because we have potentially two things to drop and can only
	 * return one directly.
	 */
	if (chain->flags & HAMMER2_CHAIN_ALLOCATED) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ALLOCATED);
		chain->hmp = NULL;
		kfree(chain, hmp->mchain);
	}

	/*
	 * Possible chaining loop when parent re-drop needed.
692 */ 693 return(rdrop); 694 } 695 696 /* 697 * On either last lock release or last drop 698 */ 699 static hammer2_io_t * 700 hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop) 701 { 702 hammer2_io_t *dio; 703 704 if ((dio = chain->dio) != NULL) { 705 chain->dio = NULL; 706 chain->data = NULL; 707 } else { 708 switch(chain->bref.type) { 709 case HAMMER2_BREF_TYPE_VOLUME: 710 case HAMMER2_BREF_TYPE_FREEMAP: 711 if (lastdrop) 712 chain->data = NULL; 713 break; 714 default: 715 KKASSERT(chain->data == NULL); 716 break; 717 } 718 } 719 return dio; 720 } 721 722 /* 723 * Lock a referenced chain element, acquiring its data with I/O if necessary, 724 * and specify how you would like the data to be resolved. 725 * 726 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 727 * 728 * The lock is allowed to recurse, multiple locking ops will aggregate 729 * the requested resolve types. Once data is assigned it will not be 730 * removed until the last unlock. 731 * 732 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 733 * (typically used to avoid device/logical buffer 734 * aliasing for data) 735 * 736 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 737 * the INITIAL-create state (indirect blocks only). 738 * 739 * Do not resolve data elements for DATA chains. 740 * (typically used to avoid device/logical buffer 741 * aliasing for data) 742 * 743 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 744 * 745 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 746 * it will be locked exclusive. 747 * 748 * NOTE: Embedded elements (volume header, inodes) are always resolved 749 * regardless. 750 * 751 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 752 * element will instantiate and zero its buffer, and flush it on 753 * release. 
754 * 755 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 756 * so as not to instantiate a device buffer, which could alias against 757 * a logical file buffer. However, if ALWAYS is specified the 758 * device buffer will be instantiated anyway. 759 * 760 * WARNING! This function blocks on I/O if data needs to be fetched. This 761 * blocking can run concurrent with other compatible lock holders 762 * who do not need data returning. The lock is not upgraded to 763 * exclusive during a data fetch, a separate bit is used to 764 * interlock I/O. However, an exclusive lock holder can still count 765 * on being interlocked against an I/O fetch managed by a shared 766 * lock holder. 767 */ 768 void 769 hammer2_chain_lock(hammer2_chain_t *chain, int how) 770 { 771 /* 772 * Ref and lock the element. Recursive locks are allowed. 773 */ 774 KKASSERT(chain->refs > 0); 775 atomic_add_int(&chain->lockcnt, 1); 776 777 TIMER(20); 778 /* 779 * Get the appropriate lock. 780 */ 781 if (how & HAMMER2_RESOLVE_SHARED) 782 hammer2_mtx_sh(&chain->lock); 783 else 784 hammer2_mtx_ex(&chain->lock); 785 ++curthread->td_tracker; 786 TIMER(21); 787 788 /* 789 * If we already have a valid data pointer no further action is 790 * necessary. 791 */ 792 if (chain->data) 793 return; 794 TIMER(22); 795 796 /* 797 * Do we have to resolve the data? 
798 */ 799 switch(how & HAMMER2_RESOLVE_MASK) { 800 case HAMMER2_RESOLVE_NEVER: 801 return; 802 case HAMMER2_RESOLVE_MAYBE: 803 if (chain->flags & HAMMER2_CHAIN_INITIAL) 804 return; 805 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 806 return; 807 #if 0 808 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 809 return; 810 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 811 return; 812 #endif 813 /* fall through */ 814 case HAMMER2_RESOLVE_ALWAYS: 815 default: 816 break; 817 } 818 819 /* 820 * Caller requires data 821 */ 822 hammer2_chain_load_data(chain); 823 } 824 825 /* 826 * Downgrade an exclusive chain lock to a shared chain lock. 827 * 828 * NOTE: There is no upgrade equivalent due to the ease of 829 * deadlocks in that direction. 830 */ 831 void 832 hammer2_chain_lock_downgrade(hammer2_chain_t *chain) 833 { 834 hammer2_mtx_downgrade(&chain->lock); 835 } 836 837 /* 838 * Obtains a second shared lock on the chain, does not account the second 839 * shared lock as being owned by the current thread. 840 * 841 * Caller must already own a shared lock on this chain. 842 * 843 * The lock function is required to obtain the second shared lock without 844 * blocking on pending exclusive requests. 845 */ 846 void 847 hammer2_chain_push_shared_lock(hammer2_chain_t *chain) 848 { 849 hammer2_mtx_sh_again(&chain->lock); 850 atomic_add_int(&chain->lockcnt, 1); 851 /* do not count in td_tracker for this thread */ 852 } 853 854 /* 855 * Accounts for a shared lock that was pushed to us as being owned by our 856 * thread. 857 */ 858 void 859 hammer2_chain_pull_shared_lock(hammer2_chain_t *chain) 860 { 861 ++curthread->td_tracker; 862 } 863 864 /* 865 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 866 * may be of any type. 867 * 868 * Once chain->data is set it cannot be disposed of until all locks are 869 * released. 
 */
void
hammer2_chain_load_data(hammer2_chain_t *chain)
{
	hammer2_blockref_t *bref;
	hammer2_dev_t *hmp;
	char *bdata;
	int error;

	/*
	 * Degenerate case, data already present.
	 */
	if (chain->data)
		return;
	TIMER(23);

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Gain the IOINPROG bit, interlocked block.  If another thread
	 * owns IOINPROG we set IOSIGNAL and tsleep until it wakes us,
	 * then re-evaluate from the top.
	 */
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		cpu_ccfence();
		if (oflags & HAMMER2_CHAIN_IOINPROG) {
			nflags = oflags | HAMMER2_CHAIN_IOSIGNAL;
			tsleep_interlock(&chain->flags, 0);
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				tsleep(&chain->flags, PINTERLOCKED,
				       "h2iocw", 0);
			}
			/* retry */
		} else {
			nflags = oflags | HAMMER2_CHAIN_IOINPROG;
			if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
				break;
			}
			/* retry */
		}
	}
	TIMER(24);

	/*
	 * We own CHAIN_IOINPROG
	 *
	 * Degenerate case if we raced another load.
	 */
	if (chain->data)
		goto done;

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		error = hammer2_io_new(hmp, bref->type,
				       bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->type,
					 bref->data_off, chain->bytes,
					 &chain->dio);
		hammer2_adjreadcounter(&chain->bref, chain->bytes);
	}
	TIMER(25);
	if (error) {
		chain->error = HAMMER2_ERROR_IO;
		kprintf("hammer2_chain_lock: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		goto done;
	}
	chain->error = 0;

	/*
	 * NOTE: A locked chain's data cannot be modified without first
	 *	 calling hammer2_chain_modify().
	 */

	/*
	 * Clear INITIAL.  In this case we used io_new() and the buffer has
	 * been zero'd and marked dirty.
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO;
	} else if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
		/*
		 * check data not currently synchronized due to
		 * modification.  XXX assumes data stays in the buffer
		 * cache, which might not be true (need biodep on flush
		 * to calculate crc?  or simple crc?).
		 */
	} else if ((chain->flags & HAMMER2_CHAIN_TESTEDGOOD) == 0) {
		/*
		 * Media data not yet verified; check the CRC (fast path
		 * via the dio mask, slow path via testcheck) and flag
		 * TESTEDGOOD on success so we do not re-verify.
		 */
		uint64_t mask;

		TIMER(26);
		if (hammer2_io_crc_good(chain, &mask)) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		} else if (hammer2_chain_testcheck(chain, bdata) == 0) {
			kprintf("chain %016jx.%02x meth=%02x "
				"CHECK FAIL %08x (flags=%08x)\n",
				chain->bref.data_off,
				chain->bref.type,
				chain->bref.methods,
				hammer2_icrc32(bdata, chain->bytes),
				chain->flags);
			chain->error = HAMMER2_ERROR_CHECK;
		} else {
			hammer2_io_crc_setmask(chain->dio, mask);
#if 0
			kprintf("chain %02x %016jx %u\n",
				chain->bref.type, chain->bref.key,
				chain->bytes);
#endif
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_TESTEDGOOD);
		}
	}
	TIMER(27);

	/*
	 * Setup the data pointer, either pointing it to an embedded data
	 * structure and copying the data from the buffer, or pointing it
	 * into the buffer.
	 *
	 * The buffer is not retained when copying to an embedded data
	 * structure in order to avoid potential deadlocks or recursions
	 * on the same physical buffer.
	 *
	 * WARNING! Other threads can start using the data the instant we
	 *	    set chain->data non-NULL.
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Copy data from bp to embedded buffer
		 */
		panic("hammer2_chain_lock: called on unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}

	/*
	 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested.
	 */
done:
	for (;;) {
		u_int oflags;
		u_int nflags;

		oflags = chain->flags;
		nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG |
				    HAMMER2_CHAIN_IOSIGNAL);
		KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG);
		if (atomic_cmpset_int(&chain->flags, oflags, nflags)) {
			if (oflags & HAMMER2_CHAIN_IOSIGNAL)
				wakeup(&chain->flags);
			break;
		}
	}
	TIMER(28);
}

/*
 * Unlock and deref a chain element.
 *
 * Remember that the presence of children under chain prevent the chain's
 * destruction but do not add additional references, so the dio will still
 * be dropped.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	u_int lockcnt;

	--curthread->td_tracker;
	/*
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				hammer2_mtx_unlock(&chain->lock);
				return;
			}
		} else {
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		}
		/* retry */
	}

	/*
	 * Normally we want to disassociate the data on the last unlock,
	 * but leave it intact if persist_refs is non-zero.  The persist-data
	 * user modifies persist_refs only while holding the chain locked
	 * so there should be no race on the last unlock here.
	 *
	 * NOTE: If this was a shared lock we have to temporarily upgrade it
	 *	 to prevent data load races.
1108 */ 1109 if (chain->persist_refs == 0) { 1110 hammer2_io_t *dio; 1111 1112 hammer2_mtx_upgrade(&chain->lock); 1113 if (chain->lockcnt == 0) { 1114 dio = hammer2_chain_drop_data(chain, 0); 1115 if (dio) 1116 hammer2_io_bqrelse(&dio); 1117 } 1118 } 1119 hammer2_mtx_unlock(&chain->lock); 1120 } 1121 1122 /* 1123 * Helper to obtain the blockref[] array base and count for a chain. 1124 * 1125 * XXX Not widely used yet, various use cases need to be validated and 1126 * converted to use this function. 1127 */ 1128 static 1129 hammer2_blockref_t * 1130 hammer2_chain_base_and_count(hammer2_chain_t *parent, int *countp) 1131 { 1132 hammer2_blockref_t *base; 1133 int count; 1134 1135 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1136 base = NULL; 1137 1138 switch(parent->bref.type) { 1139 case HAMMER2_BREF_TYPE_INODE: 1140 count = HAMMER2_SET_COUNT; 1141 break; 1142 case HAMMER2_BREF_TYPE_INDIRECT: 1143 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1144 count = parent->bytes / sizeof(hammer2_blockref_t); 1145 break; 1146 case HAMMER2_BREF_TYPE_VOLUME: 1147 count = HAMMER2_SET_COUNT; 1148 break; 1149 case HAMMER2_BREF_TYPE_FREEMAP: 1150 count = HAMMER2_SET_COUNT; 1151 break; 1152 default: 1153 panic("hammer2_chain_create_indirect: " 1154 "unrecognized blockref type: %d", 1155 parent->bref.type); 1156 count = 0; 1157 break; 1158 } 1159 } else { 1160 switch(parent->bref.type) { 1161 case HAMMER2_BREF_TYPE_INODE: 1162 base = &parent->data->ipdata.u.blockset.blockref[0]; 1163 count = HAMMER2_SET_COUNT; 1164 break; 1165 case HAMMER2_BREF_TYPE_INDIRECT: 1166 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1167 base = &parent->data->npdata[0]; 1168 count = parent->bytes / sizeof(hammer2_blockref_t); 1169 break; 1170 case HAMMER2_BREF_TYPE_VOLUME: 1171 base = &parent->data->voldata. 
1172 sroot_blockset.blockref[0]; 1173 count = HAMMER2_SET_COUNT; 1174 break; 1175 case HAMMER2_BREF_TYPE_FREEMAP: 1176 base = &parent->data->blkset.blockref[0]; 1177 count = HAMMER2_SET_COUNT; 1178 break; 1179 default: 1180 panic("hammer2_chain_create_indirect: " 1181 "unrecognized blockref type: %d", 1182 parent->bref.type); 1183 count = 0; 1184 break; 1185 } 1186 } 1187 *countp = count; 1188 1189 return base; 1190 } 1191 1192 /* 1193 * This counts the number of live blockrefs in a block array and 1194 * also calculates the point at which all remaining blockrefs are empty. 1195 * This routine can only be called on a live chain (DUPLICATED flag not set). 1196 * 1197 * NOTE: Flag is not set until after the count is complete, allowing 1198 * callers to test the flag without holding the spinlock. 1199 * 1200 * NOTE: If base is NULL the related chain is still in the INITIAL 1201 * state and there are no blockrefs to count. 1202 * 1203 * NOTE: live_count may already have some counts accumulated due to 1204 * creation and deletion and could even be initially negative. 1205 */ 1206 void 1207 hammer2_chain_countbrefs(hammer2_chain_t *chain, 1208 hammer2_blockref_t *base, int count) 1209 { 1210 hammer2_spin_ex(&chain->core.spin); 1211 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 1212 if (base) { 1213 while (--count >= 0) { 1214 if (base[count].type) 1215 break; 1216 } 1217 chain->core.live_zero = count + 1; 1218 while (count >= 0) { 1219 if (base[count].type) 1220 atomic_add_int(&chain->core.live_count, 1221 1); 1222 --count; 1223 } 1224 } else { 1225 chain->core.live_zero = 0; 1226 } 1227 /* else do not modify live_count */ 1228 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 1229 } 1230 hammer2_spin_unex(&chain->core.spin); 1231 } 1232 1233 /* 1234 * Resize the chain's physical storage allocation in-place. 
This function does 1235 * not adjust the data pointer and must be followed by (typically) a 1236 * hammer2_chain_modify() call to copy any old data over and adjust the 1237 * data pointer. 1238 * 1239 * Chains can be resized smaller without reallocating the storage. Resizing 1240 * larger will reallocate the storage. Excess or prior storage is reclaimed 1241 * asynchronously at a later time. 1242 * 1243 * Must be passed an exclusively locked parent and chain. 1244 * 1245 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 1246 * to avoid instantiating a device buffer that conflicts with the vnode data 1247 * buffer. However, because H2 can compress or encrypt data, the chain may 1248 * have a dio assigned to it in those situations, and they do not conflict. 1249 * 1250 * XXX return error if cannot resize. 1251 */ 1252 void 1253 hammer2_chain_resize(hammer2_inode_t *ip, 1254 hammer2_chain_t *parent, hammer2_chain_t *chain, 1255 hammer2_tid_t mtid, hammer2_off_t dedup_off, 1256 int nradix, int flags) 1257 { 1258 hammer2_dev_t *hmp; 1259 size_t obytes; 1260 size_t nbytes; 1261 1262 hmp = chain->hmp; 1263 1264 /* 1265 * Only data and indirect blocks can be resized for now. 1266 * (The volu root, inodes, and freemap elements use a fixed size). 1267 */ 1268 KKASSERT(chain != &hmp->vchain); 1269 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 1270 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT); 1271 KKASSERT(chain->parent == parent); 1272 1273 /* 1274 * Nothing to do if the element is already the proper size 1275 */ 1276 obytes = chain->bytes; 1277 nbytes = 1U << nradix; 1278 if (obytes == nbytes) 1279 return; 1280 1281 /* 1282 * Make sure the old data is instantiated so we can copy it. If this 1283 * is a data block, the device data may be superfluous since the data 1284 * might be in a logical block, but compressed or encrypted data is 1285 * another matter. 1286 * 1287 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set. 
1288 */ 1289 hammer2_chain_modify(chain, mtid, dedup_off, 0); 1290 1291 /* 1292 * Relocate the block, even if making it smaller (because different 1293 * block sizes may be in different regions). 1294 * 1295 * (data blocks only, we aren't copying the storage here). 1296 */ 1297 hammer2_freemap_alloc(chain, nbytes); 1298 chain->bytes = nbytes; 1299 /*ip->delta_dcount += (ssize_t)(nbytes - obytes);*/ /* XXX atomic */ 1300 1301 /* 1302 * We don't want the followup chain_modify() to try to copy data 1303 * from the old (wrong-sized) buffer. It won't know how much to 1304 * copy. This case should only occur during writes when the 1305 * originator already has the data to write in-hand. 1306 */ 1307 if (chain->dio) { 1308 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA); 1309 hammer2_io_brelse(&chain->dio); 1310 chain->data = NULL; 1311 } 1312 } 1313 1314 /* 1315 * Set the chain modified so its data can be changed by the caller. 1316 * 1317 * Sets bref.modify_tid to mtid only if mtid != 0. Note that bref.modify_tid 1318 * is a CLC (cluster level change) field and is not updated by parent 1319 * propagation during a flush. 1320 * 1321 * If the caller passes a non-zero dedup_off we assign data_off to that 1322 * instead of allocating a ne block. Caller must not modify the data already 1323 * present at the target offset. 1324 */ 1325 void 1326 hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid, 1327 hammer2_off_t dedup_off, int flags) 1328 { 1329 hammer2_blockref_t obref; 1330 hammer2_dev_t *hmp; 1331 hammer2_io_t *dio; 1332 int error; 1333 int wasinitial; 1334 int newmod; 1335 char *bdata; 1336 1337 hmp = chain->hmp; 1338 obref = chain->bref; 1339 KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0); 1340 1341 /* 1342 * Data is not optional for freemap chains (we must always be sure 1343 * to copy the data on COW storage allocations). 
1344 */ 1345 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 1346 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 1347 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) || 1348 (flags & HAMMER2_MODIFY_OPTDATA) == 0); 1349 } 1350 1351 /* 1352 * Data must be resolved if already assigned, unless explicitly 1353 * flagged otherwise. 1354 */ 1355 if (chain->data == NULL && (flags & HAMMER2_MODIFY_OPTDATA) == 0 && 1356 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 1357 hammer2_chain_load_data(chain); 1358 } 1359 1360 /* 1361 * Set MODIFIED to indicate that the chain has been modified. 1362 * Set UPDATE to ensure that the blockref is updated in the parent. 1363 */ 1364 if ((chain->flags & (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) == 1365 (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) { 1366 /* 1367 * Modified already set but a new allocation is needed 1368 * anyway because we recorded this data_off for possible 1369 * dedup operation. 1370 */ 1371 newmod = 1; 1372 } else if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1373 /* 1374 * Must set modified bit. 1375 */ 1376 atomic_add_long(&hammer2_count_modified_chains, 1); 1377 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1378 hammer2_pfs_memory_inc(chain->pmp); /* can be NULL */ 1379 newmod = 1; 1380 } else { 1381 /* 1382 * Already flagged modified, no new allocation is needed. 1383 */ 1384 newmod = 0; 1385 } 1386 1387 /* 1388 * Flag parent update required, clear DEDUP flag (already processed 1389 * above). 1390 */ 1391 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 1392 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1393 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DEDUP); 1394 1395 /* 1396 * The modification or re-modification requires an allocation and 1397 * possible COW. 1398 * 1399 * If dedup_off is non-zero, caller already has a data offset 1400 * containing the caller's desired data. 
The dedup offset is 1401 * allowed to be in a partially free state and we must be sure 1402 * to reset it to a fully allocated state to force two bulkfree 1403 * passes to free it again. 1404 * 1405 * XXX can a chain already be marked MODIFIED without a data 1406 * assignment? If not, assert here instead of testing the case. 1407 */ 1408 if (chain != &hmp->vchain && chain != &hmp->fchain) { 1409 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 || 1410 newmod 1411 ) { 1412 if (dedup_off) { 1413 chain->bref.data_off = dedup_off; 1414 chain->bytes = 1 << (dedup_off & 1415 HAMMER2_OFF_MASK_RADIX); 1416 atomic_set_int(&chain->flags, 1417 HAMMER2_CHAIN_DEDUP); 1418 hammer2_freemap_adjust(hmp, &chain->bref, 1419 HAMMER2_FREEMAP_DORECOVER); 1420 } else { 1421 hammer2_freemap_alloc(chain, chain->bytes); 1422 } 1423 /* XXX failed allocation */ 1424 } 1425 } 1426 1427 /* 1428 * Update mirror_tid and modify_tid. modify_tid is only updated 1429 * if not passed as zero (during flushes, parent propagation passes 1430 * the value 0). 1431 * 1432 * NOTE: chain->pmp could be the device spmp. 1433 */ 1434 chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1; 1435 if (mtid) 1436 chain->bref.modify_tid = mtid; 1437 1438 /* 1439 * Set BMAPUPD to tell the flush code that an existing blockmap entry 1440 * requires updating as well as to tell the delete code that the 1441 * chain's blockref might not exactly match (in terms of physical size 1442 * or block offset) the one in the parent's blocktable. The base key 1443 * of course will still match. 1444 */ 1445 if (chain->flags & HAMMER2_CHAIN_BMAPPED) 1446 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD); 1447 1448 /* 1449 * Short-cut data blocks which the caller does not need an actual 1450 * data reference to (aka OPTDATA), as long as the chain does not 1451 * already have a data pointer to the data. This generally means 1452 * that the modifications are being done via the logical buffer cache. 
1453 * The INITIAL flag relates only to the device data buffer and thus 1454 * remains unchange in this situation. 1455 */ 1456 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA && 1457 (flags & HAMMER2_MODIFY_OPTDATA) && 1458 chain->data == NULL) { 1459 goto skip2; 1460 } 1461 1462 /* 1463 * Clearing the INITIAL flag (for indirect blocks) indicates that 1464 * we've processed the uninitialized storage allocation. 1465 * 1466 * If this flag is already clear we are likely in a copy-on-write 1467 * situation but we have to be sure NOT to bzero the storage if 1468 * no data is present. 1469 */ 1470 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1471 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1472 wasinitial = 1; 1473 } else { 1474 wasinitial = 0; 1475 } 1476 1477 /* 1478 * Instantiate data buffer and possibly execute COW operation 1479 */ 1480 switch(chain->bref.type) { 1481 case HAMMER2_BREF_TYPE_VOLUME: 1482 case HAMMER2_BREF_TYPE_FREEMAP: 1483 /* 1484 * The data is embedded, no copy-on-write operation is 1485 * needed. 1486 */ 1487 KKASSERT(chain->dio == NULL); 1488 break; 1489 case HAMMER2_BREF_TYPE_INODE: 1490 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1491 case HAMMER2_BREF_TYPE_DATA: 1492 case HAMMER2_BREF_TYPE_INDIRECT: 1493 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1494 /* 1495 * Perform the copy-on-write operation 1496 * 1497 * zero-fill or copy-on-write depending on whether 1498 * chain->data exists or not and set the dirty state for 1499 * the new buffer. hammer2_io_new() will handle the 1500 * zero-fill. 1501 * 1502 * If a dedup_off was supplied this is an existing block 1503 * and no COW, copy, or further modification is required. 
1504 */ 1505 KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain); 1506 1507 if (wasinitial && dedup_off == 0) { 1508 error = hammer2_io_new(hmp, chain->bref.type, 1509 chain->bref.data_off, 1510 chain->bytes, &dio); 1511 } else { 1512 error = hammer2_io_bread(hmp, chain->bref.type, 1513 chain->bref.data_off, 1514 chain->bytes, &dio); 1515 } 1516 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1517 1518 /* 1519 * If an I/O error occurs make sure callers cannot accidently 1520 * modify the old buffer's contents and corrupt the filesystem. 1521 */ 1522 if (error) { 1523 kprintf("hammer2_chain_modify: hmp=%p I/O error\n", 1524 hmp); 1525 chain->error = HAMMER2_ERROR_IO; 1526 hammer2_io_brelse(&dio); 1527 hammer2_io_brelse(&chain->dio); 1528 chain->data = NULL; 1529 break; 1530 } 1531 chain->error = 0; 1532 bdata = hammer2_io_data(dio, chain->bref.data_off); 1533 1534 if (chain->data) { 1535 /* 1536 * COW (unless a dedup). 1537 */ 1538 KKASSERT(chain->dio != NULL); 1539 if (chain->data != (void *)bdata && dedup_off == 0) { 1540 bcopy(chain->data, bdata, chain->bytes); 1541 } 1542 } else if (wasinitial == 0) { 1543 /* 1544 * We have a problem. We were asked to COW but 1545 * we don't have any data to COW with! 1546 */ 1547 panic("hammer2_chain_modify: having a COW %p\n", 1548 chain); 1549 } 1550 1551 /* 1552 * Retire the old buffer, replace with the new. Dirty or 1553 * redirty the new buffer. 1554 * 1555 * WARNING! The system buffer cache may have already flushed 1556 * the buffer, so we must be sure to [re]dirty it 1557 * for further modification. 1558 * 1559 * If dedup_off was supplied, the caller is not 1560 * expected to make any further modification to the 1561 * buffer. 
1562 */ 1563 if (chain->dio) 1564 hammer2_io_bqrelse(&chain->dio); 1565 chain->data = (void *)bdata; 1566 chain->dio = dio; 1567 if (dedup_off == 0) 1568 hammer2_io_setdirty(dio); 1569 break; 1570 default: 1571 panic("hammer2_chain_modify: illegal non-embedded type %d", 1572 chain->bref.type); 1573 break; 1574 1575 } 1576 skip2: 1577 /* 1578 * setflush on parent indicating that the parent must recurse down 1579 * to us. Do not call on chain itself which might already have it 1580 * set. 1581 */ 1582 if (chain->parent) 1583 hammer2_chain_setflush(chain->parent); 1584 } 1585 1586 /* 1587 * Modify the chain associated with an inode. 1588 */ 1589 void 1590 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 1591 hammer2_tid_t mtid, int flags) 1592 { 1593 hammer2_inode_modify(ip); 1594 hammer2_chain_modify(chain, mtid, 0, flags); 1595 } 1596 1597 /* 1598 * Volume header data locks 1599 */ 1600 void 1601 hammer2_voldata_lock(hammer2_dev_t *hmp) 1602 { 1603 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 1604 } 1605 1606 void 1607 hammer2_voldata_unlock(hammer2_dev_t *hmp) 1608 { 1609 lockmgr(&hmp->vollk, LK_RELEASE); 1610 } 1611 1612 void 1613 hammer2_voldata_modify(hammer2_dev_t *hmp) 1614 { 1615 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1616 atomic_add_long(&hammer2_count_modified_chains, 1); 1617 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 1618 hammer2_pfs_memory_inc(hmp->vchain.pmp); 1619 } 1620 } 1621 1622 /* 1623 * This function returns the chain at the nearest key within the specified 1624 * range. The returned chain will be referenced but not locked. 1625 * 1626 * This function will recurse through chain->rbtree as necessary and will 1627 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1628 * the iteration value is less than the current value of *key_nextp. 
1629 * 1630 * The caller should use (*key_nextp) to calculate the actual range of 1631 * the returned element, which will be (key_beg to *key_nextp - 1), because 1632 * there might be another element which is superior to the returned element 1633 * and overlaps it. 1634 * 1635 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1636 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1637 * it will wind up being (key_end + 1). 1638 * 1639 * WARNING! Must be called with child's spinlock held. Spinlock remains 1640 * held through the operation. 1641 */ 1642 struct hammer2_chain_find_info { 1643 hammer2_chain_t *best; 1644 hammer2_key_t key_beg; 1645 hammer2_key_t key_end; 1646 hammer2_key_t key_next; 1647 }; 1648 1649 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 1650 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 1651 1652 static 1653 hammer2_chain_t * 1654 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 1655 hammer2_key_t key_beg, hammer2_key_t key_end) 1656 { 1657 struct hammer2_chain_find_info info; 1658 1659 info.best = NULL; 1660 info.key_beg = key_beg; 1661 info.key_end = key_end; 1662 info.key_next = *key_nextp; 1663 1664 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 1665 hammer2_chain_find_cmp, hammer2_chain_find_callback, 1666 &info); 1667 *key_nextp = info.key_next; 1668 #if 0 1669 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 1670 parent, key_beg, key_end, *key_nextp); 1671 #endif 1672 1673 return (info.best); 1674 } 1675 1676 static 1677 int 1678 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 1679 { 1680 struct hammer2_chain_find_info *info = data; 1681 hammer2_key_t child_beg; 1682 hammer2_key_t child_end; 1683 1684 child_beg = child->bref.key; 1685 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 1686 1687 if (child_end < info->key_beg) 1688 return(-1); 1689 if (child_beg > info->key_end) 
1690 return(1); 1691 return(0); 1692 } 1693 1694 static 1695 int 1696 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 1697 { 1698 struct hammer2_chain_find_info *info = data; 1699 hammer2_chain_t *best; 1700 hammer2_key_t child_end; 1701 1702 /* 1703 * WARNING! Do not discard DUPLICATED chains, it is possible that 1704 * we are catching an insertion half-way done. If a 1705 * duplicated chain turns out to be the best choice the 1706 * caller will re-check its flags after locking it. 1707 * 1708 * WARNING! Layerq is scanned forwards, exact matches should keep 1709 * the existing info->best. 1710 */ 1711 if ((best = info->best) == NULL) { 1712 /* 1713 * No previous best. Assign best 1714 */ 1715 info->best = child; 1716 } else if (best->bref.key <= info->key_beg && 1717 child->bref.key <= info->key_beg) { 1718 /* 1719 * Illegal overlap. 1720 */ 1721 KKASSERT(0); 1722 /*info->best = child;*/ 1723 } else if (child->bref.key < best->bref.key) { 1724 /* 1725 * Child has a nearer key and best is not flush with key_beg. 1726 * Set best to child. Truncate key_next to the old best key. 1727 */ 1728 info->best = child; 1729 if (info->key_next > best->bref.key || info->key_next == 0) 1730 info->key_next = best->bref.key; 1731 } else if (child->bref.key == best->bref.key) { 1732 /* 1733 * If our current best is flush with the child then this 1734 * is an illegal overlap. 1735 * 1736 * key_next will automatically be limited to the smaller of 1737 * the two end-points. 1738 */ 1739 KKASSERT(0); 1740 info->best = child; 1741 } else { 1742 /* 1743 * Keep the current best but truncate key_next to the child's 1744 * base. 1745 * 1746 * key_next will also automatically be limited to the smaller 1747 * of the two end-points (probably not necessary for this case 1748 * but we do it anyway). 
1749 */ 1750 if (info->key_next > child->bref.key || info->key_next == 0) 1751 info->key_next = child->bref.key; 1752 } 1753 1754 /* 1755 * Always truncate key_next based on child's end-of-range. 1756 */ 1757 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 1758 if (child_end && (info->key_next > child_end || info->key_next == 0)) 1759 info->key_next = child_end; 1760 1761 return(0); 1762 } 1763 1764 /* 1765 * Retrieve the specified chain from a media blockref, creating the 1766 * in-memory chain structure which reflects it. 1767 * 1768 * To handle insertion races pass the INSERT_RACE flag along with the 1769 * generation number of the core. NULL will be returned if the generation 1770 * number changes before we have a chance to insert the chain. Insert 1771 * races can occur because the parent might be held shared. 1772 * 1773 * Caller must hold the parent locked shared or exclusive since we may 1774 * need the parent's bref array to find our block. 1775 * 1776 * WARNING! chain->pmp is always set to NULL for any chain representing 1777 * part of the super-root topology. 1778 */ 1779 hammer2_chain_t * 1780 hammer2_chain_get(hammer2_chain_t *parent, int generation, 1781 hammer2_blockref_t *bref) 1782 { 1783 hammer2_dev_t *hmp = parent->hmp; 1784 hammer2_chain_t *chain; 1785 int error; 1786 1787 /* 1788 * Allocate a chain structure representing the existing media 1789 * entry. Resulting chain has one ref and is not locked. 1790 */ 1791 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 1792 chain = hammer2_chain_alloc(hmp, NULL, bref); 1793 else 1794 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 1795 /* ref'd chain returned */ 1796 1797 /* 1798 * Flag that the chain is in the parent's blockmap so delete/flush 1799 * knows what to do with it. 1800 */ 1801 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 1802 1803 /* 1804 * Link the chain into its parent. 
A spinlock is required to safely 1805 * access the RBTREE, and it is possible to collide with another 1806 * hammer2_chain_get() operation because the caller might only hold 1807 * a shared lock on the parent. 1808 * 1809 * NOTE: Get races can occur quite often when we distribute 1810 * asynchronous read-aheads across multiple threads. 1811 */ 1812 KKASSERT(parent->refs > 0); 1813 error = hammer2_chain_insert(parent, chain, 1814 HAMMER2_CHAIN_INSERT_SPIN | 1815 HAMMER2_CHAIN_INSERT_RACE, 1816 generation); 1817 if (error) { 1818 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 1819 /*kprintf("chain %p get race\n", chain);*/ 1820 hammer2_chain_drop(chain); 1821 chain = NULL; 1822 } else { 1823 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 1824 } 1825 1826 /* 1827 * Return our new chain referenced but not locked, or NULL if 1828 * a race occurred. 1829 */ 1830 return (chain); 1831 } 1832 1833 /* 1834 * Lookup initialization/completion API 1835 */ 1836 hammer2_chain_t * 1837 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 1838 { 1839 hammer2_chain_ref(parent); 1840 if (flags & HAMMER2_LOOKUP_SHARED) { 1841 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 1842 HAMMER2_RESOLVE_SHARED); 1843 } else { 1844 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1845 } 1846 return (parent); 1847 } 1848 1849 void 1850 hammer2_chain_lookup_done(hammer2_chain_t *parent) 1851 { 1852 if (parent) { 1853 hammer2_chain_unlock(parent); 1854 hammer2_chain_drop(parent); 1855 } 1856 } 1857 1858 hammer2_chain_t * 1859 hammer2_chain_getparent(hammer2_chain_t **parentp, int how) 1860 { 1861 hammer2_chain_t *oparent; 1862 hammer2_chain_t *nparent; 1863 1864 /* 1865 * Be careful of order, oparent must be unlocked before nparent 1866 * is locked below to avoid a deadlock. 
1867 */ 1868 oparent = *parentp; 1869 hammer2_spin_ex(&oparent->core.spin); 1870 nparent = oparent->parent; 1871 hammer2_chain_ref(nparent); 1872 hammer2_spin_unex(&oparent->core.spin); 1873 if (oparent) { 1874 hammer2_chain_unlock(oparent); 1875 hammer2_chain_drop(oparent); 1876 oparent = NULL; 1877 } 1878 1879 hammer2_chain_lock(nparent, how); 1880 *parentp = nparent; 1881 1882 return (nparent); 1883 } 1884 1885 /* 1886 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 1887 * (*parentp) typically points to an inode but can also point to a related 1888 * indirect block and this function will recurse upwards and find the inode 1889 * again. 1890 * 1891 * (*parentp) must be exclusively locked and referenced and can be an inode 1892 * or an existing indirect block within the inode. 1893 * 1894 * On return (*parentp) will be modified to point at the deepest parent chain 1895 * element encountered during the search, as a helper for an insertion or 1896 * deletion. The new (*parentp) will be locked and referenced and the old 1897 * will be unlocked and dereferenced (no change if they are both the same). 1898 * 1899 * The matching chain will be returned exclusively locked. If NOLOCK is 1900 * requested the chain will be returned only referenced. Note that the 1901 * parent chain must always be locked shared or exclusive, matching the 1902 * HAMMER2_LOOKUP_SHARED flag. We can conceivably lock it SHARED temporarily 1903 * when NOLOCK is specified but that complicates matters if *parentp must 1904 * inherit the chain. 1905 * 1906 * NOLOCK also implies NODATA, since an unlocked chain usually has a NULL 1907 * data pointer or can otherwise be in flux. 1908 * 1909 * NULL is returned if no match was found, but (*parentp) will still 1910 * potentially be adjusted. 1911 * 1912 * If a fatal error occurs (typically an I/O error), a dummy chain is 1913 * returned with chain->error and error-identifying information set. 
This 1914 * chain will assert if you try to do anything fancy with it. 1915 * 1916 * XXX Depending on where the error occurs we should allow continued iteration. 1917 * 1918 * On return (*key_nextp) will point to an iterative value for key_beg. 1919 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 1920 * 1921 * This function will also recurse up the chain if the key is not within the 1922 * current parent's range. (*parentp) can never be set to NULL. An iteration 1923 * can simply allow (*parentp) to float inside the loop. 1924 * 1925 * NOTE! chain->data is not always resolved. By default it will not be 1926 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 1927 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 1928 * BREF_TYPE_DATA as the device buffer can alias the logical file 1929 * buffer). 1930 */ 1931 1932 hammer2_chain_t * 1933 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 1934 hammer2_key_t key_beg, hammer2_key_t key_end, 1935 int *cache_indexp, int flags) 1936 { 1937 hammer2_dev_t *hmp; 1938 hammer2_chain_t *parent; 1939 hammer2_chain_t *chain; 1940 hammer2_blockref_t *base; 1941 hammer2_blockref_t *bref; 1942 hammer2_blockref_t bcopy; 1943 hammer2_key_t scan_beg; 1944 hammer2_key_t scan_end; 1945 int count = 0; 1946 int how_always = HAMMER2_RESOLVE_ALWAYS; 1947 int how_maybe = HAMMER2_RESOLVE_MAYBE; 1948 int how; 1949 int generation; 1950 int maxloops = 300000; 1951 1952 TIMER(8); 1953 1954 if (flags & HAMMER2_LOOKUP_ALWAYS) { 1955 how_maybe = how_always; 1956 how = HAMMER2_RESOLVE_ALWAYS; 1957 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 1958 how = HAMMER2_RESOLVE_NEVER; 1959 } else { 1960 how = HAMMER2_RESOLVE_MAYBE; 1961 } 1962 if (flags & HAMMER2_LOOKUP_SHARED) { 1963 how_maybe |= HAMMER2_RESOLVE_SHARED; 1964 how_always |= HAMMER2_RESOLVE_SHARED; 1965 how |= HAMMER2_RESOLVE_SHARED; 1966 } 1967 1968 /* 1969 * Recurse (*parentp) upward if necessary until the 
parent completely 1970 * encloses the key range or we hit the inode. 1971 * 1972 * This function handles races against the flusher doing a delete- 1973 * duplicate above us and re-homes the parent to the duplicate in 1974 * that case, otherwise we'd wind up recursing down a stale chain. 1975 */ 1976 parent = *parentp; 1977 hmp = parent->hmp; 1978 1979 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1980 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1981 scan_beg = parent->bref.key; 1982 scan_end = scan_beg + 1983 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 1984 if (key_beg >= scan_beg && key_end <= scan_end) 1985 break; 1986 parent = hammer2_chain_getparent(parentp, how_maybe); 1987 } 1988 1989 again: 1990 TIMER(9); 1991 if (--maxloops == 0) 1992 panic("hammer2_chain_lookup: maxloops"); 1993 /* 1994 * Locate the blockref array. Currently we do a fully associative 1995 * search through the array. 1996 */ 1997 switch(parent->bref.type) { 1998 case HAMMER2_BREF_TYPE_INODE: 1999 /* 2000 * Special shortcut for embedded data returns the inode 2001 * itself. Callers must detect this condition and access 2002 * the embedded data (the strategy code does this for us). 2003 * 2004 * This is only applicable to regular files and softlinks. 
2005 */ 2006 if (parent->data->ipdata.meta.op_flags & 2007 HAMMER2_OPFLAG_DIRECTDATA) { 2008 if (flags & HAMMER2_LOOKUP_NODIRECT) { 2009 chain = NULL; 2010 *key_nextp = key_end + 1; 2011 goto done; 2012 } 2013 hammer2_chain_ref(parent); 2014 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) 2015 hammer2_chain_lock(parent, how_always); 2016 *key_nextp = key_end + 1; 2017 return (parent); 2018 } 2019 base = &parent->data->ipdata.u.blockset.blockref[0]; 2020 count = HAMMER2_SET_COUNT; 2021 break; 2022 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2023 case HAMMER2_BREF_TYPE_INDIRECT: 2024 /* 2025 * Handle MATCHIND on the parent 2026 */ 2027 if (flags & HAMMER2_LOOKUP_MATCHIND) { 2028 scan_beg = parent->bref.key; 2029 scan_end = scan_beg + 2030 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 2031 if (key_beg == scan_beg && key_end == scan_end) { 2032 chain = parent; 2033 hammer2_chain_ref(chain); 2034 hammer2_chain_lock(chain, how_maybe); 2035 *key_nextp = scan_end + 1; 2036 goto done; 2037 } 2038 } 2039 /* 2040 * Optimize indirect blocks in the INITIAL state to avoid 2041 * I/O. 
2042 */ 2043 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2044 base = NULL; 2045 } else { 2046 if (parent->data == NULL) { 2047 kprintf("parent->data is NULL %p\n", parent); 2048 while (1) 2049 tsleep(parent, 0, "xxx", 0); 2050 } 2051 base = &parent->data->npdata[0]; 2052 } 2053 count = parent->bytes / sizeof(hammer2_blockref_t); 2054 break; 2055 case HAMMER2_BREF_TYPE_VOLUME: 2056 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2057 count = HAMMER2_SET_COUNT; 2058 break; 2059 case HAMMER2_BREF_TYPE_FREEMAP: 2060 base = &parent->data->blkset.blockref[0]; 2061 count = HAMMER2_SET_COUNT; 2062 break; 2063 default: 2064 kprintf("hammer2_chain_lookup: unrecognized " 2065 "blockref(B) type: %d", 2066 parent->bref.type); 2067 while (1) 2068 tsleep(&base, 0, "dead", 0); 2069 panic("hammer2_chain_lookup: unrecognized " 2070 "blockref(B) type: %d", 2071 parent->bref.type); 2072 base = NULL; /* safety */ 2073 count = 0; /* safety */ 2074 } 2075 TIMER(10); 2076 2077 /* 2078 * Merged scan to find next candidate. 2079 * 2080 * hammer2_base_*() functions require the parent->core.live_* fields 2081 * to be synchronized. 2082 * 2083 * We need to hold the spinlock to access the block array and RB tree 2084 * and to interlock chain creation. 2085 */ 2086 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2087 hammer2_chain_countbrefs(parent, base, count); 2088 2089 TIMER(11); 2090 2091 /* 2092 * Combined search 2093 */ 2094 hammer2_spin_ex(&parent->core.spin); 2095 chain = hammer2_combined_find(parent, base, count, 2096 cache_indexp, key_nextp, 2097 key_beg, key_end, 2098 &bref); 2099 generation = parent->core.generation; 2100 2101 TIMER(12); 2102 2103 /* 2104 * Exhausted parent chain, iterate. 2105 */ 2106 if (bref == NULL) { 2107 TIMER(13); 2108 hammer2_spin_unex(&parent->core.spin); 2109 if (key_beg == key_end) /* short cut single-key case */ 2110 return (NULL); 2111 2112 /* 2113 * Stop if we reached the end of the iteration. 
2114 */ 2115 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2116 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2117 return (NULL); 2118 } 2119 2120 /* 2121 * Calculate next key, stop if we reached the end of the 2122 * iteration, otherwise go up one level and loop. 2123 */ 2124 key_beg = parent->bref.key + 2125 ((hammer2_key_t)1 << parent->bref.keybits); 2126 if (key_beg == 0 || key_beg > key_end) 2127 return (NULL); 2128 parent = hammer2_chain_getparent(parentp, how_maybe); 2129 goto again; 2130 } 2131 2132 /* 2133 * Selected from blockref or in-memory chain. 2134 */ 2135 if (chain == NULL) { 2136 TIMER(14); 2137 bcopy = *bref; 2138 hammer2_spin_unex(&parent->core.spin); 2139 chain = hammer2_chain_get(parent, generation, 2140 &bcopy); 2141 if (chain == NULL) { 2142 /* 2143 kprintf("retry lookup parent %p keys %016jx:%016jx\n", 2144 parent, key_beg, key_end); 2145 */ 2146 goto again; 2147 } 2148 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 2149 hammer2_chain_drop(chain); 2150 goto again; 2151 } 2152 } else { 2153 TIMER(15); 2154 hammer2_chain_ref(chain); 2155 hammer2_spin_unex(&parent->core.spin); 2156 } 2157 2158 TIMER(16); 2159 /* 2160 * chain is referenced but not locked. We must lock the chain 2161 * to obtain definitive DUPLICATED/DELETED state 2162 */ 2163 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2164 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2165 hammer2_chain_lock(chain, how_maybe); 2166 } else { 2167 hammer2_chain_lock(chain, how); 2168 } 2169 TIMER(17); 2170 2171 /* 2172 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2173 * 2174 * NOTE: Chain's key range is not relevant as there might be 2175 * one-offs within the range that are not deleted. 2176 * 2177 * NOTE: Lookups can race delete-duplicate because 2178 * delete-duplicate does not lock the parent's core 2179 * (they just use the spinlock on the core). 
We must 2180 * check for races by comparing the DUPLICATED flag before 2181 * releasing the spinlock with the flag after locking the 2182 * chain. 2183 */ 2184 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2185 hammer2_chain_unlock(chain); 2186 hammer2_chain_drop(chain); 2187 key_beg = *key_nextp; 2188 if (key_beg == 0 || key_beg > key_end) 2189 return(NULL); 2190 goto again; 2191 } 2192 TIMER(18); 2193 2194 /* 2195 * If the chain element is an indirect block it becomes the new 2196 * parent and we loop on it. We must maintain our top-down locks 2197 * to prevent the flusher from interfering (i.e. doing a 2198 * delete-duplicate and leaving us recursing down a deleted chain). 2199 * 2200 * The parent always has to be locked with at least RESOLVE_MAYBE 2201 * so we can access its data. It might need a fixup if the caller 2202 * passed incompatible flags. Be careful not to cause a deadlock 2203 * as a data-load requires an exclusive lock. 2204 * 2205 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 2206 * range is within the requested key range we return the indirect 2207 * block and do NOT loop. This is usually only used to acquire 2208 * freemap nodes. 2209 */ 2210 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 2211 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2212 hammer2_chain_unlock(parent); 2213 hammer2_chain_drop(parent); 2214 *parentp = parent = chain; 2215 goto again; 2216 } 2217 TIMER(19); 2218 done: 2219 /* 2220 * All done, return the chain. 2221 * 2222 * If the caller does not want a locked chain, replace the lock with 2223 * a ref. Perhaps this can eventually be optimized to not obtain the 2224 * lock in the first place for situations where the data does not 2225 * need to be resolved. 2226 */ 2227 if (chain) { 2228 if (flags & HAMMER2_LOOKUP_NOLOCK) 2229 hammer2_chain_unlock(chain); 2230 } 2231 TIMER(20); 2232 2233 return (chain); 2234 } 2235 2236 /* 2237 * After having issued a lookup we can iterate all matching keys. 
2238 * 2239 * If chain is non-NULL we continue the iteration from just after it's index. 2240 * 2241 * If chain is NULL we assume the parent was exhausted and continue the 2242 * iteration at the next parent. 2243 * 2244 * If a fatal error occurs (typically an I/O error), a dummy chain is 2245 * returned with chain->error and error-identifying information set. This 2246 * chain will assert if you try to do anything fancy with it. 2247 * 2248 * XXX Depending on where the error occurs we should allow continued iteration. 2249 * 2250 * parent must be locked on entry and remains locked throughout. chain's 2251 * lock status must match flags. Chain is always at least referenced. 2252 * 2253 * WARNING! The MATCHIND flag does not apply to this function. 2254 */ 2255 hammer2_chain_t * 2256 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2257 hammer2_key_t *key_nextp, 2258 hammer2_key_t key_beg, hammer2_key_t key_end, 2259 int *cache_indexp, int flags) 2260 { 2261 hammer2_chain_t *parent; 2262 int how_maybe; 2263 2264 /* 2265 * Calculate locking flags for upward recursion. 2266 */ 2267 how_maybe = HAMMER2_RESOLVE_MAYBE; 2268 if (flags & HAMMER2_LOOKUP_SHARED) 2269 how_maybe |= HAMMER2_RESOLVE_SHARED; 2270 2271 parent = *parentp; 2272 2273 /* 2274 * Calculate the next index and recalculate the parent if necessary. 2275 */ 2276 if (chain) { 2277 key_beg = chain->bref.key + 2278 ((hammer2_key_t)1 << chain->bref.keybits); 2279 if ((flags & (HAMMER2_LOOKUP_NOLOCK | 2280 HAMMER2_LOOKUP_NOUNLOCK)) == 0) { 2281 hammer2_chain_unlock(chain); 2282 } 2283 hammer2_chain_drop(chain); 2284 2285 /* 2286 * chain invalid past this point, but we can still do a 2287 * pointer comparison w/parent. 2288 * 2289 * Any scan where the lookup returned degenerate data embedded 2290 * in the inode has an invalid index and must terminate. 
2291 */ 2292 if (chain == parent) 2293 return(NULL); 2294 if (key_beg == 0 || key_beg > key_end) 2295 return(NULL); 2296 chain = NULL; 2297 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2298 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2299 /* 2300 * We reached the end of the iteration. 2301 */ 2302 return (NULL); 2303 } else { 2304 /* 2305 * Continue iteration with next parent unless the current 2306 * parent covers the range. 2307 */ 2308 key_beg = parent->bref.key + 2309 ((hammer2_key_t)1 << parent->bref.keybits); 2310 if (key_beg == 0 || key_beg > key_end) 2311 return (NULL); 2312 parent = hammer2_chain_getparent(parentp, how_maybe); 2313 } 2314 2315 /* 2316 * And execute 2317 */ 2318 return (hammer2_chain_lookup(parentp, key_nextp, 2319 key_beg, key_end, 2320 cache_indexp, flags)); 2321 } 2322 2323 /* 2324 * The raw scan function is similar to lookup/next but does not seek to a key. 2325 * Blockrefs are iterated via first_bref = (parent, NULL) and 2326 * next_chain = (parent, bref). 2327 * 2328 * The passed-in parent must be locked and its data resolved. The function 2329 * nominally returns a locked and referenced *chainp != NULL for chains 2330 * the caller might need to recurse on (and will dipose of any *chainp passed 2331 * in). The caller must check the chain->bref.type either way. 2332 * 2333 * *chainp is not set for leaf elements. 2334 * 2335 * This function takes a pointer to a stack-based bref structure whos 2336 * contents is updated for each iteration. The same pointer is returned, 2337 * or NULL when the iteration is complete. *firstp must be set to 1 for 2338 * the first ieration. This function will set it to 0. 
2339 */ 2340 hammer2_blockref_t * 2341 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 2342 hammer2_blockref_t *bref, int *firstp, 2343 int *cache_indexp, int flags) 2344 { 2345 hammer2_dev_t *hmp; 2346 hammer2_blockref_t *base; 2347 hammer2_blockref_t *bref_ptr; 2348 hammer2_key_t key; 2349 hammer2_key_t next_key; 2350 hammer2_chain_t *chain = NULL; 2351 int count = 0; 2352 int how_always = HAMMER2_RESOLVE_ALWAYS; 2353 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2354 int how; 2355 int generation; 2356 int maxloops = 300000; 2357 2358 hmp = parent->hmp; 2359 2360 /* 2361 * Scan flags borrowed from lookup. 2362 */ 2363 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2364 how_maybe = how_always; 2365 how = HAMMER2_RESOLVE_ALWAYS; 2366 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 2367 how = HAMMER2_RESOLVE_NEVER; 2368 } else { 2369 how = HAMMER2_RESOLVE_MAYBE; 2370 } 2371 if (flags & HAMMER2_LOOKUP_SHARED) { 2372 how_maybe |= HAMMER2_RESOLVE_SHARED; 2373 how_always |= HAMMER2_RESOLVE_SHARED; 2374 how |= HAMMER2_RESOLVE_SHARED; 2375 } 2376 2377 /* 2378 * Calculate key to locate first/next element, unlocking the previous 2379 * element as we go. Be careful, the key calculation can overflow. 2380 * 2381 * (also reset bref to NULL) 2382 */ 2383 if (*firstp) { 2384 key = 0; 2385 *firstp = 0; 2386 } else { 2387 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 2388 if ((chain = *chainp) != NULL) { 2389 *chainp = NULL; 2390 hammer2_chain_unlock(chain); 2391 hammer2_chain_drop(chain); 2392 chain = NULL; 2393 } 2394 if (key == 0) { 2395 bref = NULL; 2396 goto done; 2397 } 2398 } 2399 2400 again: 2401 KKASSERT(parent->error == 0); /* XXX case not handled yet */ 2402 if (--maxloops == 0) 2403 panic("hammer2_chain_scan: maxloops"); 2404 /* 2405 * Locate the blockref array. Currently we do a fully associative 2406 * search through the array. 
2407 */ 2408 switch(parent->bref.type) { 2409 case HAMMER2_BREF_TYPE_INODE: 2410 /* 2411 * An inode with embedded data has no sub-chains. 2412 * 2413 * WARNING! Bulk scan code may pass a static chain marked 2414 * as BREF_TYPE_INODE with a copy of the volume 2415 * root blockset to snapshot the volume. 2416 */ 2417 if (parent->data->ipdata.meta.op_flags & 2418 HAMMER2_OPFLAG_DIRECTDATA) { 2419 bref = NULL; 2420 goto done; 2421 } 2422 base = &parent->data->ipdata.u.blockset.blockref[0]; 2423 count = HAMMER2_SET_COUNT; 2424 break; 2425 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2426 case HAMMER2_BREF_TYPE_INDIRECT: 2427 /* 2428 * Optimize indirect blocks in the INITIAL state to avoid 2429 * I/O. 2430 */ 2431 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2432 base = NULL; 2433 } else { 2434 if (parent->data == NULL) 2435 panic("parent->data is NULL"); 2436 base = &parent->data->npdata[0]; 2437 } 2438 count = parent->bytes / sizeof(hammer2_blockref_t); 2439 break; 2440 case HAMMER2_BREF_TYPE_VOLUME: 2441 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2442 count = HAMMER2_SET_COUNT; 2443 break; 2444 case HAMMER2_BREF_TYPE_FREEMAP: 2445 base = &parent->data->blkset.blockref[0]; 2446 count = HAMMER2_SET_COUNT; 2447 break; 2448 default: 2449 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 2450 parent->bref.type); 2451 base = NULL; /* safety */ 2452 count = 0; /* safety */ 2453 } 2454 2455 /* 2456 * Merged scan to find next candidate. 2457 * 2458 * hammer2_base_*() functions require the parent->core.live_* fields 2459 * to be synchronized. 2460 * 2461 * We need to hold the spinlock to access the block array and RB tree 2462 * and to interlock chain creation. 
2463 */ 2464 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2465 hammer2_chain_countbrefs(parent, base, count); 2466 2467 next_key = 0; 2468 bref_ptr = NULL; 2469 hammer2_spin_ex(&parent->core.spin); 2470 chain = hammer2_combined_find(parent, base, count, 2471 cache_indexp, &next_key, 2472 key, HAMMER2_KEY_MAX, 2473 &bref_ptr); 2474 generation = parent->core.generation; 2475 2476 /* 2477 * Exhausted parent chain, we're done. 2478 */ 2479 if (bref_ptr == NULL) { 2480 hammer2_spin_unex(&parent->core.spin); 2481 KKASSERT(chain == NULL); 2482 bref = NULL; 2483 goto done; 2484 } 2485 2486 /* 2487 * Copy into the supplied stack-based blockref. 2488 */ 2489 *bref = *bref_ptr; 2490 2491 /* 2492 * Selected from blockref or in-memory chain. 2493 */ 2494 if (chain == NULL) { 2495 switch(bref->type) { 2496 case HAMMER2_BREF_TYPE_INODE: 2497 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2498 case HAMMER2_BREF_TYPE_INDIRECT: 2499 case HAMMER2_BREF_TYPE_VOLUME: 2500 case HAMMER2_BREF_TYPE_FREEMAP: 2501 /* 2502 * Recursion, always get the chain 2503 */ 2504 hammer2_spin_unex(&parent->core.spin); 2505 chain = hammer2_chain_get(parent, generation, bref); 2506 if (chain == NULL) { 2507 kprintf("retry scan parent %p keys %016jx\n", 2508 parent, key); 2509 goto again; 2510 } 2511 if (bcmp(bref, bref_ptr, sizeof(*bref))) { 2512 hammer2_chain_drop(chain); 2513 chain = NULL; 2514 goto again; 2515 } 2516 break; 2517 default: 2518 /* 2519 * No recursion, do not waste time instantiating 2520 * a chain, just iterate using the bref. 2521 */ 2522 hammer2_spin_unex(&parent->core.spin); 2523 break; 2524 } 2525 } else { 2526 /* 2527 * Recursion or not we need the chain in order to supply 2528 * the bref. 2529 */ 2530 hammer2_chain_ref(chain); 2531 hammer2_spin_unex(&parent->core.spin); 2532 } 2533 2534 /* 2535 * chain is referenced but not locked. 
We must lock the chain 2536 * to obtain definitive DUPLICATED/DELETED state 2537 */ 2538 if (chain) 2539 hammer2_chain_lock(chain, how); 2540 2541 /* 2542 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2543 * 2544 * NOTE: chain's key range is not relevant as there might be 2545 * one-offs within the range that are not deleted. 2546 * 2547 * NOTE: XXX this could create problems with scans used in 2548 * situations other than mount-time recovery. 2549 * 2550 * NOTE: Lookups can race delete-duplicate because 2551 * delete-duplicate does not lock the parent's core 2552 * (they just use the spinlock on the core). We must 2553 * check for races by comparing the DUPLICATED flag before 2554 * releasing the spinlock with the flag after locking the 2555 * chain. 2556 */ 2557 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 2558 hammer2_chain_unlock(chain); 2559 hammer2_chain_drop(chain); 2560 chain = NULL; 2561 2562 key = next_key; 2563 if (key == 0) { 2564 bref = NULL; 2565 goto done; 2566 } 2567 goto again; 2568 } 2569 2570 done: 2571 /* 2572 * All done, return the bref or NULL, supply chain if necessary. 2573 */ 2574 if (chain) 2575 *chainp = chain; 2576 return (bref); 2577 } 2578 2579 /* 2580 * Create and return a new hammer2 system memory structure of the specified 2581 * key, type and size and insert it under (*parentp). This is a full 2582 * insertion, based on the supplied key/keybits, and may involve creating 2583 * indirect blocks and moving other chains around via delete/duplicate. 2584 * 2585 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2586 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2587 * FULL. This typically means that the caller is creating the chain after 2588 * doing a hammer2_chain_lookup(). 2589 * 2590 * (*parentp) must be exclusive locked and may be replaced on return 2591 * depending on how much work the function had to do. 
2592 * 2593 * (*parentp) must not be errored or this function will assert. 2594 * 2595 * (*chainp) usually starts out NULL and returns the newly created chain, 2596 * but if the caller desires the caller may allocate a disconnected chain 2597 * and pass it in instead. 2598 * 2599 * This function should NOT be used to insert INDIRECT blocks. It is 2600 * typically used to create/insert inodes and data blocks. 2601 * 2602 * Caller must pass-in an exclusively locked parent the new chain is to 2603 * be inserted under, and optionally pass-in a disconnected, exclusively 2604 * locked chain to insert (else we create a new chain). The function will 2605 * adjust (*parentp) as necessary, create or connect the chain, and 2606 * return an exclusively locked chain in *chainp. 2607 * 2608 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 2609 * and will be reassigned. 2610 */ 2611 int 2612 hammer2_chain_create(hammer2_chain_t **parentp, 2613 hammer2_chain_t **chainp, hammer2_pfs_t *pmp, 2614 hammer2_key_t key, int keybits, int type, size_t bytes, 2615 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 2616 { 2617 hammer2_dev_t *hmp; 2618 hammer2_chain_t *chain; 2619 hammer2_chain_t *parent; 2620 hammer2_blockref_t *base; 2621 hammer2_blockref_t dummy; 2622 int allocated = 0; 2623 int error = 0; 2624 int count; 2625 int maxloops = 300000; 2626 2627 /* 2628 * Topology may be crossing a PFS boundary. 2629 */ 2630 parent = *parentp; 2631 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2632 KKASSERT(parent->error == 0); 2633 hmp = parent->hmp; 2634 chain = *chainp; 2635 2636 if (chain == NULL) { 2637 /* 2638 * First allocate media space and construct the dummy bref, 2639 * then allocate the in-memory chain structure. Set the 2640 * INITIAL flag for fresh chains which do not have embedded 2641 * data. 2642 * 2643 * XXX for now set the check mode of the child based on 2644 * the parent or, if the parent is an inode, the 2645 * specification in the inode. 
2646 */ 2647 bzero(&dummy, sizeof(dummy)); 2648 dummy.type = type; 2649 dummy.key = key; 2650 dummy.keybits = keybits; 2651 dummy.data_off = hammer2_getradix(bytes); 2652 dummy.methods = parent->bref.methods; 2653 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && 2654 parent->data) { 2655 dummy.methods &= ~HAMMER2_ENC_CHECK(-1); 2656 dummy.methods |= HAMMER2_ENC_CHECK( 2657 parent->data->ipdata.meta.check_algo); 2658 } 2659 2660 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 2661 2662 /* 2663 * Lock the chain manually, chain_lock will load the chain 2664 * which we do NOT want to do. (note: chain->refs is set 2665 * to 1 by chain_alloc() for us, but lockcnt is not). 2666 */ 2667 chain->lockcnt = 1; 2668 hammer2_mtx_ex(&chain->lock); 2669 allocated = 1; 2670 ++curthread->td_tracker; 2671 2672 /* 2673 * Set INITIAL to optimize I/O. The flag will generally be 2674 * processed when we call hammer2_chain_modify(). 2675 * 2676 * Recalculate bytes to reflect the actual media block 2677 * allocation. 
2678 */ 2679 bytes = (hammer2_off_t)1 << 2680 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 2681 chain->bytes = bytes; 2682 2683 switch(type) { 2684 case HAMMER2_BREF_TYPE_VOLUME: 2685 case HAMMER2_BREF_TYPE_FREEMAP: 2686 panic("hammer2_chain_create: called with volume type"); 2687 break; 2688 case HAMMER2_BREF_TYPE_INDIRECT: 2689 panic("hammer2_chain_create: cannot be used to" 2690 "create indirect block"); 2691 break; 2692 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2693 panic("hammer2_chain_create: cannot be used to" 2694 "create freemap root or node"); 2695 break; 2696 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2697 KKASSERT(bytes == sizeof(chain->data->bmdata)); 2698 /* fall through */ 2699 case HAMMER2_BREF_TYPE_INODE: 2700 case HAMMER2_BREF_TYPE_DATA: 2701 default: 2702 /* 2703 * leave chain->data NULL, set INITIAL 2704 */ 2705 KKASSERT(chain->data == NULL); 2706 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 2707 break; 2708 } 2709 } else { 2710 /* 2711 * We are reattaching a previously deleted chain, possibly 2712 * under a new parent and possibly with a new key/keybits. 2713 * The chain does not have to be in a modified state. The 2714 * UPDATE flag will be set later on in this routine. 2715 * 2716 * Do NOT mess with the current state of the INITIAL flag. 2717 */ 2718 chain->bref.key = key; 2719 chain->bref.keybits = keybits; 2720 if (chain->flags & HAMMER2_CHAIN_DELETED) 2721 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2722 KKASSERT(chain->parent == NULL); 2723 } 2724 if (flags & HAMMER2_INSERT_PFSROOT) 2725 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 2726 else 2727 chain->bref.flags &= ~HAMMER2_BREF_FLAG_PFSROOT; 2728 2729 /* 2730 * Calculate how many entries we have in the blockref array and 2731 * determine if an indirect block is required. 
2732 */ 2733 again: 2734 if (--maxloops == 0) 2735 panic("hammer2_chain_create: maxloops"); 2736 2737 switch(parent->bref.type) { 2738 case HAMMER2_BREF_TYPE_INODE: 2739 if ((parent->data->ipdata.meta.op_flags & 2740 HAMMER2_OPFLAG_DIRECTDATA) != 0) { 2741 kprintf("hammer2: parent set for direct-data! " 2742 "pkey=%016jx ckey=%016jx\n", 2743 parent->bref.key, 2744 chain->bref.key); 2745 } 2746 KKASSERT((parent->data->ipdata.meta.op_flags & 2747 HAMMER2_OPFLAG_DIRECTDATA) == 0); 2748 KKASSERT(parent->data != NULL); 2749 base = &parent->data->ipdata.u.blockset.blockref[0]; 2750 count = HAMMER2_SET_COUNT; 2751 break; 2752 case HAMMER2_BREF_TYPE_INDIRECT: 2753 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2754 if (parent->flags & HAMMER2_CHAIN_INITIAL) 2755 base = NULL; 2756 else 2757 base = &parent->data->npdata[0]; 2758 count = parent->bytes / sizeof(hammer2_blockref_t); 2759 break; 2760 case HAMMER2_BREF_TYPE_VOLUME: 2761 KKASSERT(parent->data != NULL); 2762 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2763 count = HAMMER2_SET_COUNT; 2764 break; 2765 case HAMMER2_BREF_TYPE_FREEMAP: 2766 KKASSERT(parent->data != NULL); 2767 base = &parent->data->blkset.blockref[0]; 2768 count = HAMMER2_SET_COUNT; 2769 break; 2770 default: 2771 panic("hammer2_chain_create: unrecognized blockref type: %d", 2772 parent->bref.type); 2773 base = NULL; 2774 count = 0; 2775 break; 2776 } 2777 2778 /* 2779 * Make sure we've counted the brefs 2780 */ 2781 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2782 hammer2_chain_countbrefs(parent, base, count); 2783 2784 KKASSERT(parent->core.live_count >= 0 && 2785 parent->core.live_count <= count); 2786 2787 /* 2788 * If no free blockref could be found we must create an indirect 2789 * block and move a number of blockrefs into it. With the parent 2790 * locked we can safely lock each child in order to delete+duplicate 2791 * it without causing a deadlock. 
2792 * 2793 * This may return the new indirect block or the old parent depending 2794 * on where the key falls. NULL is returned on error. 2795 */ 2796 if (parent->core.live_count == count) { 2797 hammer2_chain_t *nparent; 2798 2799 nparent = hammer2_chain_create_indirect(parent, key, keybits, 2800 mtid, type, &error); 2801 if (nparent == NULL) { 2802 if (allocated) 2803 hammer2_chain_drop(chain); 2804 chain = NULL; 2805 goto done; 2806 } 2807 if (parent != nparent) { 2808 hammer2_chain_unlock(parent); 2809 hammer2_chain_drop(parent); 2810 parent = *parentp = nparent; 2811 } 2812 goto again; 2813 } 2814 2815 /* 2816 * Link the chain into its parent. 2817 */ 2818 if (chain->parent != NULL) 2819 panic("hammer2: hammer2_chain_create: chain already connected"); 2820 KKASSERT(chain->parent == NULL); 2821 hammer2_chain_insert(parent, chain, 2822 HAMMER2_CHAIN_INSERT_SPIN | 2823 HAMMER2_CHAIN_INSERT_LIVE, 2824 0); 2825 2826 if (allocated) { 2827 /* 2828 * Mark the newly created chain modified. This will cause 2829 * UPDATE to be set and process the INITIAL flag. 2830 * 2831 * Device buffers are not instantiated for DATA elements 2832 * as these are handled by logical buffers. 2833 * 2834 * Indirect and freemap node indirect blocks are handled 2835 * by hammer2_chain_create_indirect() and not by this 2836 * function. 2837 * 2838 * Data for all other bref types is expected to be 2839 * instantiated (INODE, LEAF). 2840 */ 2841 switch(chain->bref.type) { 2842 case HAMMER2_BREF_TYPE_DATA: 2843 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2844 case HAMMER2_BREF_TYPE_INODE: 2845 hammer2_chain_modify(chain, mtid, dedup_off, 2846 HAMMER2_MODIFY_OPTDATA); 2847 break; 2848 default: 2849 /* 2850 * Remaining types are not supported by this function. 2851 * In particular, INDIRECT and LEAF_NODE types are 2852 * handled by create_indirect(). 
2853 */ 2854 panic("hammer2_chain_create: bad type: %d", 2855 chain->bref.type); 2856 /* NOT REACHED */ 2857 break; 2858 } 2859 } else { 2860 /* 2861 * When reconnecting a chain we must set UPDATE and 2862 * setflush so the flush recognizes that it must update 2863 * the bref in the parent. 2864 */ 2865 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) 2866 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 2867 } 2868 2869 /* 2870 * We must setflush(parent) to ensure that it recurses through to 2871 * chain. setflush(chain) might not work because ONFLUSH is possibly 2872 * already set in the chain (so it won't recurse up to set it in the 2873 * parent). 2874 */ 2875 hammer2_chain_setflush(parent); 2876 2877 done: 2878 *chainp = chain; 2879 2880 return (error); 2881 } 2882 2883 /* 2884 * Move the chain from its old parent to a new parent. The chain must have 2885 * already been deleted or already disconnected (or never associated) with 2886 * a parent. The chain is reassociated with the new parent and the deleted 2887 * flag will be cleared (no longer deleted). The chain's modification state 2888 * is not altered. 2889 * 2890 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 2891 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2892 * FULL. This typically means that the caller is creating the chain after 2893 * doing a hammer2_chain_lookup(). 2894 * 2895 * A non-NULL bref is typically passed when key and keybits must be overridden. 2896 * Note that hammer2_cluster_duplicate() *ONLY* uses the key and keybits fields 2897 * from a passed-in bref and uses the old chain's bref for everything else. 2898 * 2899 * Neither (parent) or (chain) can be errored. 2900 * 2901 * If (parent) is non-NULL then the new duplicated chain is inserted under 2902 * the parent. 
2903 * 2904 * If (parent) is NULL then the newly duplicated chain is not inserted 2905 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 2906 * passing into hammer2_chain_create() after this function returns). 2907 * 2908 * WARNING! This function calls create which means it can insert indirect 2909 * blocks. This can cause other unrelated chains in the parent to 2910 * be moved to a newly inserted indirect block in addition to the 2911 * specific chain. 2912 */ 2913 void 2914 hammer2_chain_rename(hammer2_blockref_t *bref, 2915 hammer2_chain_t **parentp, hammer2_chain_t *chain, 2916 hammer2_tid_t mtid, int flags) 2917 { 2918 hammer2_dev_t *hmp; 2919 hammer2_chain_t *parent; 2920 size_t bytes; 2921 2922 /* 2923 * WARNING! We should never resolve DATA to device buffers 2924 * (XXX allow it if the caller did?), and since 2925 * we currently do not have the logical buffer cache 2926 * buffer in-hand to fix its cached physical offset 2927 * we also force the modify code to not COW it. XXX 2928 */ 2929 hmp = chain->hmp; 2930 KKASSERT(chain->parent == NULL); 2931 KKASSERT(chain->error == 0); 2932 2933 /* 2934 * Now create a duplicate of the chain structure, associating 2935 * it with the same core, making it the same size, pointing it 2936 * to the same bref (the same media block). 2937 */ 2938 if (bref == NULL) 2939 bref = &chain->bref; 2940 bytes = (hammer2_off_t)1 << 2941 (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 2942 2943 /* 2944 * If parent is not NULL the duplicated chain will be entered under 2945 * the parent and the UPDATE bit set to tell flush to update 2946 * the blockref. 2947 * 2948 * We must setflush(parent) to ensure that it recurses through to 2949 * chain. setflush(chain) might not work because ONFLUSH is possibly 2950 * already set in the chain (so it won't recurse up to set it in the 2951 * parent). 2952 * 2953 * Having both chains locked is extremely important for atomicy. 
	 */
	if (parentp && (parent = *parentp) != NULL) {
		KKASSERT(hammer2_mtx_owned(&parent->lock));
		KKASSERT(parent->refs > 0);
		KKASSERT(parent->error == 0);

		hammer2_chain_create(parentp, &chain, chain->pmp,
				     bref->key, bref->keybits, bref->type,
				     chain->bytes, mtid, 0, flags);
		KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE);
		hammer2_chain_setflush(*parentp);
	}
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE) as well as the
 * parent's blockmap.  Both chain and its parent must be locked.
 *
 * parent may not be errored.  chain can be errored.
 */
static void
_hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain,
			     hammer2_tid_t mtid, int flags)
{
	hammer2_dev_t *hmp;

	KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED |
				  HAMMER2_CHAIN_FICTITIOUS)) == 0);
	KKASSERT(chain->parent == parent);
	hmp = chain->hmp;

	if (chain->flags & HAMMER2_CHAIN_BMAPPED) {
		/*
		 * Chain is blockmapped, so there must be a parent.
		 * Atomically remove the chain from the parent and remove
		 * the blockmap entry.  The parent must be set modified
		 * to remove the blockmap entry.
		 */
		hammer2_blockref_t *base;
		int count;

		KKASSERT(parent != NULL);
		KKASSERT(parent->error == 0);
		KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0);
		hammer2_chain_modify(parent, mtid, 0, HAMMER2_MODIFY_OPTDATA);

		/*
		 * Calculate blockmap pointer.  The removal from the RBTREE
		 * and the live_count/generation updates are done atomically
		 * under the parent's core spinlock.
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
		hammer2_spin_ex(&parent->core.spin);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			/*
			 * Access the inode's block array.  However, there
			 * is no block array if the inode is flagged
			 * DIRECTDATA.  The DIRECTDATA case typically only
			 * occurs when a hardlink has been shifted up the
			 * tree and the original inode gets replaced with
			 * an OBJTYPE_HARDLINK placeholding inode.
			 */
			if (parent->data &&
			    (parent->data->ipdata.meta.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
				base =
				   &parent->data->ipdata.u.blockset.blockref[0];
			} else {
				base = NULL;
			}
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			if (parent->data)
				base = &parent->data->npdata[0];
			else
				base = NULL;
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &parent->data->voldata.
					sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &parent->data->blkset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			base = NULL;
			count = 0;
			/*
			 * NOTE(review): panic message names
			 * "hammer2_flush_pass2" — looks like a stale
			 * copy/paste; this is the delete helper.
			 */
			panic("hammer2_flush_pass2: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
		}

		/*
		 * Delete blockmapped chain from its parent.
		 *
		 * The parent is not affected by any statistics in chain
		 * which are pending synchronization.  That is, there is
		 * nothing to undo in the parent since they have not yet
		 * been incorporated into the parent.
		 *
		 * The parent is affected by statistics stored in inodes.
		 * Those have already been synchronized, so they must be
		 * undone.  XXX split update possible w/delete in middle?
		 */
		if (base) {
			int cache_index = -1;
			hammer2_base_delete(parent, base, count,
					    &cache_index, chain);
		}
		hammer2_spin_unex(&parent->core.spin);
	} else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) {
		/*
		 * Chain is not blockmapped but a parent is present.
		 * Atomically remove the chain from the parent.  There is
		 * no blockmap entry to remove.
		 *
		 * Because chain was associated with a parent but not
		 * synchronized, the chain's *_count_up fields contain
		 * inode adjustment statistics which must be undone.
		 */
		hammer2_spin_ex(&parent->core.spin);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
		atomic_add_int(&parent->core.live_count, -1);
		++parent->core.generation;
		RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain);
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
		--parent->core.chain_count;
		chain->parent = NULL;
		hammer2_spin_unex(&parent->core.spin);
	} else {
		/*
		 * Chain is not blockmapped and has no parent.  This
		 * is a degenerate case.
		 */
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED);
	}
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow four rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.  This
 *     is accomplished by always recursing through matching keyspaces in
 *     the hammer2_chain_lookup() and hammer2_chain_next() API.
 *
 * The current implementation calculates the current worst-case keyspace by
 * iterating the current parent and then divides it into two halves, choosing
 * whichever half has the most elements (not necessarily the half containing
 * the requested key).
 *
 * We can also opt to use the half with the least number of elements.  This
 * causes lower-numbered keys (aka logical file offsets) to recurse through
 * fewer indirect blocks and higher-numbered keys to recurse through more.
3142 * This also has the risk of not moving enough elements to the new indirect 3143 * block and being forced to create several indirect blocks before the element 3144 * can be inserted. 3145 * 3146 * Must be called with an exclusively locked parent. 3147 */ 3148 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 3149 hammer2_key_t *keyp, int keybits, 3150 hammer2_blockref_t *base, int count); 3151 static int hammer2_chain_indkey_normal(hammer2_chain_t *parent, 3152 hammer2_key_t *keyp, int keybits, 3153 hammer2_blockref_t *base, int count); 3154 static 3155 hammer2_chain_t * 3156 hammer2_chain_create_indirect(hammer2_chain_t *parent, 3157 hammer2_key_t create_key, int create_bits, 3158 hammer2_tid_t mtid, int for_type, int *errorp) 3159 { 3160 hammer2_dev_t *hmp; 3161 hammer2_blockref_t *base; 3162 hammer2_blockref_t *bref; 3163 hammer2_blockref_t bcopy; 3164 hammer2_chain_t *chain; 3165 hammer2_chain_t *ichain; 3166 hammer2_chain_t dummy; 3167 hammer2_key_t key = create_key; 3168 hammer2_key_t key_beg; 3169 hammer2_key_t key_end; 3170 hammer2_key_t key_next; 3171 int keybits = create_bits; 3172 int count; 3173 int nbytes; 3174 int cache_index; 3175 int loops; 3176 int reason; 3177 int generation; 3178 int maxloops = 300000; 3179 3180 /* 3181 * Calculate the base blockref pointer or NULL if the chain 3182 * is known to be empty. We need to calculate the array count 3183 * for RB lookups either way. 3184 */ 3185 hmp = parent->hmp; 3186 *errorp = 0; 3187 KKASSERT(hammer2_mtx_owned(&parent->lock)); 3188 3189 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 3190 base = hammer2_chain_base_and_count(parent, &count); 3191 3192 /* 3193 * dummy used in later chain allocation (no longer used for lookups). 
3194 */ 3195 bzero(&dummy, sizeof(dummy)); 3196 3197 /* 3198 * When creating an indirect block for a freemap node or leaf 3199 * the key/keybits must be fitted to static radix levels because 3200 * particular radix levels use particular reserved blocks in the 3201 * related zone. 3202 * 3203 * This routine calculates the key/radix of the indirect block 3204 * we need to create, and whether it is on the high-side or the 3205 * low-side. 3206 */ 3207 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3208 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3209 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 3210 base, count); 3211 } else { 3212 keybits = hammer2_chain_indkey_normal(parent, &key, keybits, 3213 base, count); 3214 } 3215 3216 /* 3217 * Normalize the key for the radix being represented, keeping the 3218 * high bits and throwing away the low bits. 3219 */ 3220 key &= ~(((hammer2_key_t)1 << keybits) - 1); 3221 3222 /* 3223 * How big should our new indirect block be? It has to be at least 3224 * as large as its parent. 3225 * 3226 * The freemap uses a specific indirect block size. 3227 * 3228 * The first indirect block level down from an inode typically 3229 * uses LBUFSIZE (16384), else it uses PBUFSIZE (65536). 
3230 */ 3231 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3232 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3233 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 3234 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 3235 nbytes = HAMMER2_IND_BYTES_MIN; 3236 } else { 3237 nbytes = HAMMER2_IND_BYTES_MAX; 3238 } 3239 if (nbytes < count * sizeof(hammer2_blockref_t)) { 3240 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 3241 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 3242 nbytes = count * sizeof(hammer2_blockref_t); 3243 } 3244 3245 /* 3246 * Ok, create our new indirect block 3247 */ 3248 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 3249 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 3250 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 3251 } else { 3252 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 3253 } 3254 dummy.bref.key = key; 3255 dummy.bref.keybits = keybits; 3256 dummy.bref.data_off = hammer2_getradix(nbytes); 3257 dummy.bref.methods = parent->bref.methods; 3258 3259 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref); 3260 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 3261 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 3262 /* ichain has one ref at this point */ 3263 3264 /* 3265 * We have to mark it modified to allocate its block, but use 3266 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 3267 * it won't be acted upon by the flush code. 3268 */ 3269 hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA); 3270 3271 /* 3272 * Iterate the original parent and move the matching brefs into 3273 * the new indirect block. 3274 * 3275 * XXX handle flushes. 3276 */ 3277 key_beg = 0; 3278 key_end = HAMMER2_KEY_MAX; 3279 cache_index = 0; 3280 hammer2_spin_ex(&parent->core.spin); 3281 loops = 0; 3282 reason = 0; 3283 3284 for (;;) { 3285 /* 3286 * Parent may have been modified, relocating its block array. 3287 * Reload the base pointer. 
3288 */ 3289 base = hammer2_chain_base_and_count(parent, &count); 3290 3291 if (++loops > 100000) { 3292 hammer2_spin_unex(&parent->core.spin); 3293 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3294 reason, parent, base, count, key_next); 3295 } 3296 3297 /* 3298 * NOTE: spinlock stays intact, returned chain (if not NULL) 3299 * is not referenced or locked which means that we 3300 * cannot safely check its flagged / deletion status 3301 * until we lock it. 3302 */ 3303 chain = hammer2_combined_find(parent, base, count, 3304 &cache_index, &key_next, 3305 key_beg, key_end, 3306 &bref); 3307 generation = parent->core.generation; 3308 if (bref == NULL) 3309 break; 3310 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3311 3312 /* 3313 * Skip keys that are not within the key/radix of the new 3314 * indirect block. They stay in the parent. 3315 */ 3316 if ((~(((hammer2_key_t)1 << keybits) - 1) & 3317 (key ^ bref->key)) != 0) { 3318 goto next_key_spinlocked; 3319 } 3320 3321 /* 3322 * Load the new indirect block by acquiring the related 3323 * chains (potentially from media as it might not be 3324 * in-memory). Then move it to the new parent (ichain) 3325 * via DELETE-DUPLICATE. 3326 * 3327 * chain is referenced but not locked. We must lock the 3328 * chain to obtain definitive DUPLICATED/DELETED state 3329 */ 3330 if (chain) { 3331 /* 3332 * Use chain already present in the RBTREE 3333 */ 3334 hammer2_chain_ref(chain); 3335 hammer2_spin_unex(&parent->core.spin); 3336 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3337 } else { 3338 /* 3339 * Get chain for blockref element. _get returns NULL 3340 * on insertion race. 
3341 */ 3342 bcopy = *bref; 3343 hammer2_spin_unex(&parent->core.spin); 3344 chain = hammer2_chain_get(parent, generation, &bcopy); 3345 if (chain == NULL) { 3346 reason = 1; 3347 hammer2_spin_ex(&parent->core.spin); 3348 continue; 3349 } 3350 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 3351 kprintf("REASON 2\n"); 3352 reason = 2; 3353 hammer2_chain_drop(chain); 3354 hammer2_spin_ex(&parent->core.spin); 3355 continue; 3356 } 3357 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3358 } 3359 3360 /* 3361 * This is always live so if the chain has been deleted 3362 * we raced someone and we have to retry. 3363 * 3364 * NOTE: Lookups can race delete-duplicate because 3365 * delete-duplicate does not lock the parent's core 3366 * (they just use the spinlock on the core). We must 3367 * check for races by comparing the DUPLICATED flag before 3368 * releasing the spinlock with the flag after locking the 3369 * chain. 3370 * 3371 * (note reversed logic for this one) 3372 */ 3373 if (chain->flags & HAMMER2_CHAIN_DELETED) { 3374 hammer2_chain_unlock(chain); 3375 hammer2_chain_drop(chain); 3376 goto next_key; 3377 } 3378 3379 /* 3380 * Shift the chain to the indirect block. 3381 * 3382 * WARNING! No reason for us to load chain data, pass NOSTATS 3383 * to prevent delete/insert from trying to access 3384 * inode stats (and thus asserting if there is no 3385 * chain->data loaded). 3386 * 3387 * WARNING! The (parent, chain) deletion may modify the parent 3388 * and invalidate the base pointer. 
3389 */ 3390 hammer2_chain_delete(parent, chain, mtid, 0); 3391 hammer2_chain_rename(NULL, &ichain, chain, mtid, 0); 3392 hammer2_chain_unlock(chain); 3393 hammer2_chain_drop(chain); 3394 KKASSERT(parent->refs > 0); 3395 chain = NULL; 3396 base = NULL; /* safety */ 3397 next_key: 3398 hammer2_spin_ex(&parent->core.spin); 3399 next_key_spinlocked: 3400 if (--maxloops == 0) 3401 panic("hammer2_chain_create_indirect: maxloops"); 3402 reason = 4; 3403 if (key_next == 0 || key_next > key_end) 3404 break; 3405 key_beg = key_next; 3406 /* loop */ 3407 } 3408 hammer2_spin_unex(&parent->core.spin); 3409 3410 /* 3411 * Insert the new indirect block into the parent now that we've 3412 * cleared out some entries in the parent. We calculated a good 3413 * insertion index in the loop above (ichain->index). 3414 * 3415 * We don't have to set UPDATE here because we mark ichain 3416 * modified down below (so the normal modified -> flush -> set-moved 3417 * sequence applies). 3418 * 3419 * The insertion shouldn't race as this is a completely new block 3420 * and the parent is locked. 3421 */ 3422 base = NULL; /* safety, parent modify may change address */ 3423 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 3424 hammer2_chain_insert(parent, ichain, 3425 HAMMER2_CHAIN_INSERT_SPIN | 3426 HAMMER2_CHAIN_INSERT_LIVE, 3427 0); 3428 3429 /* 3430 * Make sure flushes propogate after our manual insertion. 3431 */ 3432 hammer2_chain_setflush(ichain); 3433 hammer2_chain_setflush(parent); 3434 3435 /* 3436 * Figure out what to return. 3437 */ 3438 if (~(((hammer2_key_t)1 << keybits) - 1) & 3439 (create_key ^ key)) { 3440 /* 3441 * Key being created is outside the key range, 3442 * return the original parent. 3443 */ 3444 hammer2_chain_unlock(ichain); 3445 hammer2_chain_drop(ichain); 3446 } else { 3447 /* 3448 * Otherwise its in the range, return the new parent. 3449 * (leave both the new and old parent locked). 
3450 */ 3451 parent = ichain; 3452 } 3453 3454 return(parent); 3455 } 3456 3457 /* 3458 * Calculate the keybits and highside/lowside of the freemap node the 3459 * caller is creating. 3460 * 3461 * This routine will specify the next higher-level freemap key/radix 3462 * representing the lowest-ordered set. By doing so, eventually all 3463 * low-ordered sets will be moved one level down. 3464 * 3465 * We have to be careful here because the freemap reserves a limited 3466 * number of blocks for a limited number of levels. So we can't just 3467 * push indiscriminately. 3468 */ 3469 int 3470 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 3471 int keybits, hammer2_blockref_t *base, int count) 3472 { 3473 hammer2_chain_t *chain; 3474 hammer2_blockref_t *bref; 3475 hammer2_key_t key; 3476 hammer2_key_t key_beg; 3477 hammer2_key_t key_end; 3478 hammer2_key_t key_next; 3479 int cache_index; 3480 int locount; 3481 int hicount; 3482 int maxloops = 300000; 3483 3484 key = *keyp; 3485 locount = 0; 3486 hicount = 0; 3487 keybits = 64; 3488 3489 /* 3490 * Calculate the range of keys in the array being careful to skip 3491 * slots which are overridden with a deletion. 3492 */ 3493 key_beg = 0; 3494 key_end = HAMMER2_KEY_MAX; 3495 cache_index = 0; 3496 hammer2_spin_ex(&parent->core.spin); 3497 3498 for (;;) { 3499 if (--maxloops == 0) { 3500 panic("indkey_freemap shit %p %p:%d\n", 3501 parent, base, count); 3502 } 3503 chain = hammer2_combined_find(parent, base, count, 3504 &cache_index, &key_next, 3505 key_beg, key_end, 3506 &bref); 3507 3508 /* 3509 * Exhausted search 3510 */ 3511 if (bref == NULL) 3512 break; 3513 3514 /* 3515 * Skip deleted chains. 3516 */ 3517 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3518 if (key_next == 0 || key_next > key_end) 3519 break; 3520 key_beg = key_next; 3521 continue; 3522 } 3523 3524 /* 3525 * Use the full live (not deleted) element for the scan 3526 * iteration. 
HAMMER2 does not allow partial replacements. 3527 * 3528 * XXX should be built into hammer2_combined_find(). 3529 */ 3530 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3531 3532 if (keybits > bref->keybits) { 3533 key = bref->key; 3534 keybits = bref->keybits; 3535 } else if (keybits == bref->keybits && bref->key < key) { 3536 key = bref->key; 3537 } 3538 if (key_next == 0) 3539 break; 3540 key_beg = key_next; 3541 } 3542 hammer2_spin_unex(&parent->core.spin); 3543 3544 /* 3545 * Return the keybits for a higher-level FREEMAP_NODE covering 3546 * this node. 3547 */ 3548 switch(keybits) { 3549 case HAMMER2_FREEMAP_LEVEL0_RADIX: 3550 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 3551 break; 3552 case HAMMER2_FREEMAP_LEVEL1_RADIX: 3553 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 3554 break; 3555 case HAMMER2_FREEMAP_LEVEL2_RADIX: 3556 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 3557 break; 3558 case HAMMER2_FREEMAP_LEVEL3_RADIX: 3559 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 3560 break; 3561 case HAMMER2_FREEMAP_LEVEL4_RADIX: 3562 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 3563 break; 3564 case HAMMER2_FREEMAP_LEVEL5_RADIX: 3565 panic("hammer2_chain_indkey_freemap: level too high"); 3566 break; 3567 default: 3568 panic("hammer2_chain_indkey_freemap: bad radix"); 3569 break; 3570 } 3571 *keyp = key; 3572 3573 return (keybits); 3574 } 3575 3576 /* 3577 * Calculate the keybits and highside/lowside of the indirect block the 3578 * caller is creating. 
3579 */ 3580 static int 3581 hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, 3582 int keybits, hammer2_blockref_t *base, int count) 3583 { 3584 hammer2_blockref_t *bref; 3585 hammer2_chain_t *chain; 3586 hammer2_key_t key_beg; 3587 hammer2_key_t key_end; 3588 hammer2_key_t key_next; 3589 hammer2_key_t key; 3590 int nkeybits; 3591 int locount; 3592 int hicount; 3593 int cache_index; 3594 int maxloops = 300000; 3595 3596 key = *keyp; 3597 locount = 0; 3598 hicount = 0; 3599 3600 /* 3601 * Calculate the range of keys in the array being careful to skip 3602 * slots which are overridden with a deletion. Once the scan 3603 * completes we will cut the key range in half and shift half the 3604 * range into the new indirect block. 3605 */ 3606 key_beg = 0; 3607 key_end = HAMMER2_KEY_MAX; 3608 cache_index = 0; 3609 hammer2_spin_ex(&parent->core.spin); 3610 3611 for (;;) { 3612 if (--maxloops == 0) { 3613 panic("indkey_freemap shit %p %p:%d\n", 3614 parent, base, count); 3615 } 3616 chain = hammer2_combined_find(parent, base, count, 3617 &cache_index, &key_next, 3618 key_beg, key_end, 3619 &bref); 3620 3621 /* 3622 * Exhausted search 3623 */ 3624 if (bref == NULL) 3625 break; 3626 3627 /* 3628 * NOTE: No need to check DUPLICATED here because we do 3629 * not release the spinlock. 3630 */ 3631 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3632 if (key_next == 0 || key_next > key_end) 3633 break; 3634 key_beg = key_next; 3635 continue; 3636 } 3637 3638 /* 3639 * Use the full live (not deleted) element for the scan 3640 * iteration. HAMMER2 does not allow partial replacements. 3641 * 3642 * XXX should be built into hammer2_combined_find(). 3643 */ 3644 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3645 3646 /* 3647 * Expand our calculated key range (key, keybits) to fit 3648 * the scanned key. nkeybits represents the full range 3649 * that we will later cut in half (two halves @ nkeybits - 1). 
3650 */ 3651 nkeybits = keybits; 3652 if (nkeybits < bref->keybits) { 3653 if (bref->keybits > 64) { 3654 kprintf("bad bref chain %p bref %p\n", 3655 chain, bref); 3656 Debugger("fubar"); 3657 } 3658 nkeybits = bref->keybits; 3659 } 3660 while (nkeybits < 64 && 3661 (~(((hammer2_key_t)1 << nkeybits) - 1) & 3662 (key ^ bref->key)) != 0) { 3663 ++nkeybits; 3664 } 3665 3666 /* 3667 * If the new key range is larger we have to determine 3668 * which side of the new key range the existing keys fall 3669 * under by checking the high bit, then collapsing the 3670 * locount into the hicount or vise-versa. 3671 */ 3672 if (keybits != nkeybits) { 3673 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 3674 hicount += locount; 3675 locount = 0; 3676 } else { 3677 locount += hicount; 3678 hicount = 0; 3679 } 3680 keybits = nkeybits; 3681 } 3682 3683 /* 3684 * The newly scanned key will be in the lower half or the 3685 * upper half of the (new) key range. 3686 */ 3687 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 3688 ++hicount; 3689 else 3690 ++locount; 3691 3692 if (key_next == 0) 3693 break; 3694 key_beg = key_next; 3695 } 3696 hammer2_spin_unex(&parent->core.spin); 3697 bref = NULL; /* now invalid (safety) */ 3698 3699 /* 3700 * Adjust keybits to represent half of the full range calculated 3701 * above (radix 63 max) 3702 */ 3703 --keybits; 3704 3705 /* 3706 * Select whichever half contains the most elements. Theoretically 3707 * we can select either side as long as it contains at least one 3708 * element (in order to ensure that a free slot is present to hold 3709 * the indirect block). 3710 */ 3711 if (hammer2_indirect_optimize) { 3712 /* 3713 * Insert node for least number of keys, this will arrange 3714 * the first few blocks of a large file or the first few 3715 * inodes in a directory with fewer indirect blocks when 3716 * created linearly. 
3717 */ 3718 if (hicount < locount && hicount != 0) 3719 key |= (hammer2_key_t)1 << keybits; 3720 else 3721 key &= ~(hammer2_key_t)1 << keybits; 3722 } else { 3723 /* 3724 * Insert node for most number of keys, best for heavily 3725 * fragmented files. 3726 */ 3727 if (hicount > locount) 3728 key |= (hammer2_key_t)1 << keybits; 3729 else 3730 key &= ~(hammer2_key_t)1 << keybits; 3731 } 3732 *keyp = key; 3733 3734 return (keybits); 3735 } 3736 3737 /* 3738 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 3739 * it exists. 3740 * 3741 * Both parent and chain must be locked exclusively. 3742 * 3743 * This function will modify the parent if the blockref requires removal 3744 * from the parent's block table. 3745 * 3746 * This function is NOT recursive. Any entity already pushed into the 3747 * chain (such as an inode) may still need visibility into its contents, 3748 * as well as the ability to read and modify the contents. For example, 3749 * for an unlinked file which is still open. 3750 */ 3751 void 3752 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 3753 hammer2_tid_t mtid, int flags) 3754 { 3755 KKASSERT(hammer2_mtx_owned(&chain->lock)); 3756 3757 /* 3758 * Nothing to do if already marked. 3759 * 3760 * We need the spinlock on the core whos RBTREE contains chain 3761 * to protect against races. 3762 */ 3763 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 3764 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 3765 chain->parent == parent); 3766 _hammer2_chain_delete_helper(parent, chain, mtid, flags); 3767 } 3768 3769 /* 3770 * Permanent deletions mark the chain as destroyed. 3771 */ 3772 if (flags & HAMMER2_DELETE_PERMANENT) { 3773 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 3774 } else { 3775 /* XXX might not be needed */ 3776 hammer2_chain_setflush(chain); 3777 } 3778 } 3779 3780 /* 3781 * Returns the index of the nearest element in the blockref array >= elm. 
 * Returns (count) if no element could be found.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * (*cache_indexp) is a heuristic and can be any value without affecting
 * the result.
 *
 * WARNING!  Must be called with parent's spinlock held.  Spinlock remains
 *	     held through the operation.
 */
static int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  int *cache_indexp, hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end)
{
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;

	/*
	 * Require the live chain's already have their core's counted
	 * so we can optimize operations.
	 */
	KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS);

	/*
	 * Degenerate case
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using *cache_indexp.  This is the most
	 * likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = *cache_indexp;
	cpu_ccfence();
	limit = parent->core.live_zero;
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == 0 || scan->key > key_beg)) {
		--scan;
		--i;
	}
	*cache_indexp = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != 0) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg)
				break;
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		*cache_indexp = i;
		if (i >= limit) {
			i = count;
		} else {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			/*
			 * scan_end == 0 means the element's range wrapped
			 * past the top of the key space; do not clamp
			 * *key_nextp in that case.
			 */
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *bresp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain has been found, a non-NULL bref may still be
 * returned in *bresp from the blockref array.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 *
 * WARNING!  Must be called with parent's spinlock held.  Spinlock remains
 *	     held through the operation.
 */
static hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      int *cache_indexp, hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **bresp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
	 */
	*key_nextp = key_end + 1;
	i = hammer2_base_find(parent, base, count, cache_indexp,
			      key_nextp, key_beg, key_end);
	chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

	/*
	 * Neither matched
	 */
	if (i == count && chain == NULL) {
		*bresp = NULL;
		return(NULL);
	}

	/*
	 * Only chain matched.
	 */
	if (i == count) {
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Only blockref matched.
	 */
	if (chain == NULL) {
		bref = &base[i];
		goto found;
	}

	/*
	 * Both in-memory and blockref matched, select the nearer element.
	 *
	 * If both are flush with the left-hand side or both are the
	 * same distance away, select the chain.  In this situation the
	 * chain must have been loaded from the matching blockmap.
	 */
	if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
	    chain->bref.key == base[i].key) {
		KKASSERT(chain->bref.key == base[i].key);
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Select the nearer key
	 */
	if (chain->bref.key < base[i].key) {
		bref = &chain->bref;
	} else {
		bref = &base[i];
		chain = NULL;
	}

	/*
	 * If the bref is out of bounds we've exhausted our search.
	 */
found:
	if (bref->key > key_end) {
		*bresp = NULL;
		chain = NULL;
	} else {
		*bresp = bref;
	}
	return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_delete(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    int *cache_indexp, hammer2_chain_t *chain)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_key_t key_next;
	int i;

	/*
	 * Delete element.  Expect the element to exist.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, cache_indexp,
			      &key_next, elm->key, elm->key);
	if (i == count || base[i].type == 0 ||
	    base[i].key != elm->key ||
	    ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 &&
	     base[i].keybits != elm->keybits)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("delete base %p element not found at %d/%d elm %p\n",
		      base, i, count, elm);
		return;
	}

	/*
	 * Update stats and zero the entry.  The second data_count
	 * adjustment subtracts the size encoded in the entry's
	 * data_off radix.
	 */
	parent->bref.data_count -= base[i].data_count;
	parent->bref.data_count -= (hammer2_off_t)1 <<
			(int)(base[i].data_off & HAMMER2_OFF_MASK_RADIX);
	parent->bref.inode_count -= base[i].inode_count;
	if (base[i].type == HAMMER2_BREF_TYPE_INODE)
		parent->bref.inode_count -= 1;

	bzero(&base[i], sizeof(*base));

	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 */
	if (parent->core.live_zero == i + 1) {
		while (--i >= 0 && base[i].type == 0)
			;
		parent->core.live_zero = i + 1;
	}

	/*
	 * Clear appropriate blockmap flags in chain.
	 */
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED |
					HAMMER2_CHAIN_BMAPUPD);
}

/*
 * Insert the specified element.  The block array must not already have the
 * element and must have space available for the insertion.
 *
 * The spin lock on the related chain must be held.
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 */
void
hammer2_base_insert(hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    int *cache_indexp, hammer2_chain_t *chain)
{
	hammer2_blockref_t *elm = &chain->bref;
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;
	int k;
	int l;
	int u = 1;

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, cache_indexp,
			      &key_next, elm->key, elm->key);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * Set appropriate blockmap flags in chain.
	 */
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * Update stats.  The second data_count adjustment adds the size
	 * encoded in the element's data_off radix.
	 */
	parent->bref.data_count += elm->data_count;
	parent->bref.data_count += (hammer2_off_t)1 <<
			(int)(elm->data_off & HAMMER2_OFF_MASK_RADIX);
	parent->bref.inode_count += elm->inode_count;
	if (elm->type == HAMMER2_BREF_TYPE_INODE)
		parent->bref.inode_count += 1;


	/*
	 * We can only optimize parent->core.live_zero for live chains.
	 */
	if (i == count && parent->core.live_zero < count) {
		i = parent->core.live_zero++;
		base[i] = *elm;
		return;
	}

	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		hammer2_spin_unex(&parent->core.spin);
		panic("insert base %p overlapping elements at %d elm %p\n",
		      base, i, elm);
	}

	/*
	 * Try to find an empty slot before or after.  j scans downward,
	 * k scans upward; whichever hole is found first determines the
	 * direction elements are shifted to make room at the insertion
	 * point.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == 0) {
			if (j == i - 1) {
				base[j] = *elm;
			} else {
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			goto validate;
		}
		++k;
		if (k < count && base[k].type == 0) {
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update parent->core.live_zero for live
			 * chains.
			 */
			if (parent->core.live_zero <= k)
				parent->core.live_zero = k + 1;
			u = 2;
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging: verify the array remains sorted with no key overlap.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
4177 */ 4178 typedef hammer2_blockref_t *hammer2_blockref_p; 4179 4180 static 4181 int 4182 hammer2_base_sort_callback(const void *v1, const void *v2) 4183 { 4184 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 4185 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 4186 4187 /* 4188 * Make sure empty elements are placed at the end of the array 4189 */ 4190 if (bref1->type == 0) { 4191 if (bref2->type == 0) 4192 return(0); 4193 return(1); 4194 } else if (bref2->type == 0) { 4195 return(-1); 4196 } 4197 4198 /* 4199 * Sort by key 4200 */ 4201 if (bref1->key < bref2->key) 4202 return(-1); 4203 if (bref1->key > bref2->key) 4204 return(1); 4205 return(0); 4206 } 4207 4208 void 4209 hammer2_base_sort(hammer2_chain_t *chain) 4210 { 4211 hammer2_blockref_t *base; 4212 int count; 4213 4214 switch(chain->bref.type) { 4215 case HAMMER2_BREF_TYPE_INODE: 4216 /* 4217 * Special shortcut for embedded data returns the inode 4218 * itself. Callers must detect this condition and access 4219 * the embedded data (the strategy code does this for us). 4220 * 4221 * This is only applicable to regular files and softlinks. 4222 */ 4223 if (chain->data->ipdata.meta.op_flags & 4224 HAMMER2_OPFLAG_DIRECTDATA) { 4225 return; 4226 } 4227 base = &chain->data->ipdata.u.blockset.blockref[0]; 4228 count = HAMMER2_SET_COUNT; 4229 break; 4230 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 4231 case HAMMER2_BREF_TYPE_INDIRECT: 4232 /* 4233 * Optimize indirect blocks in the INITIAL state to avoid 4234 * I/O. 
4235 */ 4236 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 4237 base = &chain->data->npdata[0]; 4238 count = chain->bytes / sizeof(hammer2_blockref_t); 4239 break; 4240 case HAMMER2_BREF_TYPE_VOLUME: 4241 base = &chain->data->voldata.sroot_blockset.blockref[0]; 4242 count = HAMMER2_SET_COUNT; 4243 break; 4244 case HAMMER2_BREF_TYPE_FREEMAP: 4245 base = &chain->data->blkset.blockref[0]; 4246 count = HAMMER2_SET_COUNT; 4247 break; 4248 default: 4249 kprintf("hammer2_chain_lookup: unrecognized " 4250 "blockref(A) type: %d", 4251 chain->bref.type); 4252 while (1) 4253 tsleep(&base, 0, "dead", 0); 4254 panic("hammer2_chain_lookup: unrecognized " 4255 "blockref(A) type: %d", 4256 chain->bref.type); 4257 base = NULL; /* safety */ 4258 count = 0; /* safety */ 4259 } 4260 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 4261 } 4262 4263 #endif 4264 4265 /* 4266 * Chain memory management 4267 */ 4268 void 4269 hammer2_chain_wait(hammer2_chain_t *chain) 4270 { 4271 tsleep(chain, 0, "chnflw", 1); 4272 } 4273 4274 const hammer2_media_data_t * 4275 hammer2_chain_rdata(hammer2_chain_t *chain) 4276 { 4277 KKASSERT(chain->data != NULL); 4278 return (chain->data); 4279 } 4280 4281 hammer2_media_data_t * 4282 hammer2_chain_wdata(hammer2_chain_t *chain) 4283 { 4284 KKASSERT(chain->data != NULL); 4285 return (chain->data); 4286 } 4287 4288 /* 4289 * Set the check data for a chain. This can be a heavy-weight operation 4290 * and typically only runs on-flush. For file data check data is calculated 4291 * when the logical buffers are flushed. 
4292 */ 4293 void 4294 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 4295 { 4296 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 4297 4298 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4299 case HAMMER2_CHECK_NONE: 4300 break; 4301 case HAMMER2_CHECK_DISABLED: 4302 break; 4303 case HAMMER2_CHECK_ISCSI32: 4304 chain->bref.check.iscsi32.value = 4305 hammer2_icrc32(bdata, chain->bytes); 4306 break; 4307 case HAMMER2_CHECK_XXHASH64: 4308 chain->bref.check.xxhash64.value = 4309 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED); 4310 break; 4311 case HAMMER2_CHECK_SHA192: 4312 { 4313 SHA256_CTX hash_ctx; 4314 union { 4315 uint8_t digest[SHA256_DIGEST_LENGTH]; 4316 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4317 } u; 4318 4319 SHA256_Init(&hash_ctx); 4320 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4321 SHA256_Final(u.digest, &hash_ctx); 4322 u.digest64[2] ^= u.digest64[3]; 4323 bcopy(u.digest, 4324 chain->bref.check.sha192.data, 4325 sizeof(chain->bref.check.sha192.data)); 4326 } 4327 break; 4328 case HAMMER2_CHECK_FREEMAP: 4329 chain->bref.check.freemap.icrc32 = 4330 hammer2_icrc32(bdata, chain->bytes); 4331 break; 4332 default: 4333 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4334 chain->bref.methods); 4335 break; 4336 } 4337 } 4338 4339 int 4340 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 4341 { 4342 int r; 4343 4344 if (chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 4345 return 1; 4346 4347 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4348 case HAMMER2_CHECK_NONE: 4349 r = 1; 4350 break; 4351 case HAMMER2_CHECK_DISABLED: 4352 r = 1; 4353 break; 4354 case HAMMER2_CHECK_ISCSI32: 4355 r = (chain->bref.check.iscsi32.value == 4356 hammer2_icrc32(bdata, chain->bytes)); 4357 hammer2_check_icrc32 += chain->bytes; 4358 break; 4359 case HAMMER2_CHECK_XXHASH64: 4360 r = (chain->bref.check.xxhash64.value == 4361 XXH64(bdata, chain->bytes, XXH_HAMMER2_SEED)); 4362 hammer2_check_xxhash64 += chain->bytes; 4363 break; 4364 case 
HAMMER2_CHECK_SHA192: 4365 { 4366 SHA256_CTX hash_ctx; 4367 union { 4368 uint8_t digest[SHA256_DIGEST_LENGTH]; 4369 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4370 } u; 4371 4372 SHA256_Init(&hash_ctx); 4373 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4374 SHA256_Final(u.digest, &hash_ctx); 4375 u.digest64[2] ^= u.digest64[3]; 4376 if (bcmp(u.digest, 4377 chain->bref.check.sha192.data, 4378 sizeof(chain->bref.check.sha192.data)) == 0) { 4379 r = 1; 4380 } else { 4381 r = 0; 4382 } 4383 } 4384 break; 4385 case HAMMER2_CHECK_FREEMAP: 4386 r = (chain->bref.check.freemap.icrc32 == 4387 hammer2_icrc32(bdata, chain->bytes)); 4388 if (r == 0) { 4389 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 4390 chain->bref.check.freemap.icrc32, 4391 hammer2_icrc32(bdata, chain->bytes), chain->bytes); 4392 if (chain->dio) 4393 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 4394 chain->dio, chain->dio->bp->b_loffset, chain->dio->bp->b_bufsize, bdata, chain->dio->bp->b_data); 4395 } 4396 4397 break; 4398 default: 4399 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4400 chain->bref.methods); 4401 r = 1; 4402 break; 4403 } 4404 return r; 4405 } 4406 4407 /* 4408 * The caller presents a shared-locked (parent, chain) where the chain 4409 * is of type HAMMER2_OBJTYPE_HARDLINK. The caller must hold the ip 4410 * structure representing the inode locked to prevent 4411 * consolidation/deconsolidation races. 4412 * 4413 * The flags passed in are LOOKUP flags, not RESOLVE flags. Only 4414 * HAMMER2_LOOKUP_SHARED is supported. 4415 * 4416 * We locate the hardlink in the current or a common parent directory. 4417 * 4418 * If we are unable to locate the hardlink, EIO is returned and 4419 * (*chainp) is unlocked and dropped. 
 */
int
hammer2_chain_hardlink_find(hammer2_inode_t *dip,
			    hammer2_chain_t **parentp,
			    hammer2_chain_t **chainp,
			    int flags)
{
	hammer2_chain_t *parent;
	hammer2_chain_t *rchain;
	hammer2_key_t key_dummy;
	hammer2_key_t lhc;
	int cache_index = -1;
	int resolve_flags;

	/* NOTE(review): dip is not referenced in this function body */

	resolve_flags = (flags & HAMMER2_LOOKUP_SHARED) ?
			HAMMER2_RESOLVE_SHARED : 0;

	/*
	 * Obtain the key for the hardlink from *chainp.  The passed-in
	 * chain is consumed (unlocked and dropped) here; only its inode
	 * number survives as the lookup key.
	 */
	rchain = *chainp;
	lhc = rchain->data->ipdata.meta.inum;
	hammer2_chain_unlock(rchain);
	hammer2_chain_drop(rchain);
	rchain = NULL;

	for (;;) {
		int again;

		rchain = hammer2_chain_lookup(parentp, &key_dummy,
					      lhc, lhc,
					      &cache_index, flags);
		if (rchain)
			break;

		/*
		 * Iterate parents, handle parent rename races by retrying
		 * the operation.
		 */
		again = 0;
		for (;;) {
			parent = *parentp;
			/*
			 * Allow the walk to pass through at most one inode
			 * boundary before breaking out to retry the lookup
			 * at the new level.
			 */
			if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
				if (again == 1)
					break;
				++again;
			}
			/* never walk above the PFS root */
			if (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT)
				goto done;
			for (;;) {
				if (parent->parent == NULL)
					goto done;
				/*
				 * Lock-order dance: ref the grandparent,
				 * unlock *parentp, lock the grandparent,
				 * then re-verify the linkage.  If the
				 * topology changed underneath us, relock
				 * *parentp and try again.
				 */
				parent = parent->parent;
				hammer2_chain_ref(parent);
				hammer2_chain_unlock(*parentp);
				hammer2_chain_lock(parent,
						   HAMMER2_RESOLVE_ALWAYS |
						   resolve_flags);
				if ((*parentp)->parent == parent) {
					hammer2_chain_drop(*parentp);
					*parentp = parent;
					break;
				}
				hammer2_chain_unlock(parent);
				hammer2_chain_drop(parent);
				hammer2_chain_lock(*parentp,
						   HAMMER2_RESOLVE_ALWAYS |
						   resolve_flags);
				parent = *parentp;
			}
		}
	}
done:
	/*
	 * On success *chainp is the located hardlink target; on failure
	 * *chainp is NULL (the original chain was already released above)
	 * and EINVAL is returned.
	 */
	*chainp = rchain;
	return (rchain ? 0 : EINVAL);
}

/*
 * Used by the bulkscan code to snapshot the synchronized storage for
 * a volume, allowing it to be scanned concurrently against normal
 * operation.
4502 */ 4503 hammer2_chain_t * 4504 hammer2_chain_bulksnap(hammer2_chain_t *chain) 4505 { 4506 hammer2_chain_t *copy; 4507 4508 copy = hammer2_chain_alloc(chain->hmp, chain->pmp, &chain->bref); 4509 switch(chain->bref.type) { 4510 case HAMMER2_BREF_TYPE_VOLUME: 4511 copy->data = kmalloc(sizeof(copy->data->voldata), 4512 chain->hmp->mchain, 4513 M_WAITOK | M_ZERO); 4514 hammer2_spin_ex(&chain->core.spin); 4515 copy->data->voldata = chain->data->voldata; 4516 hammer2_spin_unex(&chain->core.spin); 4517 break; 4518 case HAMMER2_BREF_TYPE_FREEMAP: 4519 copy->data = kmalloc(sizeof(hammer2_blockset_t), 4520 chain->hmp->mchain, 4521 M_WAITOK | M_ZERO); 4522 hammer2_spin_ex(&chain->core.spin); 4523 copy->data->blkset = chain->data->blkset; 4524 hammer2_spin_unex(&chain->core.spin); 4525 break; 4526 default: 4527 break; 4528 } 4529 return copy; 4530 } 4531 4532 void 4533 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 4534 { 4535 switch(copy->bref.type) { 4536 case HAMMER2_BREF_TYPE_VOLUME: 4537 case HAMMER2_BREF_TYPE_FREEMAP: 4538 KKASSERT(copy->data); 4539 kfree(copy->data, copy->hmp->mchain); 4540 copy->data = NULL; 4541 atomic_add_long(&hammer2_chain_allocs, -1); 4542 break; 4543 default: 4544 break; 4545 } 4546 hammer2_chain_drop(copy); 4547 } 4548 4549 /* 4550 * Create a snapshot of the specified {parent, ochain} with the specified 4551 * label. The originating hammer2_inode must be exclusively locked for 4552 * safety. 4553 * 4554 * The ioctl code has already synced the filesystem. 
 */
int
hammer2_chain_snapshot(hammer2_chain_t *chain, hammer2_ioc_pfs_t *pmp,
		       hammer2_tid_t mtid)
{
	hammer2_dev_t *hmp;
	const hammer2_inode_data_t *ripdata;
	hammer2_inode_data_t *wipdata;
	hammer2_chain_t *nchain;
	hammer2_inode_t *nip;
	size_t name_len;
	hammer2_key_t lhc;
	struct vattr vat;
#if 0
	uuid_t opfs_clid;
#endif
	int error;

	kprintf("snapshot %s\n", pmp->name);

	/* directory-hash key for the snapshot's name (currently unused) */
	name_len = strlen(pmp->name);
	lhc = hammer2_dirhash(pmp->name, name_len);

	/*
	 * Get the clid
	 */
	ripdata = &chain->data->ipdata;
#if 0
	opfs_clid = ripdata->meta.pfs_clid;
#endif
	hmp = chain->hmp;

	/*
	 * Create the snapshot directory under the super-root
	 *
	 * Set PFS type, generate a unique filesystem id, and generate
	 * a cluster id.  Use the same clid when snapshotting a PFS root,
	 * which theoretically allows the snapshot to be used as part of
	 * the same cluster (perhaps as a cache).
	 *
	 * Copy the (flushed) blockref array.  Theoretically we could use
	 * chain_duplicate() but it becomes difficult to disentangle
	 * the shared core so for now just brute-force it.
	 */
	VATTR_NULL(&vat);
	vat.va_type = VDIR;
	vat.va_mode = 0755;
	/* on failure nip is NULL and error carries the reason */
	nip = hammer2_inode_create(hmp->spmp->iroot, &vat, proc0.p_ucred,
				   pmp->name, name_len, 0,
				   1, 0, 0,
				   HAMMER2_INSERT_PFSROOT, &error);

	if (nip) {
		hammer2_inode_modify(nip);
		nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_modify(nchain, mtid, 0, 0);
		wipdata = &nchain->data->ipdata;

		nip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER;
		nip->meta.pfs_subtype = HAMMER2_PFSSUBTYPE_SNAPSHOT;
		nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT;
		kern_uuidgen(&nip->meta.pfs_fsid, 1);

		/*
		 * Give the snapshot its own private cluster id.  As a
		 * snapshot no further synchronization with the original
		 * cluster will be done.
		 */
#if 0
		if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY)
			nip->meta.pfs_clid = opfs_clid;
		else
			kern_uuidgen(&nip->meta.pfs_clid, 1);
#endif
		kern_uuidgen(&nip->meta.pfs_clid, 1);
		nchain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT;

		/* XXX hack blockset copy */
		/* XXX doesn't work with real cluster */
		wipdata->meta = nip->meta;
		wipdata->u.blockset = ripdata->u.blockset;

		/* flush the new PFS root and release our references */
		hammer2_flush(nchain, 1);
		hammer2_chain_unlock(nchain);
		hammer2_chain_drop(nchain);
		hammer2_inode_unlock(nip);
	}
	return (error);
}