1 /* 2 * Copyright (c) 2011-2015 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * and Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 38 * 39 * Chains are the in-memory version on media objects (volume header, inodes, 40 * indirect blocks, data blocks, etc). Chains represent a portion of the 41 * HAMMER2 topology. 42 * 43 * Chains are no-longer delete-duplicated. Instead, the original in-memory 44 * chain will be moved along with its block reference (e.g. for things like 45 * renames, hardlink operations, modifications, etc), and will be indexed 46 * on a secondary list for flush handling instead of propagating a flag 47 * upward to the root. 48 * 49 * Concurrent front-end operations can still run against backend flushes 50 * as long as they do not cross the current flush boundary. An operation 51 * running above the current flush (in areas not yet flushed) can become 52 * part of the current flush while ano peration running below the current 53 * flush can become part of the next flush. 54 */ 55 #include <sys/cdefs.h> 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/types.h> 59 #include <sys/lock.h> 60 #include <sys/kern_syscall.h> 61 #include <sys/uuid.h> 62 63 #include <crypto/sha2/sha2.h> 64 65 #include "hammer2.h" 66 67 static int hammer2_indirect_optimize; /* XXX SYSCTL */ 68 69 static hammer2_chain_t *hammer2_chain_create_indirect( 70 hammer2_chain_t *parent, 71 hammer2_key_t key, int keybits, 72 hammer2_tid_t mtid, int for_type, int *errorp); 73 static void hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop); 74 static hammer2_chain_t *hammer2_combined_find( 75 hammer2_chain_t *parent, 76 hammer2_blockref_t *base, int count, 77 int *cache_indexp, hammer2_key_t *key_nextp, 78 hammer2_key_t key_beg, hammer2_key_t key_end, 79 hammer2_blockref_t **bresp); 80 81 /* 82 * Basic RBTree for chains (core->rbtree and core->dbtree). Chains cannot 83 * overlap in the RB trees. Deleted chains are moved from rbtree to either 84 * dbtree or to dbq. 85 * 86 * Chains in delete-duplicate sequences can always iterate through core_entry 87 * to locate the live version of the chain. 88 */ 89 RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp); 90 91 int 92 hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2) 93 { 94 hammer2_key_t c1_beg; 95 hammer2_key_t c1_end; 96 hammer2_key_t c2_beg; 97 hammer2_key_t c2_end; 98 99 /* 100 * Compare chains. Overlaps are not supposed to happen and catch 101 * any software issues early we count overlaps as a match. 102 */ 103 c1_beg = chain1->bref.key; 104 c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1; 105 c2_beg = chain2->bref.key; 106 c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1; 107 108 if (c1_end < c2_beg) /* fully to the left */ 109 return(-1); 110 if (c1_beg > c2_end) /* fully to the right */ 111 return(1); 112 return(0); /* overlap (must not cross edge boundary) */ 113 } 114 115 static __inline 116 int 117 hammer2_isclusterable(hammer2_chain_t *chain) 118 { 119 if (hammer2_cluster_enable) { 120 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 121 chain->bref.type == HAMMER2_BREF_TYPE_INODE || 122 chain->bref.type == HAMMER2_BREF_TYPE_DATA) { 123 return(1); 124 } 125 } 126 return(0); 127 } 128 129 /* 130 * Make a chain visible to the flusher. The flusher needs to be able to 131 * do flushes of subdirectory chains or single files so it does a top-down 132 * recursion using the ONFLUSH flag for the recursion. It locates MODIFIED 133 * or UPDATE chains and flushes back up the chain to the volume root. 134 * 135 * This routine sets ONFLUSH upward until it hits the volume root. For 136 * simplicity we ignore PFSROOT boundaries whos rules can be complex. 137 * Extra ONFLUSH flagging doesn't hurt the filesystem. 138 */ 139 void 140 hammer2_chain_setflush(hammer2_chain_t *chain) 141 { 142 hammer2_chain_t *parent; 143 144 if ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 145 hammer2_spin_sh(&chain->core.spin); 146 while ((chain->flags & HAMMER2_CHAIN_ONFLUSH) == 0) { 147 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 148 if ((parent = chain->parent) == NULL) 149 break; 150 hammer2_spin_sh(&parent->core.spin); 151 hammer2_spin_unsh(&chain->core.spin); 152 chain = parent; 153 } 154 hammer2_spin_unsh(&chain->core.spin); 155 } 156 } 157 158 /* 159 * Allocate a new disconnected chain element representing the specified 160 * bref. chain->refs is set to 1 and the passed bref is copied to 161 * chain->bref. chain->bytes is derived from the bref. 162 * 163 * chain->pmp inherits pmp unless the chain is an inode (other than the 164 * super-root inode). 165 * 166 * NOTE: Returns a referenced but unlocked (because there is no core) chain. 167 */ 168 hammer2_chain_t * 169 hammer2_chain_alloc(hammer2_dev_t *hmp, hammer2_pfs_t *pmp, 170 hammer2_blockref_t *bref) 171 { 172 hammer2_chain_t *chain; 173 u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 174 175 /* 176 * Construct the appropriate system structure. 177 */ 178 switch(bref->type) { 179 case HAMMER2_BREF_TYPE_INODE: 180 case HAMMER2_BREF_TYPE_INDIRECT: 181 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 182 case HAMMER2_BREF_TYPE_DATA: 183 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 184 /* 185 * Chain's are really only associated with the hmp but we 186 * maintain a pmp association for per-mount memory tracking 187 * purposes. The pmp can be NULL. 188 */ 189 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 190 break; 191 case HAMMER2_BREF_TYPE_VOLUME: 192 case HAMMER2_BREF_TYPE_FREEMAP: 193 /* 194 * Only hammer2_chain_bulksnap() calls this function with these 195 * types. 196 */ 197 chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO); 198 break; 199 default: 200 chain = NULL; 201 panic("hammer2_chain_alloc: unrecognized blockref type: %d", 202 bref->type); 203 } 204 205 /* 206 * Initialize the new chain structure. pmp must be set to NULL for 207 * chains belonging to the super-root topology of a device mount. 208 */ 209 if (pmp == hmp->spmp) 210 chain->pmp = NULL; 211 else 212 chain->pmp = pmp; 213 chain->hmp = hmp; 214 chain->bref = *bref; 215 chain->bytes = bytes; 216 chain->refs = 1; 217 chain->flags = HAMMER2_CHAIN_ALLOCATED; 218 219 /* 220 * Set the PFS boundary flag if this chain represents a PFS root. 221 */ 222 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 223 chain->flags |= HAMMER2_CHAIN_PFSBOUNDARY; 224 hammer2_chain_core_init(chain); 225 226 return (chain); 227 } 228 229 /* 230 * Initialize a chain's core structure. This structure used to be allocated 231 * but is now embedded. 232 * 233 * The core is not locked. No additional refs on the chain are made. 234 * (trans) must not be NULL if (core) is not NULL. 235 */ 236 void 237 hammer2_chain_core_init(hammer2_chain_t *chain) 238 { 239 /* 240 * Fresh core under nchain (no multi-homing of ochain's 241 * sub-tree). 242 */ 243 RB_INIT(&chain->core.rbtree); /* live chains */ 244 hammer2_mtx_init(&chain->lock, "h2chain"); 245 } 246 247 /* 248 * Add a reference to a chain element, preventing its destruction. 249 * 250 * (can be called with spinlock held) 251 */ 252 void 253 hammer2_chain_ref(hammer2_chain_t *chain) 254 { 255 atomic_add_int(&chain->refs, 1); 256 #if 0 257 kprintf("REFC %p %d %08x\n", chain, chain->refs - 1, chain->flags); 258 print_backtrace(8); 259 #endif 260 } 261 262 /* 263 * Insert the chain in the core rbtree. 264 * 265 * Normal insertions are placed in the live rbtree. Insertion of a deleted 266 * chain is a special case used by the flush code that is placed on the 267 * unstaged deleted list to avoid confusing the live view. 268 */ 269 #define HAMMER2_CHAIN_INSERT_SPIN 0x0001 270 #define HAMMER2_CHAIN_INSERT_LIVE 0x0002 271 #define HAMMER2_CHAIN_INSERT_RACE 0x0004 272 273 static 274 int 275 hammer2_chain_insert(hammer2_chain_t *parent, hammer2_chain_t *chain, 276 int flags, int generation) 277 { 278 hammer2_chain_t *xchain; 279 int error = 0; 280 281 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 282 hammer2_spin_ex(&parent->core.spin); 283 284 /* 285 * Interlocked by spinlock, check for race 286 */ 287 if ((flags & HAMMER2_CHAIN_INSERT_RACE) && 288 parent->core.generation != generation) { 289 error = EAGAIN; 290 goto failed; 291 } 292 293 /* 294 * Insert chain 295 */ 296 xchain = RB_INSERT(hammer2_chain_tree, &parent->core.rbtree, chain); 297 KASSERT(xchain == NULL, 298 ("hammer2_chain_insert: collision %p %p", chain, xchain)); 299 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 300 chain->parent = parent; 301 ++parent->core.chain_count; 302 ++parent->core.generation; /* XXX incs for _get() too, XXX */ 303 304 /* 305 * We have to keep track of the effective live-view blockref count 306 * so the create code knows when to push an indirect block. 307 */ 308 if (flags & HAMMER2_CHAIN_INSERT_LIVE) 309 atomic_add_int(&parent->core.live_count, 1); 310 failed: 311 if (flags & HAMMER2_CHAIN_INSERT_SPIN) 312 hammer2_spin_unex(&parent->core.spin); 313 return error; 314 } 315 316 /* 317 * Drop the caller's reference to the chain. When the ref count drops to 318 * zero this function will try to disassociate the chain from its parent and 319 * deallocate it, then recursely drop the parent using the implied ref 320 * from the chain's chain->parent. 321 */ 322 static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain); 323 324 void 325 hammer2_chain_drop(hammer2_chain_t *chain) 326 { 327 u_int refs; 328 u_int need = 0; 329 330 if (hammer2_debug & 0x200000) 331 Debugger("drop"); 332 #if 0 333 kprintf("DROP %p %d %08x\n", chain, chain->refs - 1, chain->flags); 334 print_backtrace(8); 335 #endif 336 337 if (chain->flags & HAMMER2_CHAIN_UPDATE) 338 ++need; 339 if (chain->flags & HAMMER2_CHAIN_MODIFIED) 340 ++need; 341 KKASSERT(chain->refs > need); 342 343 while (chain) { 344 refs = chain->refs; 345 cpu_ccfence(); 346 KKASSERT(refs > 0); 347 348 if (refs == 1) { 349 chain = hammer2_chain_lastdrop(chain); 350 } else { 351 if (atomic_cmpset_int(&chain->refs, refs, refs - 1)) 352 break; 353 /* retry the same chain */ 354 } 355 } 356 } 357 358 /* 359 * Safe handling of the 1->0 transition on chain. Returns a chain for 360 * recursive drop or NULL, possibly returning the same chain if the atomic 361 * op fails. 362 * 363 * Whem two chains need to be recursively dropped we use the chain 364 * we would otherwise free to placehold the additional chain. It's a bit 365 * convoluted but we can't just recurse without potentially blowing out 366 * the kernel stack. 367 * 368 * The chain cannot be freed if it has any children. 369 * 370 * The core spinlock is allowed nest child-to-parent (not parent-to-child). 371 */ 372 static 373 hammer2_chain_t * 374 hammer2_chain_lastdrop(hammer2_chain_t *chain) 375 { 376 hammer2_pfs_t *pmp; 377 hammer2_dev_t *hmp; 378 hammer2_chain_t *parent; 379 hammer2_chain_t *rdrop; 380 381 /* 382 * Spinlock the core and check to see if it is empty. If it is 383 * not empty we leave chain intact with refs == 0. The elements 384 * in core->rbtree are associated with other chains contemporary 385 * with ours but not with our chain directly. 386 */ 387 hammer2_spin_ex(&chain->core.spin); 388 389 /* 390 * We can't free non-stale chains with children until we are 391 * able to free the children because there might be a flush 392 * dependency. Flushes of stale children (which should also 393 * have their deleted flag set) short-cut recursive flush 394 * dependencies and can be freed here. Any flushes which run 395 * through stale children due to the flush synchronization 396 * point should have a FLUSH_* bit set in the chain and not 397 * reach lastdrop at this time. 398 * 399 * NOTE: We return (chain) on failure to retry. 400 */ 401 if (chain->core.chain_count) { 402 if (atomic_cmpset_int(&chain->refs, 1, 0)) { 403 hammer2_spin_unex(&chain->core.spin); 404 chain = NULL; /* success */ 405 } else { 406 hammer2_spin_unex(&chain->core.spin); 407 } 408 return(chain); 409 } 410 /* no chains left under us */ 411 412 /* 413 * chain->core has no children left so no accessors can get to our 414 * chain from there. Now we have to lock the parent core to interlock 415 * remaining possible accessors that might bump chain's refs before 416 * we can safely drop chain's refs with intent to free the chain. 417 */ 418 hmp = chain->hmp; 419 pmp = chain->pmp; /* can be NULL */ 420 rdrop = NULL; 421 422 /* 423 * Spinlock the parent and try to drop the last ref on chain. 424 * On success remove chain from its parent, otherwise return NULL. 425 * 426 * (normal core locks are top-down recursive but we define core 427 * spinlocks as bottom-up recursive, so this is safe). 428 */ 429 if ((parent = chain->parent) != NULL) { 430 hammer2_spin_ex(&parent->core.spin); 431 if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) { 432 /* 1->0 transition failed */ 433 hammer2_spin_unex(&parent->core.spin); 434 hammer2_spin_unex(&chain->core.spin); 435 return(chain); /* retry */ 436 } 437 438 /* 439 * 1->0 transition successful, remove chain from its 440 * above core. 441 */ 442 if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 443 RB_REMOVE(hammer2_chain_tree, 444 &parent->core.rbtree, chain); 445 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 446 --parent->core.chain_count; 447 chain->parent = NULL; 448 } 449 450 /* 451 * If our chain was the last chain in the parent's core the 452 * core is now empty and its parent might have to be 453 * re-dropped if it has 0 refs. 454 */ 455 if (parent->core.chain_count == 0) { 456 rdrop = parent; 457 if (atomic_cmpset_int(&rdrop->refs, 0, 1) == 0) { 458 rdrop = NULL; 459 } 460 } 461 hammer2_spin_unex(&parent->core.spin); 462 parent = NULL; /* safety */ 463 } 464 465 /* 466 * Successful 1->0 transition and the chain can be destroyed now. 467 * 468 * We still have the core spinlock, and core's chain_count is 0. 469 * Any parent spinlock is gone. 470 */ 471 hammer2_spin_unex(&chain->core.spin); 472 KKASSERT(RB_EMPTY(&chain->core.rbtree) && 473 chain->core.chain_count == 0); 474 475 /* 476 * All spin locks are gone, finish freeing stuff. 477 */ 478 KKASSERT((chain->flags & (HAMMER2_CHAIN_UPDATE | 479 HAMMER2_CHAIN_MODIFIED)) == 0); 480 hammer2_chain_drop_data(chain, 1); 481 482 KKASSERT(chain->dio == NULL); 483 484 /* 485 * Once chain resources are gone we can use the now dead chain 486 * structure to placehold what might otherwise require a recursive 487 * drop, because we have potentially two things to drop and can only 488 * return one directly. 489 */ 490 if (chain->flags & HAMMER2_CHAIN_ALLOCATED) { 491 chain->flags &= ~HAMMER2_CHAIN_ALLOCATED; 492 chain->hmp = NULL; 493 kfree(chain, hmp->mchain); 494 } 495 496 /* 497 * Possible chaining loop when parent re-drop needed. 498 */ 499 return(rdrop); 500 } 501 502 /* 503 * On either last lock release or last drop 504 */ 505 static void 506 hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop) 507 { 508 /*hammer2_dev_t *hmp = chain->hmp;*/ 509 510 switch(chain->bref.type) { 511 case HAMMER2_BREF_TYPE_VOLUME: 512 case HAMMER2_BREF_TYPE_FREEMAP: 513 if (lastdrop) 514 chain->data = NULL; 515 break; 516 default: 517 KKASSERT(chain->data == NULL); 518 break; 519 } 520 } 521 522 /* 523 * Lock a referenced chain element, acquiring its data with I/O if necessary, 524 * and specify how you would like the data to be resolved. 525 * 526 * If an I/O or other fatal error occurs, chain->error will be set to non-zero. 527 * 528 * The lock is allowed to recurse, multiple locking ops will aggregate 529 * the requested resolve types. Once data is assigned it will not be 530 * removed until the last unlock. 531 * 532 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element. 533 * (typically used to avoid device/logical buffer 534 * aliasing for data) 535 * 536 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in 537 * the INITIAL-create state (indirect blocks only). 538 * 539 * Do not resolve data elements for DATA chains. 540 * (typically used to avoid device/logical buffer 541 * aliasing for data) 542 * 543 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element. 544 * 545 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise 546 * it will be locked exclusive. 547 * 548 * NOTE: Embedded elements (volume header, inodes) are always resolved 549 * regardless. 550 * 551 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded 552 * element will instantiate and zero its buffer, and flush it on 553 * release. 554 * 555 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE 556 * so as not to instantiate a device buffer, which could alias against 557 * a logical file buffer. However, if ALWAYS is specified the 558 * device buffer will be instantiated anyway. 559 * 560 * WARNING! This function blocks on I/O if data needs to be fetched. This 561 * blocking can run concurrent with other compatible lock holders 562 * who do not need data returning. The lock is not upgraded to 563 * exclusive during a data fetch, a separate bit is used to 564 * interlock I/O. However, an exclusive lock holder can still count 565 * on being interlocked against an I/O fetch managed by a shared 566 * lock holder. 567 */ 568 void 569 hammer2_chain_lock(hammer2_chain_t *chain, int how) 570 { 571 /* 572 * Ref and lock the element. Recursive locks are allowed. 573 */ 574 KKASSERT(chain->refs > 0); 575 atomic_add_int(&chain->lockcnt, 1); 576 577 /* 578 * Get the appropriate lock. 579 */ 580 if (how & HAMMER2_RESOLVE_SHARED) 581 hammer2_mtx_sh(&chain->lock); 582 else 583 hammer2_mtx_ex(&chain->lock); 584 585 /* 586 * If we already have a valid data pointer no further action is 587 * necessary. 588 */ 589 if (chain->data) 590 return; 591 592 /* 593 * Do we have to resolve the data? 594 */ 595 switch(how & HAMMER2_RESOLVE_MASK) { 596 case HAMMER2_RESOLVE_NEVER: 597 return; 598 case HAMMER2_RESOLVE_MAYBE: 599 if (chain->flags & HAMMER2_CHAIN_INITIAL) 600 return; 601 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA) 602 return; 603 #if 0 604 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) 605 return; 606 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) 607 return; 608 #endif 609 /* fall through */ 610 case HAMMER2_RESOLVE_ALWAYS: 611 default: 612 break; 613 } 614 615 /* 616 * Caller requires data 617 */ 618 hammer2_chain_load_data(chain); 619 } 620 621 /* 622 * Issue I/O and install chain->data. Caller must hold a chain lock, lock 623 * may be of any type. 624 * 625 * Once chain->data is set it cannot be disposed of until all locks are 626 * released. 627 */ 628 void 629 hammer2_chain_load_data(hammer2_chain_t *chain) 630 { 631 hammer2_blockref_t *bref; 632 hammer2_dev_t *hmp; 633 char *bdata; 634 int error; 635 636 /* 637 * Degenerate case, data already present. 638 */ 639 if (chain->data) 640 return; 641 642 hmp = chain->hmp; 643 KKASSERT(hmp != NULL); 644 645 /* 646 * Gain the IOINPROG bit, interlocked block. 647 */ 648 for (;;) { 649 u_int oflags; 650 u_int nflags; 651 652 oflags = chain->flags; 653 cpu_ccfence(); 654 if (oflags & HAMMER2_CHAIN_IOINPROG) { 655 nflags = oflags | HAMMER2_CHAIN_IOSIGNAL; 656 tsleep_interlock(&chain->flags, 0); 657 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 658 tsleep(&chain->flags, PINTERLOCKED, 659 "h2iocw", 0); 660 } 661 /* retry */ 662 } else { 663 nflags = oflags | HAMMER2_CHAIN_IOINPROG; 664 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 665 break; 666 } 667 /* retry */ 668 } 669 } 670 671 /* 672 * We own CHAIN_IOINPROG 673 * 674 * Degenerate case if we raced another load. 675 */ 676 if (chain->data) 677 goto done; 678 679 /* 680 * We must resolve to a device buffer, either by issuing I/O or 681 * by creating a zero-fill element. We do not mark the buffer 682 * dirty when creating a zero-fill element (the hammer2_chain_modify() 683 * API must still be used to do that). 684 * 685 * The device buffer is variable-sized in powers of 2 down 686 * to HAMMER2_MIN_ALLOC (typically 1K). A 64K physical storage 687 * chunk always contains buffers of the same size. (XXX) 688 * 689 * The minimum physical IO size may be larger than the variable 690 * block size. 691 */ 692 bref = &chain->bref; 693 694 /* 695 * The getblk() optimization can only be used on newly created 696 * elements if the physical block size matches the request. 697 */ 698 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 699 error = hammer2_io_new(hmp, bref->data_off, chain->bytes, 700 &chain->dio); 701 } else { 702 error = hammer2_io_bread(hmp, bref->data_off, chain->bytes, 703 &chain->dio); 704 hammer2_adjreadcounter(&chain->bref, chain->bytes); 705 } 706 if (error) { 707 chain->error = HAMMER2_ERROR_IO; 708 kprintf("hammer2_chain_lock: I/O error %016jx: %d\n", 709 (intmax_t)bref->data_off, error); 710 hammer2_io_bqrelse(&chain->dio); 711 goto done; 712 } 713 chain->error = 0; 714 715 /* 716 * NOTE: A locked chain's data cannot be modified without first 717 * calling hammer2_chain_modify(). 718 */ 719 720 /* 721 * Clear INITIAL. In this case we used io_new() and the buffer has 722 * been zero'd and marked dirty. 723 */ 724 bdata = hammer2_io_data(chain->dio, chain->bref.data_off); 725 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 726 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 727 chain->bref.flags |= HAMMER2_BREF_FLAG_ZERO; 728 } else if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 729 /* 730 * check data not currently synchronized due to 731 * modification. XXX assumes data stays in the buffer 732 * cache, which might not be true (need biodep on flush 733 * to calculate crc? or simple crc?). 734 */ 735 } else { 736 if (hammer2_chain_testcheck(chain, bdata) == 0) { 737 kprintf("chain %016jx.%02x meth=%02x " 738 "CHECK FAIL %08x (flags=%08x)\n", 739 chain->bref.data_off, 740 chain->bref.type, 741 chain->bref.methods, 742 hammer2_icrc32(bdata, chain->bytes), 743 chain->flags); 744 chain->error = HAMMER2_ERROR_CHECK; 745 } 746 } 747 748 /* 749 * Setup the data pointer, either pointing it to an embedded data 750 * structure and copying the data from the buffer, or pointing it 751 * into the buffer. 752 * 753 * The buffer is not retained when copying to an embedded data 754 * structure in order to avoid potential deadlocks or recursions 755 * on the same physical buffer. 756 * 757 * WARNING! Other threads can start using the data the instant we 758 * set chain->data non-NULL. 759 */ 760 switch (bref->type) { 761 case HAMMER2_BREF_TYPE_VOLUME: 762 case HAMMER2_BREF_TYPE_FREEMAP: 763 /* 764 * Copy data from bp to embedded buffer 765 */ 766 panic("hammer2_chain_lock: called on unresolved volume header"); 767 break; 768 case HAMMER2_BREF_TYPE_INODE: 769 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 770 case HAMMER2_BREF_TYPE_INDIRECT: 771 case HAMMER2_BREF_TYPE_DATA: 772 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 773 default: 774 /* 775 * Point data at the device buffer and leave dio intact. 776 */ 777 chain->data = (void *)bdata; 778 break; 779 } 780 781 /* 782 * Release HAMMER2_CHAIN_IOINPROG and signal waiters if requested. 783 */ 784 done: 785 for (;;) { 786 u_int oflags; 787 u_int nflags; 788 789 oflags = chain->flags; 790 nflags = oflags & ~(HAMMER2_CHAIN_IOINPROG | 791 HAMMER2_CHAIN_IOSIGNAL); 792 KKASSERT(oflags & HAMMER2_CHAIN_IOINPROG); 793 if (atomic_cmpset_int(&chain->flags, oflags, nflags)) { 794 if (oflags & HAMMER2_CHAIN_IOSIGNAL) 795 wakeup(&chain->flags); 796 break; 797 } 798 } 799 } 800 801 /* 802 * Unlock and deref a chain element. 803 * 804 * On the last lock release any non-embedded data (chain->dio) will be 805 * retired. 806 */ 807 void 808 hammer2_chain_unlock(hammer2_chain_t *chain) 809 { 810 hammer2_mtx_state_t ostate; 811 long *counterp; 812 u_int lockcnt; 813 814 /* 815 * If multiple locks are present (or being attempted) on this 816 * particular chain we can just unlock, drop refs, and return. 817 * 818 * Otherwise fall-through on the 1->0 transition. 819 */ 820 for (;;) { 821 lockcnt = chain->lockcnt; 822 KKASSERT(lockcnt > 0); 823 cpu_ccfence(); 824 if (lockcnt > 1) { 825 if (atomic_cmpset_int(&chain->lockcnt, 826 lockcnt, lockcnt - 1)) { 827 hammer2_mtx_unlock(&chain->lock); 828 return; 829 } 830 } else { 831 if (atomic_cmpset_int(&chain->lockcnt, 1, 0)) 832 break; 833 } 834 /* retry */ 835 } 836 837 /* 838 * On the 1->0 transition we upgrade the core lock (if necessary) 839 * to exclusive for terminal processing. If after upgrading we find 840 * that lockcnt is non-zero, another thread is racing us and will 841 * handle the unload for us later on, so just cleanup and return 842 * leaving the data/io intact 843 * 844 * Otherwise if lockcnt is still 0 it is possible for it to become 845 * non-zero and race, but since we hold the core->lock exclusively 846 * all that will happen is that the chain will be reloaded after we 847 * unload it. 848 */ 849 ostate = hammer2_mtx_upgrade(&chain->lock); 850 if (chain->lockcnt) { 851 hammer2_mtx_unlock(&chain->lock); 852 return; 853 } 854 855 /* 856 * Shortcut the case if the data is embedded or not resolved. 857 * 858 * Do NOT NULL out chain->data (e.g. inode data), it might be 859 * dirty. 860 */ 861 if (chain->dio == NULL) { 862 if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) 863 hammer2_chain_drop_data(chain, 0); 864 hammer2_mtx_unlock(&chain->lock); 865 return; 866 } 867 868 /* 869 * Statistics 870 */ 871 if (hammer2_io_isdirty(chain->dio)) { 872 switch(chain->bref.type) { 873 case HAMMER2_BREF_TYPE_DATA: 874 counterp = &hammer2_iod_file_write; 875 break; 876 case HAMMER2_BREF_TYPE_INODE: 877 counterp = &hammer2_iod_meta_write; 878 break; 879 case HAMMER2_BREF_TYPE_INDIRECT: 880 counterp = &hammer2_iod_indr_write; 881 break; 882 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 883 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 884 counterp = &hammer2_iod_fmap_write; 885 break; 886 default: 887 counterp = &hammer2_iod_volu_write; 888 break; 889 } 890 *counterp += chain->bytes; 891 } 892 893 /* 894 * Clean out the dio. 895 * 896 * NOTE: Freemap leaf's use reserved blocks and thus no aliasing 897 * is possible. 898 */ 899 chain->data = NULL; 900 hammer2_io_bqrelse(&chain->dio); 901 hammer2_mtx_unlock(&chain->lock); 902 } 903 904 /* 905 * This counts the number of live blockrefs in a block array and 906 * also calculates the point at which all remaining blockrefs are empty. 907 * This routine can only be called on a live chain (DUPLICATED flag not set). 908 * 909 * NOTE: Flag is not set until after the count is complete, allowing 910 * callers to test the flag without holding the spinlock. 911 * 912 * NOTE: If base is NULL the related chain is still in the INITIAL 913 * state and there are no blockrefs to count. 914 * 915 * NOTE: live_count may already have some counts accumulated due to 916 * creation and deletion and could even be initially negative. 917 */ 918 void 919 hammer2_chain_countbrefs(hammer2_chain_t *chain, 920 hammer2_blockref_t *base, int count) 921 { 922 hammer2_spin_ex(&chain->core.spin); 923 if ((chain->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) { 924 if (base) { 925 while (--count >= 0) { 926 if (base[count].type) 927 break; 928 } 929 chain->core.live_zero = count + 1; 930 while (count >= 0) { 931 if (base[count].type) 932 atomic_add_int(&chain->core.live_count, 933 1); 934 --count; 935 } 936 } else { 937 chain->core.live_zero = 0; 938 } 939 /* else do not modify live_count */ 940 atomic_set_int(&chain->flags, HAMMER2_CHAIN_COUNTEDBREFS); 941 } 942 hammer2_spin_unex(&chain->core.spin); 943 } 944 945 /* 946 * Resize the chain's physical storage allocation in-place. This function does 947 * not adjust the data pointer and must be followed by (typically) a 948 * hammer2_chain_modify() call to copy any old data over and adjust the 949 * data pointer. 950 * 951 * Chains can be resized smaller without reallocating the storage. Resizing 952 * larger will reallocate the storage. Excess or prior storage is reclaimed 953 * asynchronously at a later time. 954 * 955 * Must be passed an exclusively locked parent and chain. 956 * 957 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order 958 * to avoid instantiating a device buffer that conflicts with the vnode data 959 * buffer. However, because H2 can compress or encrypt data, the chain may 960 * have a dio assigned to it in those situations, and they do not conflict. 961 * 962 * XXX return error if cannot resize. 963 */ 964 void 965 hammer2_chain_resize(hammer2_inode_t *ip, 966 hammer2_chain_t *parent, hammer2_chain_t *chain, 967 hammer2_tid_t mtid, hammer2_off_t dedup_off, 968 int nradix, int flags) 969 { 970 hammer2_dev_t *hmp; 971 size_t obytes; 972 size_t nbytes; 973 974 hmp = chain->hmp; 975 976 /* 977 * Only data and indirect blocks can be resized for now. 978 * (The volu root, inodes, and freemap elements use a fixed size). 979 */ 980 KKASSERT(chain != &hmp->vchain); 981 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA || 982 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT); 983 KKASSERT(chain->parent == parent); 984 985 /* 986 * Nothing to do if the element is already the proper size 987 */ 988 obytes = chain->bytes; 989 nbytes = 1U << nradix; 990 if (obytes == nbytes) 991 return; 992 993 /* 994 * Make sure the old data is instantiated so we can copy it. If this 995 * is a data block, the device data may be superfluous since the data 996 * might be in a logical block, but compressed or encrypted data is 997 * another matter. 998 * 999 * NOTE: The modify will set BMAPUPD for us if BMAPPED is set. 1000 */ 1001 hammer2_chain_modify(chain, mtid, dedup_off, 0); 1002 1003 /* 1004 * Relocate the block, even if making it smaller (because different 1005 * block sizes may be in different regions). 1006 * 1007 * (data blocks only, we aren't copying the storage here). 1008 */ 1009 hammer2_freemap_alloc(chain, nbytes); 1010 chain->bytes = nbytes; 1011 /*ip->delta_dcount += (ssize_t)(nbytes - obytes);*/ /* XXX atomic */ 1012 1013 /* 1014 * We don't want the followup chain_modify() to try to copy data 1015 * from the old (wrong-sized) buffer. It won't know how much to 1016 * copy. This case should only occur during writes when the 1017 * originator already has the data to write in-hand. 1018 */ 1019 if (chain->dio) { 1020 KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA); 1021 hammer2_io_brelse(&chain->dio); 1022 chain->data = NULL; 1023 } 1024 } 1025 1026 /* 1027 * Set the chain modified so its data can be changed by the caller. 1028 * 1029 * Sets bref.modify_tid to mtid only if mtid != 0. Note that bref.modify_tid 1030 * is a CLC (cluster level change) field and is not updated by parent 1031 * propagation during a flush. 1032 * 1033 * If the caller passes a non-zero dedup_off we assign data_off to that 1034 * instead of allocating a ne block. Caller must not modify the data already 1035 * present at the target offset. 1036 */ 1037 void 1038 hammer2_chain_modify(hammer2_chain_t *chain, hammer2_tid_t mtid, 1039 hammer2_off_t dedup_off, int flags) 1040 { 1041 hammer2_blockref_t obref; 1042 hammer2_dev_t *hmp; 1043 hammer2_io_t *dio; 1044 int error; 1045 int wasinitial; 1046 int newmod; 1047 char *bdata; 1048 1049 hmp = chain->hmp; 1050 obref = chain->bref; 1051 KKASSERT((chain->flags & HAMMER2_CHAIN_FICTITIOUS) == 0); 1052 1053 /* 1054 * Data is not optional for freemap chains (we must always be sure 1055 * to copy the data on COW storage allocations). 1056 */ 1057 if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 1058 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 1059 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) || 1060 (flags & HAMMER2_MODIFY_OPTDATA) == 0); 1061 } 1062 1063 /* 1064 * Data must be resolved if already assigned, unless explicitly 1065 * flagged otherwise. 1066 */ 1067 if (chain->data == NULL && (flags & HAMMER2_MODIFY_OPTDATA) == 0 && 1068 (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) { 1069 hammer2_chain_load_data(chain); 1070 } 1071 1072 /* 1073 * Set MODIFIED to indicate that the chain has been modified. 1074 * Set UPDATE to ensure that the blockref is updated in the parent. 1075 */ 1076 if ((chain->flags & (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) == 1077 (HAMMER2_CHAIN_DEDUP | HAMMER2_CHAIN_MODIFIED)) { 1078 newmod = 1; 1079 } else if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1080 atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 1081 hammer2_chain_ref(chain); 1082 hammer2_pfs_memory_inc(chain->pmp); /* can be NULL */ 1083 newmod = 1; 1084 } else { 1085 newmod = 0; 1086 } 1087 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) { 1088 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1089 hammer2_chain_ref(chain); 1090 } 1091 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DEDUP); 1092 1093 /* 1094 * The modification or re-modification requires an allocation and 1095 * possible COW. 1096 * 1097 * If dedup_off is non-zero, caller already has a data offset 1098 * containing the caller's desired data. The dedup offset is 1099 * allowed to be in a partially free state and we must be sure 1100 * to reset it to a fully allocated state to force two bulkfree 1101 * passes to free it again. 1102 * 1103 * XXX can a chain already be marked MODIFIED without a data 1104 * assignment? If not, assert here instead of testing the case. 1105 */ 1106 if (chain != &hmp->vchain && chain != &hmp->fchain) { 1107 if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 || 1108 newmod 1109 ) { 1110 if (dedup_off) { 1111 chain->bref.data_off = dedup_off; 1112 chain->bytes = 1 << (dedup_off & 1113 HAMMER2_OFF_MASK_RADIX); 1114 atomic_set_int(&chain->flags, 1115 HAMMER2_CHAIN_DEDUP); 1116 hammer2_freemap_adjust(hmp, &chain->bref, 1117 HAMMER2_FREEMAP_DORECOVER); 1118 } else { 1119 hammer2_freemap_alloc(chain, chain->bytes); 1120 } 1121 /* XXX failed allocation */ 1122 } 1123 } 1124 1125 /* 1126 * Update mirror_tid and modify_tid. modify_tid is only updated 1127 * if not passed as zero (during flushes, parent propagation passes 1128 * the value 0). 1129 * 1130 * NOTE: chain->pmp could be the device spmp. 1131 */ 1132 chain->bref.mirror_tid = hmp->voldata.mirror_tid + 1; 1133 if (mtid) 1134 chain->bref.modify_tid = mtid; 1135 1136 /* 1137 * Set BMAPUPD to tell the flush code that an existing blockmap entry 1138 * requires updating as well as to tell the delete code that the 1139 * chain's blockref might not exactly match (in terms of physical size 1140 * or block offset) the one in the parent's blocktable. The base key 1141 * of course will still match. 1142 */ 1143 if (chain->flags & HAMMER2_CHAIN_BMAPPED) 1144 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPUPD); 1145 1146 /* 1147 * Short-cut data blocks which the caller does not need an actual 1148 * data reference to (aka OPTDATA), as long as the chain does not 1149 * already have a data pointer to the data. This generally means 1150 * that the modifications are being done via the logical buffer cache. 1151 * The INITIAL flag relates only to the device data buffer and thus 1152 * remains unchange in this situation. 1153 */ 1154 if (chain->bref.type == HAMMER2_BREF_TYPE_DATA && 1155 (flags & HAMMER2_MODIFY_OPTDATA) && 1156 chain->data == NULL) { 1157 goto skip2; 1158 } 1159 1160 /* 1161 * Clearing the INITIAL flag (for indirect blocks) indicates that 1162 * we've processed the uninitialized storage allocation. 1163 * 1164 * If this flag is already clear we are likely in a copy-on-write 1165 * situation but we have to be sure NOT to bzero the storage if 1166 * no data is present. 1167 */ 1168 if (chain->flags & HAMMER2_CHAIN_INITIAL) { 1169 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 1170 wasinitial = 1; 1171 } else { 1172 wasinitial = 0; 1173 } 1174 1175 /* 1176 * Instantiate data buffer and possibly execute COW operation 1177 */ 1178 switch(chain->bref.type) { 1179 case HAMMER2_BREF_TYPE_VOLUME: 1180 case HAMMER2_BREF_TYPE_FREEMAP: 1181 /* 1182 * The data is embedded, no copy-on-write operation is 1183 * needed. 1184 */ 1185 KKASSERT(chain->dio == NULL); 1186 break; 1187 case HAMMER2_BREF_TYPE_INODE: 1188 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 1189 case HAMMER2_BREF_TYPE_DATA: 1190 case HAMMER2_BREF_TYPE_INDIRECT: 1191 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1192 /* 1193 * Perform the copy-on-write operation 1194 * 1195 * zero-fill or copy-on-write depending on whether 1196 * chain->data exists or not and set the dirty state for 1197 * the new buffer. hammer2_io_new() will handle the 1198 * zero-fill. 1199 * 1200 * If a dedup_off was supplied this is an existing block 1201 * and no COW, copy, or further modification is required. 1202 */ 1203 KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain); 1204 1205 if (wasinitial && dedup_off == 0) { 1206 error = hammer2_io_new(hmp, chain->bref.data_off, 1207 chain->bytes, &dio); 1208 } else { 1209 error = hammer2_io_bread(hmp, chain->bref.data_off, 1210 chain->bytes, &dio); 1211 } 1212 hammer2_adjreadcounter(&chain->bref, chain->bytes); 1213 1214 /* 1215 * If an I/O error occurs make sure callers cannot accidently 1216 * modify the old buffer's contents and corrupt the filesystem. 1217 */ 1218 if (error) { 1219 kprintf("hammer2_chain_modify: hmp=%p I/O error\n", 1220 hmp); 1221 chain->error = HAMMER2_ERROR_IO; 1222 hammer2_io_brelse(&dio); 1223 hammer2_io_brelse(&chain->dio); 1224 chain->data = NULL; 1225 break; 1226 } 1227 chain->error = 0; 1228 bdata = hammer2_io_data(dio, chain->bref.data_off); 1229 1230 if (chain->data) { 1231 /* 1232 * COW (unless a dedup). 1233 */ 1234 KKASSERT(chain->dio != NULL); 1235 if (chain->data != (void *)bdata && dedup_off == 0) { 1236 bcopy(chain->data, bdata, chain->bytes); 1237 } 1238 } else if (wasinitial == 0) { 1239 /* 1240 * We have a problem. We were asked to COW but 1241 * we don't have any data to COW with! 1242 */ 1243 panic("hammer2_chain_modify: having a COW %p\n", 1244 chain); 1245 } 1246 1247 /* 1248 * Retire the old buffer, replace with the new. Dirty or 1249 * redirty the new buffer. 1250 * 1251 * WARNING! The system buffer cache may have already flushed 1252 * the buffer, so we must be sure to [re]dirty it 1253 * for further modification. 1254 * 1255 * If dedup_off was supplied, the caller is not 1256 * expected to make any further modification to the 1257 * buffer. 1258 */ 1259 if (chain->dio) 1260 hammer2_io_bqrelse(&chain->dio); 1261 chain->data = (void *)bdata; 1262 chain->dio = dio; 1263 if (dedup_off == 0) 1264 hammer2_io_setdirty(dio); 1265 break; 1266 default: 1267 panic("hammer2_chain_modify: illegal non-embedded type %d", 1268 chain->bref.type); 1269 break; 1270 1271 } 1272 skip2: 1273 /* 1274 * setflush on parent indicating that the parent must recurse down 1275 * to us. Do not call on chain itself which might already have it 1276 * set. 1277 */ 1278 if (chain->parent) 1279 hammer2_chain_setflush(chain->parent); 1280 } 1281 1282 /* 1283 * Modify the chain associated with an inode. 1284 */ 1285 void 1286 hammer2_chain_modify_ip(hammer2_inode_t *ip, hammer2_chain_t *chain, 1287 hammer2_tid_t mtid, int flags) 1288 { 1289 hammer2_inode_modify(ip); 1290 hammer2_chain_modify(chain, mtid, 0, flags); 1291 } 1292 1293 /* 1294 * Volume header data locks 1295 */ 1296 void 1297 hammer2_voldata_lock(hammer2_dev_t *hmp) 1298 { 1299 lockmgr(&hmp->vollk, LK_EXCLUSIVE); 1300 } 1301 1302 void 1303 hammer2_voldata_unlock(hammer2_dev_t *hmp) 1304 { 1305 lockmgr(&hmp->vollk, LK_RELEASE); 1306 } 1307 1308 void 1309 hammer2_voldata_modify(hammer2_dev_t *hmp) 1310 { 1311 if ((hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED) == 0) { 1312 atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED); 1313 hammer2_chain_ref(&hmp->vchain); 1314 hammer2_pfs_memory_inc(hmp->vchain.pmp); 1315 } 1316 } 1317 1318 /* 1319 * This function returns the chain at the nearest key within the specified 1320 * range. The returned chain will be referenced but not locked. 1321 * 1322 * This function will recurse through chain->rbtree as necessary and will 1323 * return a *key_nextp suitable for iteration. *key_nextp is only set if 1324 * the iteration value is less than the current value of *key_nextp. 1325 * 1326 * The caller should use (*key_nextp) to calculate the actual range of 1327 * the returned element, which will be (key_beg to *key_nextp - 1), because 1328 * there might be another element which is superior to the returned element 1329 * and overlaps it. 1330 * 1331 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL 1332 * chains continue to be returned. On EOF (*key_nextp) may overflow since 1333 * it will wind up being (key_end + 1). 1334 * 1335 * WARNING! Must be called with child's spinlock held. Spinlock remains 1336 * held through the operation. 1337 */ 1338 struct hammer2_chain_find_info { 1339 hammer2_chain_t *best; 1340 hammer2_key_t key_beg; 1341 hammer2_key_t key_end; 1342 hammer2_key_t key_next; 1343 }; 1344 1345 static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data); 1346 static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data); 1347 1348 static 1349 hammer2_chain_t * 1350 hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp, 1351 hammer2_key_t key_beg, hammer2_key_t key_end) 1352 { 1353 struct hammer2_chain_find_info info; 1354 1355 info.best = NULL; 1356 info.key_beg = key_beg; 1357 info.key_end = key_end; 1358 info.key_next = *key_nextp; 1359 1360 RB_SCAN(hammer2_chain_tree, &parent->core.rbtree, 1361 hammer2_chain_find_cmp, hammer2_chain_find_callback, 1362 &info); 1363 *key_nextp = info.key_next; 1364 #if 0 1365 kprintf("chain_find %p %016jx:%016jx next=%016jx\n", 1366 parent, key_beg, key_end, *key_nextp); 1367 #endif 1368 1369 return (info.best); 1370 } 1371 1372 static 1373 int 1374 hammer2_chain_find_cmp(hammer2_chain_t *child, void *data) 1375 { 1376 struct hammer2_chain_find_info *info = data; 1377 hammer2_key_t child_beg; 1378 hammer2_key_t child_end; 1379 1380 child_beg = child->bref.key; 1381 child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1; 1382 1383 if (child_end < info->key_beg) 1384 return(-1); 1385 if (child_beg > info->key_end) 1386 return(1); 1387 return(0); 1388 } 1389 1390 static 1391 int 1392 hammer2_chain_find_callback(hammer2_chain_t *child, void *data) 1393 { 1394 struct hammer2_chain_find_info *info = data; 1395 hammer2_chain_t *best; 1396 hammer2_key_t child_end; 1397 1398 /* 1399 * WARNING! Do not discard DUPLICATED chains, it is possible that 1400 * we are catching an insertion half-way done. If a 1401 * duplicated chain turns out to be the best choice the 1402 * caller will re-check its flags after locking it. 1403 * 1404 * WARNING! Layerq is scanned forwards, exact matches should keep 1405 * the existing info->best. 1406 */ 1407 if ((best = info->best) == NULL) { 1408 /* 1409 * No previous best. Assign best 1410 */ 1411 info->best = child; 1412 } else if (best->bref.key <= info->key_beg && 1413 child->bref.key <= info->key_beg) { 1414 /* 1415 * Illegal overlap. 1416 */ 1417 KKASSERT(0); 1418 /*info->best = child;*/ 1419 } else if (child->bref.key < best->bref.key) { 1420 /* 1421 * Child has a nearer key and best is not flush with key_beg. 1422 * Set best to child. Truncate key_next to the old best key. 1423 */ 1424 info->best = child; 1425 if (info->key_next > best->bref.key || info->key_next == 0) 1426 info->key_next = best->bref.key; 1427 } else if (child->bref.key == best->bref.key) { 1428 /* 1429 * If our current best is flush with the child then this 1430 * is an illegal overlap. 1431 * 1432 * key_next will automatically be limited to the smaller of 1433 * the two end-points. 1434 */ 1435 KKASSERT(0); 1436 info->best = child; 1437 } else { 1438 /* 1439 * Keep the current best but truncate key_next to the child's 1440 * base. 1441 * 1442 * key_next will also automatically be limited to the smaller 1443 * of the two end-points (probably not necessary for this case 1444 * but we do it anyway). 1445 */ 1446 if (info->key_next > child->bref.key || info->key_next == 0) 1447 info->key_next = child->bref.key; 1448 } 1449 1450 /* 1451 * Always truncate key_next based on child's end-of-range. 1452 */ 1453 child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits); 1454 if (child_end && (info->key_next > child_end || info->key_next == 0)) 1455 info->key_next = child_end; 1456 1457 return(0); 1458 } 1459 1460 /* 1461 * Retrieve the specified chain from a media blockref, creating the 1462 * in-memory chain structure which reflects it. 1463 * 1464 * To handle insertion races pass the INSERT_RACE flag along with the 1465 * generation number of the core. NULL will be returned if the generation 1466 * number changes before we have a chance to insert the chain. Insert 1467 * races can occur because the parent might be held shared. 1468 * 1469 * Caller must hold the parent locked shared or exclusive since we may 1470 * need the parent's bref array to find our block. 1471 * 1472 * WARNING! chain->pmp is always set to NULL for any chain representing 1473 * part of the super-root topology. 1474 */ 1475 hammer2_chain_t * 1476 hammer2_chain_get(hammer2_chain_t *parent, int generation, 1477 hammer2_blockref_t *bref) 1478 { 1479 hammer2_dev_t *hmp = parent->hmp; 1480 hammer2_chain_t *chain; 1481 int error; 1482 1483 /* 1484 * Allocate a chain structure representing the existing media 1485 * entry. Resulting chain has one ref and is not locked. 1486 */ 1487 if (bref->flags & HAMMER2_BREF_FLAG_PFSROOT) 1488 chain = hammer2_chain_alloc(hmp, NULL, bref); 1489 else 1490 chain = hammer2_chain_alloc(hmp, parent->pmp, bref); 1491 /* ref'd chain returned */ 1492 1493 /* 1494 * Flag that the chain is in the parent's blockmap so delete/flush 1495 * knows what to do with it. 1496 */ 1497 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 1498 1499 /* 1500 * Link the chain into its parent. A spinlock is required to safely 1501 * access the RBTREE, and it is possible to collide with another 1502 * hammer2_chain_get() operation because the caller might only hold 1503 * a shared lock on the parent. 1504 */ 1505 KKASSERT(parent->refs > 0); 1506 error = hammer2_chain_insert(parent, chain, 1507 HAMMER2_CHAIN_INSERT_SPIN | 1508 HAMMER2_CHAIN_INSERT_RACE, 1509 generation); 1510 if (error) { 1511 KKASSERT((chain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 1512 kprintf("chain %p get race\n", chain); 1513 hammer2_chain_drop(chain); 1514 chain = NULL; 1515 } else { 1516 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 1517 } 1518 1519 /* 1520 * Return our new chain referenced but not locked, or NULL if 1521 * a race occurred. 1522 */ 1523 return (chain); 1524 } 1525 1526 /* 1527 * Lookup initialization/completion API 1528 */ 1529 hammer2_chain_t * 1530 hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags) 1531 { 1532 hammer2_chain_ref(parent); 1533 if (flags & HAMMER2_LOOKUP_SHARED) { 1534 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 1535 HAMMER2_RESOLVE_SHARED); 1536 } else { 1537 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 1538 } 1539 return (parent); 1540 } 1541 1542 void 1543 hammer2_chain_lookup_done(hammer2_chain_t *parent) 1544 { 1545 if (parent) { 1546 hammer2_chain_unlock(parent); 1547 hammer2_chain_drop(parent); 1548 } 1549 } 1550 1551 hammer2_chain_t * 1552 hammer2_chain_getparent(hammer2_chain_t **parentp, int how) 1553 { 1554 hammer2_chain_t *oparent; 1555 hammer2_chain_t *nparent; 1556 1557 /* 1558 * Be careful of order, oparent must be unlocked before nparent 1559 * is locked below to avoid a deadlock. 1560 */ 1561 oparent = *parentp; 1562 hammer2_spin_ex(&oparent->core.spin); 1563 nparent = oparent->parent; 1564 hammer2_chain_ref(nparent); 1565 hammer2_spin_unex(&oparent->core.spin); 1566 if (oparent) { 1567 hammer2_chain_unlock(oparent); 1568 hammer2_chain_drop(oparent); 1569 oparent = NULL; 1570 } 1571 1572 hammer2_chain_lock(nparent, how); 1573 *parentp = nparent; 1574 1575 return (nparent); 1576 } 1577 1578 /* 1579 * Locate the first chain whos key range overlaps (key_beg, key_end) inclusive. 1580 * (*parentp) typically points to an inode but can also point to a related 1581 * indirect block and this function will recurse upwards and find the inode 1582 * again. 1583 * 1584 * (*parentp) must be exclusively locked and referenced and can be an inode 1585 * or an existing indirect block within the inode. 1586 * 1587 * On return (*parentp) will be modified to point at the deepest parent chain 1588 * element encountered during the search, as a helper for an insertion or 1589 * deletion. The new (*parentp) will be locked and referenced and the old 1590 * will be unlocked and dereferenced (no change if they are both the same). 1591 * 1592 * The matching chain will be returned exclusively locked. If NOLOCK is 1593 * requested the chain will be returned only referenced. Note that the 1594 * parent chain must always be locked shared or exclusive, matching the 1595 * HAMMER2_LOOKUP_SHARED flag. We can conceivably lock it SHARED temporarily 1596 * when NOLOCK is specified but that complicates matters if *parentp must 1597 * inherit the chain. 1598 * 1599 * NOLOCK also implies NODATA, since an unlocked chain usually has a NULL 1600 * data pointer or can otherwise be in flux. 1601 * 1602 * NULL is returned if no match was found, but (*parentp) will still 1603 * potentially be adjusted. 1604 * 1605 * If a fatal error occurs (typically an I/O error), a dummy chain is 1606 * returned with chain->error and error-identifying information set. This 1607 * chain will assert if you try to do anything fancy with it. 1608 * 1609 * XXX Depending on where the error occurs we should allow continued iteration. 1610 * 1611 * On return (*key_nextp) will point to an iterative value for key_beg. 1612 * (If NULL is returned (*key_nextp) is set to (key_end + 1)). 1613 * 1614 * This function will also recurse up the chain if the key is not within the 1615 * current parent's range. (*parentp) can never be set to NULL. An iteration 1616 * can simply allow (*parentp) to float inside the loop. 1617 * 1618 * NOTE! chain->data is not always resolved. By default it will not be 1619 * resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF. Use 1620 * HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/ 1621 * BREF_TYPE_DATA as the device buffer can alias the logical file 1622 * buffer). 1623 */ 1624 hammer2_chain_t * 1625 hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp, 1626 hammer2_key_t key_beg, hammer2_key_t key_end, 1627 int *cache_indexp, int flags) 1628 { 1629 hammer2_dev_t *hmp; 1630 hammer2_chain_t *parent; 1631 hammer2_chain_t *chain; 1632 hammer2_blockref_t *base; 1633 hammer2_blockref_t *bref; 1634 hammer2_blockref_t bcopy; 1635 hammer2_key_t scan_beg; 1636 hammer2_key_t scan_end; 1637 int count = 0; 1638 int how_always = HAMMER2_RESOLVE_ALWAYS; 1639 int how_maybe = HAMMER2_RESOLVE_MAYBE; 1640 int how; 1641 int generation; 1642 int maxloops = 300000; 1643 1644 if (flags & HAMMER2_LOOKUP_ALWAYS) { 1645 how_maybe = how_always; 1646 how = HAMMER2_RESOLVE_ALWAYS; 1647 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 1648 how = HAMMER2_RESOLVE_NEVER; 1649 } else { 1650 how = HAMMER2_RESOLVE_MAYBE; 1651 } 1652 if (flags & HAMMER2_LOOKUP_SHARED) { 1653 how_maybe |= HAMMER2_RESOLVE_SHARED; 1654 how_always |= HAMMER2_RESOLVE_SHARED; 1655 how |= HAMMER2_RESOLVE_SHARED; 1656 } 1657 1658 /* 1659 * Recurse (*parentp) upward if necessary until the parent completely 1660 * encloses the key range or we hit the inode. 1661 * 1662 * This function handles races against the flusher doing a delete- 1663 * duplicate above us and re-homes the parent to the duplicate in 1664 * that case, otherwise we'd wind up recursing down a stale chain. 1665 */ 1666 parent = *parentp; 1667 hmp = parent->hmp; 1668 1669 while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1670 parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1671 scan_beg = parent->bref.key; 1672 scan_end = scan_beg + 1673 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 1674 if (key_beg >= scan_beg && key_end <= scan_end) 1675 break; 1676 parent = hammer2_chain_getparent(parentp, how_maybe); 1677 } 1678 1679 again: 1680 if (--maxloops == 0) 1681 panic("hammer2_chain_lookup: maxloops"); 1682 /* 1683 * Locate the blockref array. Currently we do a fully associative 1684 * search through the array. 1685 */ 1686 switch(parent->bref.type) { 1687 case HAMMER2_BREF_TYPE_INODE: 1688 /* 1689 * Special shortcut for embedded data returns the inode 1690 * itself. Callers must detect this condition and access 1691 * the embedded data (the strategy code does this for us). 1692 * 1693 * This is only applicable to regular files and softlinks. 1694 */ 1695 if (parent->data->ipdata.meta.op_flags & 1696 HAMMER2_OPFLAG_DIRECTDATA) { 1697 if (flags & HAMMER2_LOOKUP_NODIRECT) { 1698 chain = NULL; 1699 *key_nextp = key_end + 1; 1700 goto done; 1701 } 1702 hammer2_chain_ref(parent); 1703 if ((flags & HAMMER2_LOOKUP_NOLOCK) == 0) 1704 hammer2_chain_lock(parent, how_always); 1705 *key_nextp = key_end + 1; 1706 return (parent); 1707 } 1708 base = &parent->data->ipdata.u.blockset.blockref[0]; 1709 count = HAMMER2_SET_COUNT; 1710 break; 1711 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1712 case HAMMER2_BREF_TYPE_INDIRECT: 1713 /* 1714 * Handle MATCHIND on the parent 1715 */ 1716 if (flags & HAMMER2_LOOKUP_MATCHIND) { 1717 scan_beg = parent->bref.key; 1718 scan_end = scan_beg + 1719 ((hammer2_key_t)1 << parent->bref.keybits) - 1; 1720 if (key_beg == scan_beg && key_end == scan_end) { 1721 chain = parent; 1722 hammer2_chain_ref(chain); 1723 hammer2_chain_lock(chain, how_maybe); 1724 *key_nextp = scan_end + 1; 1725 goto done; 1726 } 1727 } 1728 /* 1729 * Optimize indirect blocks in the INITIAL state to avoid 1730 * I/O. 1731 */ 1732 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 1733 base = NULL; 1734 } else { 1735 if (parent->data == NULL) 1736 panic("parent->data is NULL"); 1737 base = &parent->data->npdata[0]; 1738 } 1739 count = parent->bytes / sizeof(hammer2_blockref_t); 1740 break; 1741 case HAMMER2_BREF_TYPE_VOLUME: 1742 base = &parent->data->voldata.sroot_blockset.blockref[0]; 1743 count = HAMMER2_SET_COUNT; 1744 break; 1745 case HAMMER2_BREF_TYPE_FREEMAP: 1746 base = &parent->data->blkset.blockref[0]; 1747 count = HAMMER2_SET_COUNT; 1748 break; 1749 default: 1750 kprintf("hammer2_chain_lookup: unrecognized " 1751 "blockref(B) type: %d", 1752 parent->bref.type); 1753 while (1) 1754 tsleep(&base, 0, "dead", 0); 1755 panic("hammer2_chain_lookup: unrecognized " 1756 "blockref(B) type: %d", 1757 parent->bref.type); 1758 base = NULL; /* safety */ 1759 count = 0; /* safety */ 1760 } 1761 1762 /* 1763 * Merged scan to find next candidate. 1764 * 1765 * hammer2_base_*() functions require the parent->core.live_* fields 1766 * to be synchronized. 1767 * 1768 * We need to hold the spinlock to access the block array and RB tree 1769 * and to interlock chain creation. 1770 */ 1771 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 1772 hammer2_chain_countbrefs(parent, base, count); 1773 1774 /* 1775 * Combined search 1776 */ 1777 hammer2_spin_ex(&parent->core.spin); 1778 chain = hammer2_combined_find(parent, base, count, 1779 cache_indexp, key_nextp, 1780 key_beg, key_end, 1781 &bref); 1782 generation = parent->core.generation; 1783 1784 /* 1785 * Exhausted parent chain, iterate. 1786 */ 1787 if (bref == NULL) { 1788 hammer2_spin_unex(&parent->core.spin); 1789 if (key_beg == key_end) /* short cut single-key case */ 1790 return (NULL); 1791 1792 /* 1793 * Stop if we reached the end of the iteration. 1794 */ 1795 if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 1796 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1797 return (NULL); 1798 } 1799 1800 /* 1801 * Calculate next key, stop if we reached the end of the 1802 * iteration, otherwise go up one level and loop. 1803 */ 1804 key_beg = parent->bref.key + 1805 ((hammer2_key_t)1 << parent->bref.keybits); 1806 if (key_beg == 0 || key_beg > key_end) 1807 return (NULL); 1808 parent = hammer2_chain_getparent(parentp, how_maybe); 1809 goto again; 1810 } 1811 1812 /* 1813 * Selected from blockref or in-memory chain. 1814 */ 1815 if (chain == NULL) { 1816 bcopy = *bref; 1817 hammer2_spin_unex(&parent->core.spin); 1818 chain = hammer2_chain_get(parent, generation, 1819 &bcopy); 1820 if (chain == NULL) { 1821 kprintf("retry lookup parent %p keys %016jx:%016jx\n", 1822 parent, key_beg, key_end); 1823 goto again; 1824 } 1825 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 1826 hammer2_chain_drop(chain); 1827 goto again; 1828 } 1829 } else { 1830 hammer2_chain_ref(chain); 1831 hammer2_spin_unex(&parent->core.spin); 1832 } 1833 1834 /* 1835 * chain is referenced but not locked. We must lock the chain 1836 * to obtain definitive DUPLICATED/DELETED state 1837 */ 1838 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1839 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1840 hammer2_chain_lock(chain, how_maybe); 1841 } else { 1842 hammer2_chain_lock(chain, how); 1843 } 1844 1845 /* 1846 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 1847 * 1848 * NOTE: Chain's key range is not relevant as there might be 1849 * one-offs within the range that are not deleted. 1850 * 1851 * NOTE: Lookups can race delete-duplicate because 1852 * delete-duplicate does not lock the parent's core 1853 * (they just use the spinlock on the core). We must 1854 * check for races by comparing the DUPLICATED flag before 1855 * releasing the spinlock with the flag after locking the 1856 * chain. 1857 */ 1858 if (chain->flags & HAMMER2_CHAIN_DELETED) { 1859 hammer2_chain_unlock(chain); 1860 hammer2_chain_drop(chain); 1861 key_beg = *key_nextp; 1862 if (key_beg == 0 || key_beg > key_end) 1863 return(NULL); 1864 goto again; 1865 } 1866 1867 /* 1868 * If the chain element is an indirect block it becomes the new 1869 * parent and we loop on it. We must maintain our top-down locks 1870 * to prevent the flusher from interfering (i.e. doing a 1871 * delete-duplicate and leaving us recursing down a deleted chain). 1872 * 1873 * The parent always has to be locked with at least RESOLVE_MAYBE 1874 * so we can access its data. It might need a fixup if the caller 1875 * passed incompatible flags. Be careful not to cause a deadlock 1876 * as a data-load requires an exclusive lock. 1877 * 1878 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key 1879 * range is within the requested key range we return the indirect 1880 * block and do NOT loop. This is usually only used to acquire 1881 * freemap nodes. 1882 */ 1883 if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT || 1884 chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1885 hammer2_chain_unlock(parent); 1886 hammer2_chain_drop(parent); 1887 *parentp = parent = chain; 1888 goto again; 1889 } 1890 done: 1891 /* 1892 * All done, return the chain. 1893 * 1894 * If the caller does not want a locked chain, replace the lock with 1895 * a ref. Perhaps this can eventually be optimized to not obtain the 1896 * lock in the first place for situations where the data does not 1897 * need to be resolved. 1898 */ 1899 if (chain) { 1900 if (flags & HAMMER2_LOOKUP_NOLOCK) 1901 hammer2_chain_unlock(chain); 1902 } 1903 1904 return (chain); 1905 } 1906 1907 /* 1908 * After having issued a lookup we can iterate all matching keys. 1909 * 1910 * If chain is non-NULL we continue the iteration from just after it's index. 1911 * 1912 * If chain is NULL we assume the parent was exhausted and continue the 1913 * iteration at the next parent. 1914 * 1915 * If a fatal error occurs (typically an I/O error), a dummy chain is 1916 * returned with chain->error and error-identifying information set. This 1917 * chain will assert if you try to do anything fancy with it. 1918 * 1919 * XXX Depending on where the error occurs we should allow continued iteration. 1920 * 1921 * parent must be locked on entry and remains locked throughout. chain's 1922 * lock status must match flags. Chain is always at least referenced. 1923 * 1924 * WARNING! The MATCHIND flag does not apply to this function. 1925 */ 1926 hammer2_chain_t * 1927 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 1928 hammer2_key_t *key_nextp, 1929 hammer2_key_t key_beg, hammer2_key_t key_end, 1930 int *cache_indexp, int flags) 1931 { 1932 hammer2_chain_t *parent; 1933 int how_maybe; 1934 1935 /* 1936 * Calculate locking flags for upward recursion. 1937 */ 1938 how_maybe = HAMMER2_RESOLVE_MAYBE; 1939 if (flags & HAMMER2_LOOKUP_SHARED) 1940 how_maybe |= HAMMER2_RESOLVE_SHARED; 1941 1942 parent = *parentp; 1943 1944 /* 1945 * Calculate the next index and recalculate the parent if necessary. 1946 */ 1947 if (chain) { 1948 key_beg = chain->bref.key + 1949 ((hammer2_key_t)1 << chain->bref.keybits); 1950 if ((flags & (HAMMER2_LOOKUP_NOLOCK | 1951 HAMMER2_LOOKUP_NOUNLOCK)) == 0) { 1952 hammer2_chain_unlock(chain); 1953 } 1954 hammer2_chain_drop(chain); 1955 1956 /* 1957 * chain invalid past this point, but we can still do a 1958 * pointer comparison w/parent. 1959 * 1960 * Any scan where the lookup returned degenerate data embedded 1961 * in the inode has an invalid index and must terminate. 1962 */ 1963 if (chain == parent) 1964 return(NULL); 1965 if (key_beg == 0 || key_beg > key_end) 1966 return(NULL); 1967 chain = NULL; 1968 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 1969 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 1970 /* 1971 * We reached the end of the iteration. 1972 */ 1973 return (NULL); 1974 } else { 1975 /* 1976 * Continue iteration with next parent unless the current 1977 * parent covers the range. 1978 */ 1979 key_beg = parent->bref.key + 1980 ((hammer2_key_t)1 << parent->bref.keybits); 1981 if (key_beg == 0 || key_beg > key_end) 1982 return (NULL); 1983 parent = hammer2_chain_getparent(parentp, how_maybe); 1984 } 1985 1986 /* 1987 * And execute 1988 */ 1989 return (hammer2_chain_lookup(parentp, key_nextp, 1990 key_beg, key_end, 1991 cache_indexp, flags)); 1992 } 1993 1994 /* 1995 * The raw scan function is similar to lookup/next but does not seek to a key. 1996 * Blockrefs are iterated via first_bref = (parent, NULL) and 1997 * next_chain = (parent, bref). 1998 * 1999 * The passed-in parent must be locked and its data resolved. The function 2000 * nominally returns a locked and referenced *chainp != NULL for chains 2001 * the caller might need to recurse on (and will dipose of any *chainp passed 2002 * in). The caller must check the chain->bref.type either way. 2003 * 2004 * *chainp is not set for leaf elements. 2005 * 2006 * This function takes a pointer to a stack-based bref structure whos 2007 * contents is updated for each iteration. The same pointer is returned, 2008 * or NULL when the iteration is complete. *firstp must be set to 1 for 2009 * the first ieration. This function will set it to 0. 2010 */ 2011 hammer2_blockref_t * 2012 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t **chainp, 2013 hammer2_blockref_t *bref, int *firstp, 2014 int *cache_indexp, int flags) 2015 { 2016 hammer2_dev_t *hmp; 2017 hammer2_blockref_t *base; 2018 hammer2_blockref_t *bref_ptr; 2019 hammer2_key_t key; 2020 hammer2_key_t next_key; 2021 hammer2_chain_t *chain = NULL; 2022 int count = 0; 2023 int how_always = HAMMER2_RESOLVE_ALWAYS; 2024 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2025 int how; 2026 int generation; 2027 int maxloops = 300000; 2028 2029 hmp = parent->hmp; 2030 2031 /* 2032 * Scan flags borrowed from lookup. 2033 */ 2034 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2035 how_maybe = how_always; 2036 how = HAMMER2_RESOLVE_ALWAYS; 2037 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 2038 how = HAMMER2_RESOLVE_NEVER; 2039 } else { 2040 how = HAMMER2_RESOLVE_MAYBE; 2041 } 2042 if (flags & HAMMER2_LOOKUP_SHARED) { 2043 how_maybe |= HAMMER2_RESOLVE_SHARED; 2044 how_always |= HAMMER2_RESOLVE_SHARED; 2045 how |= HAMMER2_RESOLVE_SHARED; 2046 } 2047 2048 /* 2049 * Calculate key to locate first/next element, unlocking the previous 2050 * element as we go. Be careful, the key calculation can overflow. 2051 * 2052 * (also reset bref to NULL) 2053 */ 2054 if (*firstp) { 2055 key = 0; 2056 *firstp = 0; 2057 } else { 2058 key = bref->key + ((hammer2_key_t)1 << bref->keybits); 2059 if ((chain = *chainp) != NULL) { 2060 *chainp = NULL; 2061 hammer2_chain_unlock(chain); 2062 hammer2_chain_drop(chain); 2063 chain = NULL; 2064 } 2065 if (key == 0) { 2066 bref = NULL; 2067 goto done; 2068 } 2069 } 2070 2071 again: 2072 KKASSERT(parent->error == 0); /* XXX case not handled yet */ 2073 if (--maxloops == 0) 2074 panic("hammer2_chain_scan: maxloops"); 2075 /* 2076 * Locate the blockref array. Currently we do a fully associative 2077 * search through the array. 2078 */ 2079 switch(parent->bref.type) { 2080 case HAMMER2_BREF_TYPE_INODE: 2081 /* 2082 * An inode with embedded data has no sub-chains. 2083 * 2084 * WARNING! Bulk scan code may pass a static chain marked 2085 * as BREF_TYPE_INODE with a copy of the volume 2086 * root blockset to snapshot the volume. 2087 */ 2088 if (parent->data->ipdata.meta.op_flags & 2089 HAMMER2_OPFLAG_DIRECTDATA) { 2090 bref = NULL; 2091 goto done; 2092 } 2093 base = &parent->data->ipdata.u.blockset.blockref[0]; 2094 count = HAMMER2_SET_COUNT; 2095 break; 2096 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2097 case HAMMER2_BREF_TYPE_INDIRECT: 2098 /* 2099 * Optimize indirect blocks in the INITIAL state to avoid 2100 * I/O. 2101 */ 2102 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2103 base = NULL; 2104 } else { 2105 if (parent->data == NULL) 2106 panic("parent->data is NULL"); 2107 base = &parent->data->npdata[0]; 2108 } 2109 count = parent->bytes / sizeof(hammer2_blockref_t); 2110 break; 2111 case HAMMER2_BREF_TYPE_VOLUME: 2112 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2113 count = HAMMER2_SET_COUNT; 2114 break; 2115 case HAMMER2_BREF_TYPE_FREEMAP: 2116 base = &parent->data->blkset.blockref[0]; 2117 count = HAMMER2_SET_COUNT; 2118 break; 2119 default: 2120 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 2121 parent->bref.type); 2122 base = NULL; /* safety */ 2123 count = 0; /* safety */ 2124 } 2125 2126 /* 2127 * Merged scan to find next candidate. 2128 * 2129 * hammer2_base_*() functions require the parent->core.live_* fields 2130 * to be synchronized. 2131 * 2132 * We need to hold the spinlock to access the block array and RB tree 2133 * and to interlock chain creation. 2134 */ 2135 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2136 hammer2_chain_countbrefs(parent, base, count); 2137 2138 next_key = 0; 2139 bref_ptr = NULL; 2140 hammer2_spin_ex(&parent->core.spin); 2141 chain = hammer2_combined_find(parent, base, count, 2142 cache_indexp, &next_key, 2143 key, HAMMER2_KEY_MAX, 2144 &bref_ptr); 2145 generation = parent->core.generation; 2146 2147 /* 2148 * Exhausted parent chain, we're done. 2149 */ 2150 if (bref_ptr == NULL) { 2151 hammer2_spin_unex(&parent->core.spin); 2152 KKASSERT(chain == NULL); 2153 bref = NULL; 2154 goto done; 2155 } 2156 2157 /* 2158 * Copy into the supplied stack-based blockref. 2159 */ 2160 *bref = *bref_ptr; 2161 2162 /* 2163 * Selected from blockref or in-memory chain. 2164 */ 2165 if (chain == NULL) { 2166 switch(bref->type) { 2167 case HAMMER2_BREF_TYPE_INODE: 2168 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2169 case HAMMER2_BREF_TYPE_INDIRECT: 2170 case HAMMER2_BREF_TYPE_VOLUME: 2171 case HAMMER2_BREF_TYPE_FREEMAP: 2172 /* 2173 * Recursion, always get the chain 2174 */ 2175 hammer2_spin_unex(&parent->core.spin); 2176 chain = hammer2_chain_get(parent, generation, bref); 2177 if (chain == NULL) { 2178 kprintf("retry scan parent %p keys %016jx\n", 2179 parent, key); 2180 goto again; 2181 } 2182 if (bcmp(bref, bref_ptr, sizeof(*bref))) { 2183 hammer2_chain_drop(chain); 2184 chain = NULL; 2185 goto again; 2186 } 2187 break; 2188 default: 2189 /* 2190 * No recursion, do not waste time instantiating 2191 * a chain, just iterate using the bref. 2192 */ 2193 hammer2_spin_unex(&parent->core.spin); 2194 break; 2195 } 2196 } else { 2197 /* 2198 * Recursion or not we need the chain in order to supply 2199 * the bref. 2200 */ 2201 hammer2_chain_ref(chain); 2202 hammer2_spin_unex(&parent->core.spin); 2203 } 2204 2205 /* 2206 * chain is referenced but not locked. We must lock the chain 2207 * to obtain definitive DUPLICATED/DELETED state 2208 */ 2209 if (chain) 2210 hammer2_chain_lock(chain, how); 2211 2212 /* 2213 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2214 * 2215 * NOTE: chain's key range is not relevant as there might be 2216 * one-offs within the range that are not deleted. 2217 * 2218 * NOTE: XXX this could create problems with scans used in 2219 * situations other than mount-time recovery. 2220 * 2221 * NOTE: Lookups can race delete-duplicate because 2222 * delete-duplicate does not lock the parent's core 2223 * (they just use the spinlock on the core). We must 2224 * check for races by comparing the DUPLICATED flag before 2225 * releasing the spinlock with the flag after locking the 2226 * chain. 2227 */ 2228 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 2229 hammer2_chain_unlock(chain); 2230 hammer2_chain_drop(chain); 2231 chain = NULL; 2232 2233 key = next_key; 2234 if (key == 0) { 2235 bref = NULL; 2236 goto done; 2237 } 2238 goto again; 2239 } 2240 2241 done: 2242 /* 2243 * All done, return the bref or NULL, supply chain if necessary. 2244 */ 2245 if (chain) 2246 *chainp = chain; 2247 return (bref); 2248 } 2249 2250 /* 2251 * Create and return a new hammer2 system memory structure of the specified 2252 * key, type and size and insert it under (*parentp). This is a full 2253 * insertion, based on the supplied key/keybits, and may involve creating 2254 * indirect blocks and moving other chains around via delete/duplicate. 2255 * 2256 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2257 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2258 * FULL. This typically means that the caller is creating the chain after 2259 * doing a hammer2_chain_lookup(). 2260 * 2261 * (*parentp) must be exclusive locked and may be replaced on return 2262 * depending on how much work the function had to do. 2263 * 2264 * (*parentp) must not be errored or this function will assert. 2265 * 2266 * (*chainp) usually starts out NULL and returns the newly created chain, 2267 * but if the caller desires the caller may allocate a disconnected chain 2268 * and pass it in instead. 2269 * 2270 * This function should NOT be used to insert INDIRECT blocks. It is 2271 * typically used to create/insert inodes and data blocks. 2272 * 2273 * Caller must pass-in an exclusively locked parent the new chain is to 2274 * be inserted under, and optionally pass-in a disconnected, exclusively 2275 * locked chain to insert (else we create a new chain). The function will 2276 * adjust (*parentp) as necessary, create or connect the chain, and 2277 * return an exclusively locked chain in *chainp. 2278 * 2279 * When creating a PFSROOT inode under the super-root, pmp is typically NULL 2280 * and will be reassigned. 2281 */ 2282 int 2283 hammer2_chain_create(hammer2_chain_t **parentp, 2284 hammer2_chain_t **chainp, hammer2_pfs_t *pmp, 2285 hammer2_key_t key, int keybits, int type, size_t bytes, 2286 hammer2_tid_t mtid, hammer2_off_t dedup_off, int flags) 2287 { 2288 hammer2_dev_t *hmp; 2289 hammer2_chain_t *chain; 2290 hammer2_chain_t *parent; 2291 hammer2_blockref_t *base; 2292 hammer2_blockref_t dummy; 2293 int allocated = 0; 2294 int error = 0; 2295 int count; 2296 int maxloops = 300000; 2297 2298 /* 2299 * Topology may be crossing a PFS boundary. 2300 */ 2301 parent = *parentp; 2302 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2303 KKASSERT(parent->error == 0); 2304 hmp = parent->hmp; 2305 chain = *chainp; 2306 2307 if (chain == NULL) { 2308 /* 2309 * First allocate media space and construct the dummy bref, 2310 * then allocate the in-memory chain structure. Set the 2311 * INITIAL flag for fresh chains which do not have embedded 2312 * data. 2313 * 2314 * XXX for now set the check mode of the child based on 2315 * the parent or, if the parent is an inode, the 2316 * specification in the inode. 2317 */ 2318 bzero(&dummy, sizeof(dummy)); 2319 dummy.type = type; 2320 dummy.key = key; 2321 dummy.keybits = keybits; 2322 dummy.data_off = hammer2_getradix(bytes); 2323 dummy.methods = parent->bref.methods; 2324 if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && 2325 parent->data) { 2326 dummy.methods &= ~HAMMER2_ENC_CHECK(-1); 2327 dummy.methods |= HAMMER2_ENC_CHECK( 2328 parent->data->ipdata.meta.check_algo); 2329 } 2330 2331 chain = hammer2_chain_alloc(hmp, pmp, &dummy); 2332 2333 /* 2334 * Lock the chain manually, chain_lock will load the chain 2335 * which we do NOT want to do. (note: chain->refs is set 2336 * to 1 by chain_alloc() for us, but lockcnt is not). 2337 */ 2338 chain->lockcnt = 1; 2339 hammer2_mtx_ex(&chain->lock); 2340 allocated = 1; 2341 2342 /* 2343 * Set INITIAL to optimize I/O. The flag will generally be 2344 * processed when we call hammer2_chain_modify(). 2345 * 2346 * Recalculate bytes to reflect the actual media block 2347 * allocation. 2348 */ 2349 bytes = (hammer2_off_t)1 << 2350 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 2351 chain->bytes = bytes; 2352 2353 switch(type) { 2354 case HAMMER2_BREF_TYPE_VOLUME: 2355 case HAMMER2_BREF_TYPE_FREEMAP: 2356 panic("hammer2_chain_create: called with volume type"); 2357 break; 2358 case HAMMER2_BREF_TYPE_INDIRECT: 2359 panic("hammer2_chain_create: cannot be used to" 2360 "create indirect block"); 2361 break; 2362 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2363 panic("hammer2_chain_create: cannot be used to" 2364 "create freemap root or node"); 2365 break; 2366 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2367 KKASSERT(bytes == sizeof(chain->data->bmdata)); 2368 /* fall through */ 2369 case HAMMER2_BREF_TYPE_INODE: 2370 case HAMMER2_BREF_TYPE_DATA: 2371 default: 2372 /* 2373 * leave chain->data NULL, set INITIAL 2374 */ 2375 KKASSERT(chain->data == NULL); 2376 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 2377 break; 2378 } 2379 } else { 2380 /* 2381 * We are reattaching a previously deleted chain, possibly 2382 * under a new parent and possibly with a new key/keybits. 2383 * The chain does not have to be in a modified state. The 2384 * UPDATE flag will be set later on in this routine. 2385 * 2386 * Do NOT mess with the current state of the INITIAL flag. 2387 */ 2388 chain->bref.key = key; 2389 chain->bref.keybits = keybits; 2390 if (chain->flags & HAMMER2_CHAIN_DELETED) 2391 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2392 KKASSERT(chain->parent == NULL); 2393 } 2394 if (flags & HAMMER2_INSERT_PFSROOT) 2395 chain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 2396 else 2397 chain->bref.flags &= ~HAMMER2_BREF_FLAG_PFSROOT; 2398 2399 /* 2400 * Calculate how many entries we have in the blockref array and 2401 * determine if an indirect block is required. 2402 */ 2403 again: 2404 if (--maxloops == 0) 2405 panic("hammer2_chain_create: maxloops"); 2406 2407 switch(parent->bref.type) { 2408 case HAMMER2_BREF_TYPE_INODE: 2409 KKASSERT((parent->data->ipdata.meta.op_flags & 2410 HAMMER2_OPFLAG_DIRECTDATA) == 0); 2411 KKASSERT(parent->data != NULL); 2412 base = &parent->data->ipdata.u.blockset.blockref[0]; 2413 count = HAMMER2_SET_COUNT; 2414 break; 2415 case HAMMER2_BREF_TYPE_INDIRECT: 2416 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2417 if (parent->flags & HAMMER2_CHAIN_INITIAL) 2418 base = NULL; 2419 else 2420 base = &parent->data->npdata[0]; 2421 count = parent->bytes / sizeof(hammer2_blockref_t); 2422 break; 2423 case HAMMER2_BREF_TYPE_VOLUME: 2424 KKASSERT(parent->data != NULL); 2425 base = &parent->data->voldata.sroot_blockset.blockref[0]; 2426 count = HAMMER2_SET_COUNT; 2427 break; 2428 case HAMMER2_BREF_TYPE_FREEMAP: 2429 KKASSERT(parent->data != NULL); 2430 base = &parent->data->blkset.blockref[0]; 2431 count = HAMMER2_SET_COUNT; 2432 break; 2433 default: 2434 panic("hammer2_chain_create: unrecognized blockref type: %d", 2435 parent->bref.type); 2436 base = NULL; 2437 count = 0; 2438 break; 2439 } 2440 2441 /* 2442 * Make sure we've counted the brefs 2443 */ 2444 if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0) 2445 hammer2_chain_countbrefs(parent, base, count); 2446 2447 KKASSERT(parent->core.live_count >= 0 && 2448 parent->core.live_count <= count); 2449 2450 /* 2451 * If no free blockref could be found we must create an indirect 2452 * block and move a number of blockrefs into it. With the parent 2453 * locked we can safely lock each child in order to delete+duplicate 2454 * it without causing a deadlock. 2455 * 2456 * This may return the new indirect block or the old parent depending 2457 * on where the key falls. NULL is returned on error. 2458 */ 2459 if (parent->core.live_count == count) { 2460 hammer2_chain_t *nparent; 2461 2462 nparent = hammer2_chain_create_indirect(parent, key, keybits, 2463 mtid, type, &error); 2464 if (nparent == NULL) { 2465 if (allocated) 2466 hammer2_chain_drop(chain); 2467 chain = NULL; 2468 goto done; 2469 } 2470 if (parent != nparent) { 2471 hammer2_chain_unlock(parent); 2472 hammer2_chain_drop(parent); 2473 parent = *parentp = nparent; 2474 } 2475 goto again; 2476 } 2477 2478 /* 2479 * Link the chain into its parent. 2480 */ 2481 if (chain->parent != NULL) 2482 panic("hammer2: hammer2_chain_create: chain already connected"); 2483 KKASSERT(chain->parent == NULL); 2484 hammer2_chain_insert(parent, chain, 2485 HAMMER2_CHAIN_INSERT_SPIN | 2486 HAMMER2_CHAIN_INSERT_LIVE, 2487 0); 2488 2489 if (allocated) { 2490 /* 2491 * Mark the newly created chain modified. This will cause 2492 * UPDATE to be set and process the INITIAL flag. 2493 * 2494 * Device buffers are not instantiated for DATA elements 2495 * as these are handled by logical buffers. 2496 * 2497 * Indirect and freemap node indirect blocks are handled 2498 * by hammer2_chain_create_indirect() and not by this 2499 * function. 2500 * 2501 * Data for all other bref types is expected to be 2502 * instantiated (INODE, LEAF). 2503 */ 2504 switch(chain->bref.type) { 2505 case HAMMER2_BREF_TYPE_DATA: 2506 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2507 case HAMMER2_BREF_TYPE_INODE: 2508 hammer2_chain_modify(chain, mtid, dedup_off, 2509 HAMMER2_MODIFY_OPTDATA); 2510 break; 2511 default: 2512 /* 2513 * Remaining types are not supported by this function. 2514 * In particular, INDIRECT and LEAF_NODE types are 2515 * handled by create_indirect(). 2516 */ 2517 panic("hammer2_chain_create: bad type: %d", 2518 chain->bref.type); 2519 /* NOT REACHED */ 2520 break; 2521 } 2522 } else { 2523 /* 2524 * When reconnecting a chain we must set UPDATE and 2525 * setflush so the flush recognizes that it must update 2526 * the bref in the parent. 2527 */ 2528 if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0) { 2529 hammer2_chain_ref(chain); 2530 atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 2531 } 2532 } 2533 2534 /* 2535 * We must setflush(parent) to ensure that it recurses through to 2536 * chain. setflush(chain) might not work because ONFLUSH is possibly 2537 * already set in the chain (so it won't recurse up to set it in the 2538 * parent). 2539 */ 2540 hammer2_chain_setflush(parent); 2541 2542 done: 2543 *chainp = chain; 2544 2545 return (error); 2546 } 2547 2548 /* 2549 * Move the chain from its old parent to a new parent. The chain must have 2550 * already been deleted or already disconnected (or never associated) with 2551 * a parent. The chain is reassociated with the new parent and the deleted 2552 * flag will be cleared (no longer deleted). The chain's modification state 2553 * is not altered. 2554 * 2555 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (parent) TO THE INSERTION 2556 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2557 * FULL. This typically means that the caller is creating the chain after 2558 * doing a hammer2_chain_lookup(). 2559 * 2560 * A non-NULL bref is typically passed when key and keybits must be overridden. 2561 * Note that hammer2_cluster_duplicate() *ONLY* uses the key and keybits fields 2562 * from a passed-in bref and uses the old chain's bref for everything else. 2563 * 2564 * Neither (parent) or (chain) can be errored. 2565 * 2566 * If (parent) is non-NULL then the new duplicated chain is inserted under 2567 * the parent. 2568 * 2569 * If (parent) is NULL then the newly duplicated chain is not inserted 2570 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 2571 * passing into hammer2_chain_create() after this function returns). 2572 * 2573 * WARNING! This function calls create which means it can insert indirect 2574 * blocks. This can cause other unrelated chains in the parent to 2575 * be moved to a newly inserted indirect block in addition to the 2576 * specific chain. 2577 */ 2578 void 2579 hammer2_chain_rename(hammer2_blockref_t *bref, 2580 hammer2_chain_t **parentp, hammer2_chain_t *chain, 2581 hammer2_tid_t mtid, int flags) 2582 { 2583 hammer2_dev_t *hmp; 2584 hammer2_chain_t *parent; 2585 size_t bytes; 2586 2587 /* 2588 * WARNING! We should never resolve DATA to device buffers 2589 * (XXX allow it if the caller did?), and since 2590 * we currently do not have the logical buffer cache 2591 * buffer in-hand to fix its cached physical offset 2592 * we also force the modify code to not COW it. XXX 2593 */ 2594 hmp = chain->hmp; 2595 KKASSERT(chain->parent == NULL); 2596 KKASSERT(chain->error == 0); 2597 2598 /* 2599 * Now create a duplicate of the chain structure, associating 2600 * it with the same core, making it the same size, pointing it 2601 * to the same bref (the same media block). 2602 */ 2603 if (bref == NULL) 2604 bref = &chain->bref; 2605 bytes = (hammer2_off_t)1 << 2606 (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 2607 2608 /* 2609 * If parent is not NULL the duplicated chain will be entered under 2610 * the parent and the UPDATE bit set to tell flush to update 2611 * the blockref. 2612 * 2613 * We must setflush(parent) to ensure that it recurses through to 2614 * chain. setflush(chain) might not work because ONFLUSH is possibly 2615 * already set in the chain (so it won't recurse up to set it in the 2616 * parent). 2617 * 2618 * Having both chains locked is extremely important for atomicy. 2619 */ 2620 if (parentp && (parent = *parentp) != NULL) { 2621 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2622 KKASSERT(parent->refs > 0); 2623 KKASSERT(parent->error == 0); 2624 2625 hammer2_chain_create(parentp, &chain, chain->pmp, 2626 bref->key, bref->keybits, bref->type, 2627 chain->bytes, mtid, 0, flags); 2628 KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 2629 hammer2_chain_setflush(*parentp); 2630 } 2631 } 2632 2633 /* 2634 * Helper function for deleting chains. 2635 * 2636 * The chain is removed from the live view (the RBTREE) as well as the parent's 2637 * blockmap. Both chain and its parent must be locked. 2638 * 2639 * parent may not be errored. chain can be errored. 2640 */ 2641 static void 2642 _hammer2_chain_delete_helper(hammer2_chain_t *parent, hammer2_chain_t *chain, 2643 hammer2_tid_t mtid, int flags) 2644 { 2645 hammer2_dev_t *hmp; 2646 2647 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 2648 HAMMER2_CHAIN_FICTITIOUS)) == 0); 2649 KKASSERT(chain->parent == parent); 2650 hmp = chain->hmp; 2651 2652 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 2653 /* 2654 * Chain is blockmapped, so there must be a parent. 2655 * Atomically remove the chain from the parent and remove 2656 * the blockmap entry. 2657 */ 2658 hammer2_blockref_t *base; 2659 int count; 2660 2661 KKASSERT(parent != NULL); 2662 KKASSERT(parent->error == 0); 2663 KKASSERT((parent->flags & HAMMER2_CHAIN_INITIAL) == 0); 2664 hammer2_chain_modify(parent, mtid, 0, HAMMER2_MODIFY_OPTDATA); 2665 2666 /* 2667 * Calculate blockmap pointer 2668 */ 2669 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2670 hammer2_spin_ex(&parent->core.spin); 2671 2672 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2673 atomic_add_int(&parent->core.live_count, -1); 2674 ++parent->core.generation; 2675 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 2676 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2677 --parent->core.chain_count; 2678 chain->parent = NULL; 2679 2680 switch(parent->bref.type) { 2681 case HAMMER2_BREF_TYPE_INODE: 2682 /* 2683 * Access the inode's block array. However, there 2684 * is no block array if the inode is flagged 2685 * DIRECTDATA. The DIRECTDATA case typicaly only 2686 * occurs when a hardlink has been shifted up the 2687 * tree and the original inode gets replaced with 2688 * an OBJTYPE_HARDLINK placeholding inode. 2689 */ 2690 if (parent->data && 2691 (parent->data->ipdata.meta.op_flags & 2692 HAMMER2_OPFLAG_DIRECTDATA) == 0) { 2693 base = 2694 &parent->data->ipdata.u.blockset.blockref[0]; 2695 } else { 2696 base = NULL; 2697 } 2698 count = HAMMER2_SET_COUNT; 2699 break; 2700 case HAMMER2_BREF_TYPE_INDIRECT: 2701 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2702 if (parent->data) 2703 base = &parent->data->npdata[0]; 2704 else 2705 base = NULL; 2706 count = parent->bytes / sizeof(hammer2_blockref_t); 2707 break; 2708 case HAMMER2_BREF_TYPE_VOLUME: 2709 base = &parent->data->voldata. 2710 sroot_blockset.blockref[0]; 2711 count = HAMMER2_SET_COUNT; 2712 break; 2713 case HAMMER2_BREF_TYPE_FREEMAP: 2714 base = &parent->data->blkset.blockref[0]; 2715 count = HAMMER2_SET_COUNT; 2716 break; 2717 default: 2718 base = NULL; 2719 count = 0; 2720 panic("hammer2_flush_pass2: " 2721 "unrecognized blockref type: %d", 2722 parent->bref.type); 2723 } 2724 2725 /* 2726 * delete blockmapped chain from its parent. 2727 * 2728 * The parent is not affected by any statistics in chain 2729 * which are pending synchronization. That is, there is 2730 * nothing to undo in the parent since they have not yet 2731 * been incorporated into the parent. 2732 * 2733 * The parent is affected by statistics stored in inodes. 2734 * Those have already been synchronized, so they must be 2735 * undone. XXX split update possible w/delete in middle? 2736 */ 2737 if (base) { 2738 int cache_index = -1; 2739 hammer2_base_delete(parent, base, count, 2740 &cache_index, chain); 2741 } 2742 hammer2_spin_unex(&parent->core.spin); 2743 } else if (chain->flags & HAMMER2_CHAIN_ONRBTREE) { 2744 /* 2745 * Chain is not blockmapped but a parent is present. 2746 * Atomically remove the chain from the parent. There is 2747 * no blockmap entry to remove. 2748 * 2749 * Because chain was associated with a parent but not 2750 * synchronized, the chain's *_count_up fields contain 2751 * inode adjustment statistics which must be undone. 2752 */ 2753 hammer2_spin_ex(&parent->core.spin); 2754 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2755 atomic_add_int(&parent->core.live_count, -1); 2756 ++parent->core.generation; 2757 RB_REMOVE(hammer2_chain_tree, &parent->core.rbtree, chain); 2758 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2759 --parent->core.chain_count; 2760 chain->parent = NULL; 2761 hammer2_spin_unex(&parent->core.spin); 2762 } else { 2763 /* 2764 * Chain is not blockmapped and has no parent. This 2765 * is a degenerate case. 2766 */ 2767 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2768 } 2769 } 2770 2771 /* 2772 * Create an indirect block that covers one or more of the elements in the 2773 * current parent. Either returns the existing parent with no locking or 2774 * ref changes or returns the new indirect block locked and referenced 2775 * and leaving the original parent lock/ref intact as well. 2776 * 2777 * If an error occurs, NULL is returned and *errorp is set to the error. 2778 * 2779 * The returned chain depends on where the specified key falls. 2780 * 2781 * The key/keybits for the indirect mode only needs to follow three rules: 2782 * 2783 * (1) That all elements underneath it fit within its key space and 2784 * 2785 * (2) That all elements outside it are outside its key space. 2786 * 2787 * (3) When creating the new indirect block any elements in the current 2788 * parent that fit within the new indirect block's keyspace must be 2789 * moved into the new indirect block. 2790 * 2791 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider 2792 * keyspace the the current parent, but lookup/iteration rules will 2793 * ensure (and must ensure) that rule (2) for all parents leading up 2794 * to the nearest inode or the root volume header is adhered to. This 2795 * is accomplished by always recursing through matching keyspaces in 2796 * the hammer2_chain_lookup() and hammer2_chain_next() API. 2797 * 2798 * The current implementation calculates the current worst-case keyspace by 2799 * iterating the current parent and then divides it into two halves, choosing 2800 * whichever half has the most elements (not necessarily the half containing 2801 * the requested key). 2802 * 2803 * We can also opt to use the half with the least number of elements. This 2804 * causes lower-numbered keys (aka logical file offsets) to recurse through 2805 * fewer indirect blocks and higher-numbered keys to recurse through more. 2806 * This also has the risk of not moving enough elements to the new indirect 2807 * block and being forced to create several indirect blocks before the element 2808 * can be inserted. 2809 * 2810 * Must be called with an exclusively locked parent. 2811 */ 2812 static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent, 2813 hammer2_key_t *keyp, int keybits, 2814 hammer2_blockref_t *base, int count); 2815 static int hammer2_chain_indkey_normal(hammer2_chain_t *parent, 2816 hammer2_key_t *keyp, int keybits, 2817 hammer2_blockref_t *base, int count); 2818 static 2819 hammer2_chain_t * 2820 hammer2_chain_create_indirect(hammer2_chain_t *parent, 2821 hammer2_key_t create_key, int create_bits, 2822 hammer2_tid_t mtid, int for_type, int *errorp) 2823 { 2824 hammer2_dev_t *hmp; 2825 hammer2_blockref_t *base; 2826 hammer2_blockref_t *bref; 2827 hammer2_blockref_t bcopy; 2828 hammer2_chain_t *chain; 2829 hammer2_chain_t *ichain; 2830 hammer2_chain_t dummy; 2831 hammer2_key_t key = create_key; 2832 hammer2_key_t key_beg; 2833 hammer2_key_t key_end; 2834 hammer2_key_t key_next; 2835 int keybits = create_bits; 2836 int count; 2837 int nbytes; 2838 int cache_index; 2839 int loops; 2840 int reason; 2841 int generation; 2842 int maxloops = 300000; 2843 2844 /* 2845 * Calculate the base blockref pointer or NULL if the chain 2846 * is known to be empty. We need to calculate the array count 2847 * for RB lookups either way. 2848 */ 2849 hmp = parent->hmp; 2850 *errorp = 0; 2851 KKASSERT(hammer2_mtx_owned(&parent->lock)); 2852 2853 /*hammer2_chain_modify(&parent, HAMMER2_MODIFY_OPTDATA);*/ 2854 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2855 base = NULL; 2856 2857 switch(parent->bref.type) { 2858 case HAMMER2_BREF_TYPE_INODE: 2859 count = HAMMER2_SET_COUNT; 2860 break; 2861 case HAMMER2_BREF_TYPE_INDIRECT: 2862 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2863 count = parent->bytes / sizeof(hammer2_blockref_t); 2864 break; 2865 case HAMMER2_BREF_TYPE_VOLUME: 2866 count = HAMMER2_SET_COUNT; 2867 break; 2868 case HAMMER2_BREF_TYPE_FREEMAP: 2869 count = HAMMER2_SET_COUNT; 2870 break; 2871 default: 2872 panic("hammer2_chain_create_indirect: " 2873 "unrecognized blockref type: %d", 2874 parent->bref.type); 2875 count = 0; 2876 break; 2877 } 2878 } else { 2879 switch(parent->bref.type) { 2880 case HAMMER2_BREF_TYPE_INODE: 2881 base = &parent->data->ipdata.u.blockset.blockref[0]; 2882 count = HAMMER2_SET_COUNT; 2883 break; 2884 case HAMMER2_BREF_TYPE_INDIRECT: 2885 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2886 base = &parent->data->npdata[0]; 2887 count = parent->bytes / sizeof(hammer2_blockref_t); 2888 break; 2889 case HAMMER2_BREF_TYPE_VOLUME: 2890 base = &parent->data->voldata. 2891 sroot_blockset.blockref[0]; 2892 count = HAMMER2_SET_COUNT; 2893 break; 2894 case HAMMER2_BREF_TYPE_FREEMAP: 2895 base = &parent->data->blkset.blockref[0]; 2896 count = HAMMER2_SET_COUNT; 2897 break; 2898 default: 2899 panic("hammer2_chain_create_indirect: " 2900 "unrecognized blockref type: %d", 2901 parent->bref.type); 2902 count = 0; 2903 break; 2904 } 2905 } 2906 2907 /* 2908 * dummy used in later chain allocation (no longer used for lookups). 2909 */ 2910 bzero(&dummy, sizeof(dummy)); 2911 2912 /* 2913 * When creating an indirect block for a freemap node or leaf 2914 * the key/keybits must be fitted to static radix levels because 2915 * particular radix levels use particular reserved blocks in the 2916 * related zone. 2917 * 2918 * This routine calculates the key/radix of the indirect block 2919 * we need to create, and whether it is on the high-side or the 2920 * low-side. 2921 */ 2922 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 2923 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 2924 keybits = hammer2_chain_indkey_freemap(parent, &key, keybits, 2925 base, count); 2926 } else { 2927 keybits = hammer2_chain_indkey_normal(parent, &key, keybits, 2928 base, count); 2929 } 2930 2931 /* 2932 * Normalize the key for the radix being represented, keeping the 2933 * high bits and throwing away the low bits. 2934 */ 2935 key &= ~(((hammer2_key_t)1 << keybits) - 1); 2936 2937 /* 2938 * How big should our new indirect block be? It has to be at least 2939 * as large as its parent. 2940 * 2941 * The freemap uses a specific indirect block size. 2942 * 2943 * The first indirect block level down from an inode typically 2944 * uses LBUFSIZE (16384), else it uses PBUFSIZE (65536). 2945 */ 2946 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 2947 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 2948 nbytes = HAMMER2_FREEMAP_LEVELN_PSIZE; 2949 } else if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 2950 nbytes = HAMMER2_IND_BYTES_MIN; 2951 } else { 2952 nbytes = HAMMER2_IND_BYTES_MAX; 2953 } 2954 if (nbytes < count * sizeof(hammer2_blockref_t)) { 2955 KKASSERT(for_type != HAMMER2_BREF_TYPE_FREEMAP_NODE && 2956 for_type != HAMMER2_BREF_TYPE_FREEMAP_LEAF); 2957 nbytes = count * sizeof(hammer2_blockref_t); 2958 } 2959 2960 /* 2961 * Ok, create our new indirect block 2962 */ 2963 if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE || 2964 for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) { 2965 dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE; 2966 } else { 2967 dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT; 2968 } 2969 dummy.bref.key = key; 2970 dummy.bref.keybits = keybits; 2971 dummy.bref.data_off = hammer2_getradix(nbytes); 2972 dummy.bref.methods = parent->bref.methods; 2973 2974 ichain = hammer2_chain_alloc(hmp, parent->pmp, &dummy.bref); 2975 atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL); 2976 hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE); 2977 /* ichain has one ref at this point */ 2978 2979 /* 2980 * We have to mark it modified to allocate its block, but use 2981 * OPTDATA to allow it to remain in the INITIAL state. Otherwise 2982 * it won't be acted upon by the flush code. 2983 */ 2984 hammer2_chain_modify(ichain, mtid, 0, HAMMER2_MODIFY_OPTDATA); 2985 2986 /* 2987 * Iterate the original parent and move the matching brefs into 2988 * the new indirect block. 2989 * 2990 * XXX handle flushes. 2991 */ 2992 key_beg = 0; 2993 key_end = HAMMER2_KEY_MAX; 2994 cache_index = 0; 2995 hammer2_spin_ex(&parent->core.spin); 2996 loops = 0; 2997 reason = 0; 2998 2999 for (;;) { 3000 if (++loops > 100000) { 3001 hammer2_spin_unex(&parent->core.spin); 3002 panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n", 3003 reason, parent, base, count, key_next); 3004 } 3005 3006 /* 3007 * NOTE: spinlock stays intact, returned chain (if not NULL) 3008 * is not referenced or locked which means that we 3009 * cannot safely check its flagged / deletion status 3010 * until we lock it. 3011 */ 3012 chain = hammer2_combined_find(parent, base, count, 3013 &cache_index, &key_next, 3014 key_beg, key_end, 3015 &bref); 3016 generation = parent->core.generation; 3017 if (bref == NULL) 3018 break; 3019 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3020 3021 /* 3022 * Skip keys that are not within the key/radix of the new 3023 * indirect block. They stay in the parent. 3024 */ 3025 if ((~(((hammer2_key_t)1 << keybits) - 1) & 3026 (key ^ bref->key)) != 0) { 3027 goto next_key_spinlocked; 3028 } 3029 3030 /* 3031 * Load the new indirect block by acquiring the related 3032 * chains (potentially from media as it might not be 3033 * in-memory). Then move it to the new parent (ichain) 3034 * via DELETE-DUPLICATE. 3035 * 3036 * chain is referenced but not locked. We must lock the 3037 * chain to obtain definitive DUPLICATED/DELETED state 3038 */ 3039 if (chain) { 3040 /* 3041 * Use chain already present in the RBTREE 3042 */ 3043 hammer2_chain_ref(chain); 3044 hammer2_spin_unex(&parent->core.spin); 3045 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3046 } else { 3047 /* 3048 * Get chain for blockref element. _get returns NULL 3049 * on insertion race. 3050 */ 3051 bcopy = *bref; 3052 hammer2_spin_unex(&parent->core.spin); 3053 chain = hammer2_chain_get(parent, generation, &bcopy); 3054 if (chain == NULL) { 3055 reason = 1; 3056 hammer2_spin_ex(&parent->core.spin); 3057 continue; 3058 } 3059 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 3060 kprintf("REASON 2\n"); 3061 reason = 2; 3062 hammer2_chain_drop(chain); 3063 hammer2_spin_ex(&parent->core.spin); 3064 continue; 3065 } 3066 hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER); 3067 } 3068 3069 /* 3070 * This is always live so if the chain has been deleted 3071 * we raced someone and we have to retry. 3072 * 3073 * NOTE: Lookups can race delete-duplicate because 3074 * delete-duplicate does not lock the parent's core 3075 * (they just use the spinlock on the core). We must 3076 * check for races by comparing the DUPLICATED flag before 3077 * releasing the spinlock with the flag after locking the 3078 * chain. 3079 * 3080 * (note reversed logic for this one) 3081 */ 3082 if (chain->flags & HAMMER2_CHAIN_DELETED) { 3083 hammer2_chain_unlock(chain); 3084 hammer2_chain_drop(chain); 3085 goto next_key; 3086 } 3087 3088 /* 3089 * Shift the chain to the indirect block. 3090 * 3091 * WARNING! No reason for us to load chain data, pass NOSTATS 3092 * to prevent delete/insert from trying to access 3093 * inode stats (and thus asserting if there is no 3094 * chain->data loaded). 3095 */ 3096 hammer2_chain_delete(parent, chain, mtid, 0); 3097 hammer2_chain_rename(NULL, &ichain, chain, mtid, 0); 3098 hammer2_chain_unlock(chain); 3099 hammer2_chain_drop(chain); 3100 KKASSERT(parent->refs > 0); 3101 chain = NULL; 3102 next_key: 3103 hammer2_spin_ex(&parent->core.spin); 3104 next_key_spinlocked: 3105 if (--maxloops == 0) 3106 panic("hammer2_chain_create_indirect: maxloops"); 3107 reason = 4; 3108 if (key_next == 0 || key_next > key_end) 3109 break; 3110 key_beg = key_next; 3111 /* loop */ 3112 } 3113 hammer2_spin_unex(&parent->core.spin); 3114 3115 /* 3116 * Insert the new indirect block into the parent now that we've 3117 * cleared out some entries in the parent. We calculated a good 3118 * insertion index in the loop above (ichain->index). 3119 * 3120 * We don't have to set UPDATE here because we mark ichain 3121 * modified down below (so the normal modified -> flush -> set-moved 3122 * sequence applies). 3123 * 3124 * The insertion shouldn't race as this is a completely new block 3125 * and the parent is locked. 3126 */ 3127 KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0); 3128 hammer2_chain_insert(parent, ichain, 3129 HAMMER2_CHAIN_INSERT_SPIN | 3130 HAMMER2_CHAIN_INSERT_LIVE, 3131 0); 3132 3133 /* 3134 * Make sure flushes propogate after our manual insertion. 3135 */ 3136 hammer2_chain_setflush(ichain); 3137 hammer2_chain_setflush(parent); 3138 3139 /* 3140 * Figure out what to return. 3141 */ 3142 if (~(((hammer2_key_t)1 << keybits) - 1) & 3143 (create_key ^ key)) { 3144 /* 3145 * Key being created is outside the key range, 3146 * return the original parent. 3147 */ 3148 hammer2_chain_unlock(ichain); 3149 hammer2_chain_drop(ichain); 3150 } else { 3151 /* 3152 * Otherwise its in the range, return the new parent. 3153 * (leave both the new and old parent locked). 3154 */ 3155 parent = ichain; 3156 } 3157 3158 return(parent); 3159 } 3160 3161 /* 3162 * Calculate the keybits and highside/lowside of the freemap node the 3163 * caller is creating. 3164 * 3165 * This routine will specify the next higher-level freemap key/radix 3166 * representing the lowest-ordered set. By doing so, eventually all 3167 * low-ordered sets will be moved one level down. 3168 * 3169 * We have to be careful here because the freemap reserves a limited 3170 * number of blocks for a limited number of levels. So we can't just 3171 * push indiscriminately. 3172 */ 3173 int 3174 hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp, 3175 int keybits, hammer2_blockref_t *base, int count) 3176 { 3177 hammer2_chain_t *chain; 3178 hammer2_blockref_t *bref; 3179 hammer2_key_t key; 3180 hammer2_key_t key_beg; 3181 hammer2_key_t key_end; 3182 hammer2_key_t key_next; 3183 int cache_index; 3184 int locount; 3185 int hicount; 3186 int maxloops = 300000; 3187 3188 key = *keyp; 3189 locount = 0; 3190 hicount = 0; 3191 keybits = 64; 3192 3193 /* 3194 * Calculate the range of keys in the array being careful to skip 3195 * slots which are overridden with a deletion. 3196 */ 3197 key_beg = 0; 3198 key_end = HAMMER2_KEY_MAX; 3199 cache_index = 0; 3200 hammer2_spin_ex(&parent->core.spin); 3201 3202 for (;;) { 3203 if (--maxloops == 0) { 3204 panic("indkey_freemap shit %p %p:%d\n", 3205 parent, base, count); 3206 } 3207 chain = hammer2_combined_find(parent, base, count, 3208 &cache_index, &key_next, 3209 key_beg, key_end, 3210 &bref); 3211 3212 /* 3213 * Exhausted search 3214 */ 3215 if (bref == NULL) 3216 break; 3217 3218 /* 3219 * Skip deleted chains. 3220 */ 3221 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3222 if (key_next == 0 || key_next > key_end) 3223 break; 3224 key_beg = key_next; 3225 continue; 3226 } 3227 3228 /* 3229 * Use the full live (not deleted) element for the scan 3230 * iteration. HAMMER2 does not allow partial replacements. 3231 * 3232 * XXX should be built into hammer2_combined_find(). 3233 */ 3234 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3235 3236 if (keybits > bref->keybits) { 3237 key = bref->key; 3238 keybits = bref->keybits; 3239 } else if (keybits == bref->keybits && bref->key < key) { 3240 key = bref->key; 3241 } 3242 if (key_next == 0) 3243 break; 3244 key_beg = key_next; 3245 } 3246 hammer2_spin_unex(&parent->core.spin); 3247 3248 /* 3249 * Return the keybits for a higher-level FREEMAP_NODE covering 3250 * this node. 3251 */ 3252 switch(keybits) { 3253 case HAMMER2_FREEMAP_LEVEL0_RADIX: 3254 keybits = HAMMER2_FREEMAP_LEVEL1_RADIX; 3255 break; 3256 case HAMMER2_FREEMAP_LEVEL1_RADIX: 3257 keybits = HAMMER2_FREEMAP_LEVEL2_RADIX; 3258 break; 3259 case HAMMER2_FREEMAP_LEVEL2_RADIX: 3260 keybits = HAMMER2_FREEMAP_LEVEL3_RADIX; 3261 break; 3262 case HAMMER2_FREEMAP_LEVEL3_RADIX: 3263 keybits = HAMMER2_FREEMAP_LEVEL4_RADIX; 3264 break; 3265 case HAMMER2_FREEMAP_LEVEL4_RADIX: 3266 keybits = HAMMER2_FREEMAP_LEVEL5_RADIX; 3267 break; 3268 case HAMMER2_FREEMAP_LEVEL5_RADIX: 3269 panic("hammer2_chain_indkey_freemap: level too high"); 3270 break; 3271 default: 3272 panic("hammer2_chain_indkey_freemap: bad radix"); 3273 break; 3274 } 3275 *keyp = key; 3276 3277 return (keybits); 3278 } 3279 3280 /* 3281 * Calculate the keybits and highside/lowside of the indirect block the 3282 * caller is creating. 3283 */ 3284 static int 3285 hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, 3286 int keybits, hammer2_blockref_t *base, int count) 3287 { 3288 hammer2_blockref_t *bref; 3289 hammer2_chain_t *chain; 3290 hammer2_key_t key_beg; 3291 hammer2_key_t key_end; 3292 hammer2_key_t key_next; 3293 hammer2_key_t key; 3294 int nkeybits; 3295 int locount; 3296 int hicount; 3297 int cache_index; 3298 int maxloops = 300000; 3299 3300 key = *keyp; 3301 locount = 0; 3302 hicount = 0; 3303 3304 /* 3305 * Calculate the range of keys in the array being careful to skip 3306 * slots which are overridden with a deletion. Once the scan 3307 * completes we will cut the key range in half and shift half the 3308 * range into the new indirect block. 3309 */ 3310 key_beg = 0; 3311 key_end = HAMMER2_KEY_MAX; 3312 cache_index = 0; 3313 hammer2_spin_ex(&parent->core.spin); 3314 3315 for (;;) { 3316 if (--maxloops == 0) { 3317 panic("indkey_freemap shit %p %p:%d\n", 3318 parent, base, count); 3319 } 3320 chain = hammer2_combined_find(parent, base, count, 3321 &cache_index, &key_next, 3322 key_beg, key_end, 3323 &bref); 3324 3325 /* 3326 * Exhausted search 3327 */ 3328 if (bref == NULL) 3329 break; 3330 3331 /* 3332 * NOTE: No need to check DUPLICATED here because we do 3333 * not release the spinlock. 3334 */ 3335 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3336 if (key_next == 0 || key_next > key_end) 3337 break; 3338 key_beg = key_next; 3339 continue; 3340 } 3341 3342 /* 3343 * Use the full live (not deleted) element for the scan 3344 * iteration. HAMMER2 does not allow partial replacements. 3345 * 3346 * XXX should be built into hammer2_combined_find(). 3347 */ 3348 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3349 3350 /* 3351 * Expand our calculated key range (key, keybits) to fit 3352 * the scanned key. nkeybits represents the full range 3353 * that we will later cut in half (two halves @ nkeybits - 1). 3354 */ 3355 nkeybits = keybits; 3356 if (nkeybits < bref->keybits) { 3357 if (bref->keybits > 64) { 3358 kprintf("bad bref chain %p bref %p\n", 3359 chain, bref); 3360 Debugger("fubar"); 3361 } 3362 nkeybits = bref->keybits; 3363 } 3364 while (nkeybits < 64 && 3365 (~(((hammer2_key_t)1 << nkeybits) - 1) & 3366 (key ^ bref->key)) != 0) { 3367 ++nkeybits; 3368 } 3369 3370 /* 3371 * If the new key range is larger we have to determine 3372 * which side of the new key range the existing keys fall 3373 * under by checking the high bit, then collapsing the 3374 * locount into the hicount or vise-versa. 3375 */ 3376 if (keybits != nkeybits) { 3377 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 3378 hicount += locount; 3379 locount = 0; 3380 } else { 3381 locount += hicount; 3382 hicount = 0; 3383 } 3384 keybits = nkeybits; 3385 } 3386 3387 /* 3388 * The newly scanned key will be in the lower half or the 3389 * upper half of the (new) key range. 3390 */ 3391 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 3392 ++hicount; 3393 else 3394 ++locount; 3395 3396 if (key_next == 0) 3397 break; 3398 key_beg = key_next; 3399 } 3400 hammer2_spin_unex(&parent->core.spin); 3401 bref = NULL; /* now invalid (safety) */ 3402 3403 /* 3404 * Adjust keybits to represent half of the full range calculated 3405 * above (radix 63 max) 3406 */ 3407 --keybits; 3408 3409 /* 3410 * Select whichever half contains the most elements. Theoretically 3411 * we can select either side as long as it contains at least one 3412 * element (in order to ensure that a free slot is present to hold 3413 * the indirect block). 3414 */ 3415 if (hammer2_indirect_optimize) { 3416 /* 3417 * Insert node for least number of keys, this will arrange 3418 * the first few blocks of a large file or the first few 3419 * inodes in a directory with fewer indirect blocks when 3420 * created linearly. 3421 */ 3422 if (hicount < locount && hicount != 0) 3423 key |= (hammer2_key_t)1 << keybits; 3424 else 3425 key &= ~(hammer2_key_t)1 << keybits; 3426 } else { 3427 /* 3428 * Insert node for most number of keys, best for heavily 3429 * fragmented files. 3430 */ 3431 if (hicount > locount) 3432 key |= (hammer2_key_t)1 << keybits; 3433 else 3434 key &= ~(hammer2_key_t)1 << keybits; 3435 } 3436 *keyp = key; 3437 3438 return (keybits); 3439 } 3440 3441 /* 3442 * Sets CHAIN_DELETED and remove the chain's blockref from the parent if 3443 * it exists. 3444 * 3445 * Both parent and chain must be locked exclusively. 3446 * 3447 * This function will modify the parent if the blockref requires removal 3448 * from the parent's block table. 3449 * 3450 * This function is NOT recursive. Any entity already pushed into the 3451 * chain (such as an inode) may still need visibility into its contents, 3452 * as well as the ability to read and modify the contents. For example, 3453 * for an unlinked file which is still open. 3454 */ 3455 void 3456 hammer2_chain_delete(hammer2_chain_t *parent, hammer2_chain_t *chain, 3457 hammer2_tid_t mtid, int flags) 3458 { 3459 KKASSERT(hammer2_mtx_owned(&chain->lock)); 3460 3461 /* 3462 * Nothing to do if already marked. 3463 * 3464 * We need the spinlock on the core whos RBTREE contains chain 3465 * to protect against races. 3466 */ 3467 if ((chain->flags & HAMMER2_CHAIN_DELETED) == 0) { 3468 KKASSERT((chain->flags & HAMMER2_CHAIN_DELETED) == 0 && 3469 chain->parent == parent); 3470 _hammer2_chain_delete_helper(parent, chain, mtid, flags); 3471 } 3472 3473 /* 3474 * To avoid losing track of a permanent deletion we add the chain 3475 * to the delayed flush queue. If were to flush it right now the 3476 * parent would end up in a modified state and generate I/O. 3477 * The delayed queue gives the parent a chance to be deleted to 3478 * (e.g. rm -rf). 3479 */ 3480 if (flags & HAMMER2_DELETE_PERMANENT) { 3481 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 3482 hammer2_delayed_flush(chain); 3483 } else { 3484 /* XXX might not be needed */ 3485 hammer2_chain_setflush(chain); 3486 } 3487 } 3488 3489 /* 3490 * Returns the index of the nearest element in the blockref array >= elm. 3491 * Returns (count) if no element could be found. 3492 * 3493 * Sets *key_nextp to the next key for loop purposes but does not modify 3494 * it if the next key would be higher than the current value of *key_nextp. 3495 * Note that *key_nexp can overflow to 0, which should be tested by the 3496 * caller. 3497 * 3498 * (*cache_indexp) is a heuristic and can be any value without effecting 3499 * the result. 3500 * 3501 * WARNING! Must be called with parent's spinlock held. Spinlock remains 3502 * held through the operation. 3503 */ 3504 static int 3505 hammer2_base_find(hammer2_chain_t *parent, 3506 hammer2_blockref_t *base, int count, 3507 int *cache_indexp, hammer2_key_t *key_nextp, 3508 hammer2_key_t key_beg, hammer2_key_t key_end) 3509 { 3510 hammer2_blockref_t *scan; 3511 hammer2_key_t scan_end; 3512 int i; 3513 int limit; 3514 3515 /* 3516 * Require the live chain's already have their core's counted 3517 * so we can optimize operations. 3518 */ 3519 KKASSERT(parent->flags & HAMMER2_CHAIN_COUNTEDBREFS); 3520 3521 /* 3522 * Degenerate case 3523 */ 3524 if (count == 0 || base == NULL) 3525 return(count); 3526 3527 /* 3528 * Sequential optimization using *cache_indexp. This is the most 3529 * likely scenario. 3530 * 3531 * We can avoid trailing empty entries on live chains, otherwise 3532 * we might have to check the whole block array. 3533 */ 3534 i = *cache_indexp; 3535 cpu_ccfence(); 3536 limit = parent->core.live_zero; 3537 if (i >= limit) 3538 i = limit - 1; 3539 if (i < 0) 3540 i = 0; 3541 KKASSERT(i < count); 3542 3543 /* 3544 * Search backwards 3545 */ 3546 scan = &base[i]; 3547 while (i > 0 && (scan->type == 0 || scan->key > key_beg)) { 3548 --scan; 3549 --i; 3550 } 3551 *cache_indexp = i; 3552 3553 /* 3554 * Search forwards, stop when we find a scan element which 3555 * encloses the key or until we know that there are no further 3556 * elements. 3557 */ 3558 while (i < count) { 3559 if (scan->type != 0) { 3560 scan_end = scan->key + 3561 ((hammer2_key_t)1 << scan->keybits) - 1; 3562 if (scan->key > key_beg || scan_end >= key_beg) 3563 break; 3564 } 3565 if (i >= limit) 3566 return (count); 3567 ++scan; 3568 ++i; 3569 } 3570 if (i != count) { 3571 *cache_indexp = i; 3572 if (i >= limit) { 3573 i = count; 3574 } else { 3575 scan_end = scan->key + 3576 ((hammer2_key_t)1 << scan->keybits); 3577 if (scan_end && (*key_nextp > scan_end || 3578 *key_nextp == 0)) { 3579 *key_nextp = scan_end; 3580 } 3581 } 3582 } 3583 return (i); 3584 } 3585 3586 /* 3587 * Do a combined search and return the next match either from the blockref 3588 * array or from the in-memory chain. Sets *bresp to the returned bref in 3589 * both cases, or sets it to NULL if the search exhausted. Only returns 3590 * a non-NULL chain if the search matched from the in-memory chain. 3591 * 3592 * When no in-memory chain has been found and a non-NULL bref is returned 3593 * in *bresp. 3594 * 3595 * 3596 * The returned chain is not locked or referenced. Use the returned bref 3597 * to determine if the search exhausted or not. Iterate if the base find 3598 * is chosen but matches a deleted chain. 3599 * 3600 * WARNING! Must be called with parent's spinlock held. Spinlock remains 3601 * held through the operation. 3602 */ 3603 static hammer2_chain_t * 3604 hammer2_combined_find(hammer2_chain_t *parent, 3605 hammer2_blockref_t *base, int count, 3606 int *cache_indexp, hammer2_key_t *key_nextp, 3607 hammer2_key_t key_beg, hammer2_key_t key_end, 3608 hammer2_blockref_t **bresp) 3609 { 3610 hammer2_blockref_t *bref; 3611 hammer2_chain_t *chain; 3612 int i; 3613 3614 /* 3615 * Lookup in block array and in rbtree. 3616 */ 3617 *key_nextp = key_end + 1; 3618 i = hammer2_base_find(parent, base, count, cache_indexp, 3619 key_nextp, key_beg, key_end); 3620 chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end); 3621 3622 /* 3623 * Neither matched 3624 */ 3625 if (i == count && chain == NULL) { 3626 *bresp = NULL; 3627 return(NULL); 3628 } 3629 3630 /* 3631 * Only chain matched. 3632 */ 3633 if (i == count) { 3634 bref = &chain->bref; 3635 goto found; 3636 } 3637 3638 /* 3639 * Only blockref matched. 3640 */ 3641 if (chain == NULL) { 3642 bref = &base[i]; 3643 goto found; 3644 } 3645 3646 /* 3647 * Both in-memory and blockref matched, select the nearer element. 3648 * 3649 * If both are flush with the left-hand side or both are the 3650 * same distance away, select the chain. In this situation the 3651 * chain must have been loaded from the matching blockmap. 3652 */ 3653 if ((chain->bref.key <= key_beg && base[i].key <= key_beg) || 3654 chain->bref.key == base[i].key) { 3655 KKASSERT(chain->bref.key == base[i].key); 3656 bref = &chain->bref; 3657 goto found; 3658 } 3659 3660 /* 3661 * Select the nearer key 3662 */ 3663 if (chain->bref.key < base[i].key) { 3664 bref = &chain->bref; 3665 } else { 3666 bref = &base[i]; 3667 chain = NULL; 3668 } 3669 3670 /* 3671 * If the bref is out of bounds we've exhausted our search. 3672 */ 3673 found: 3674 if (bref->key > key_end) { 3675 *bresp = NULL; 3676 chain = NULL; 3677 } else { 3678 *bresp = bref; 3679 } 3680 return(chain); 3681 } 3682 3683 /* 3684 * Locate the specified block array element and delete it. The element 3685 * must exist. 3686 * 3687 * The spin lock on the related chain must be held. 3688 * 3689 * NOTE: live_count was adjusted when the chain was deleted, so it does not 3690 * need to be adjusted when we commit the media change. 3691 */ 3692 void 3693 hammer2_base_delete(hammer2_chain_t *parent, 3694 hammer2_blockref_t *base, int count, 3695 int *cache_indexp, hammer2_chain_t *chain) 3696 { 3697 hammer2_blockref_t *elm = &chain->bref; 3698 hammer2_key_t key_next; 3699 int i; 3700 3701 /* 3702 * Delete element. Expect the element to exist. 3703 * 3704 * XXX see caller, flush code not yet sophisticated enough to prevent 3705 * re-flushed in some cases. 3706 */ 3707 key_next = 0; /* max range */ 3708 i = hammer2_base_find(parent, base, count, cache_indexp, 3709 &key_next, elm->key, elm->key); 3710 if (i == count || base[i].type == 0 || 3711 base[i].key != elm->key || 3712 ((chain->flags & HAMMER2_CHAIN_BMAPUPD) == 0 && 3713 base[i].keybits != elm->keybits)) { 3714 hammer2_spin_unex(&parent->core.spin); 3715 panic("delete base %p element not found at %d/%d elm %p\n", 3716 base, i, count, elm); 3717 return; 3718 } 3719 3720 /* 3721 * Update stats and zero the entry 3722 */ 3723 parent->bref.data_count -= base[i].data_count; 3724 parent->bref.data_count -= (hammer2_off_t)1 << 3725 (int)(base[i].data_off & HAMMER2_OFF_MASK_RADIX); 3726 parent->bref.inode_count -= base[i].inode_count; 3727 if (base[i].type == HAMMER2_BREF_TYPE_INODE) 3728 parent->bref.inode_count -= 1; 3729 3730 bzero(&base[i], sizeof(*base)); 3731 3732 /* 3733 * We can only optimize parent->core.live_zero for live chains. 3734 */ 3735 if (parent->core.live_zero == i + 1) { 3736 while (--i >= 0 && base[i].type == 0) 3737 ; 3738 parent->core.live_zero = i + 1; 3739 } 3740 3741 /* 3742 * Clear appropriate blockmap flags in chain. 3743 */ 3744 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 3745 HAMMER2_CHAIN_BMAPUPD); 3746 } 3747 3748 /* 3749 * Insert the specified element. The block array must not already have the 3750 * element and must have space available for the insertion. 3751 * 3752 * The spin lock on the related chain must be held. 3753 * 3754 * NOTE: live_count was adjusted when the chain was deleted, so it does not 3755 * need to be adjusted when we commit the media change. 3756 */ 3757 void 3758 hammer2_base_insert(hammer2_chain_t *parent, 3759 hammer2_blockref_t *base, int count, 3760 int *cache_indexp, hammer2_chain_t *chain) 3761 { 3762 hammer2_blockref_t *elm = &chain->bref; 3763 hammer2_key_t key_next; 3764 hammer2_key_t xkey; 3765 int i; 3766 int j; 3767 int k; 3768 int l; 3769 int u = 1; 3770 3771 /* 3772 * Insert new element. Expect the element to not already exist 3773 * unless we are replacing it. 3774 * 3775 * XXX see caller, flush code not yet sophisticated enough to prevent 3776 * re-flushed in some cases. 3777 */ 3778 key_next = 0; /* max range */ 3779 i = hammer2_base_find(parent, base, count, cache_indexp, 3780 &key_next, elm->key, elm->key); 3781 3782 /* 3783 * Shortcut fill optimization, typical ordered insertion(s) may not 3784 * require a search. 3785 */ 3786 KKASSERT(i >= 0 && i <= count); 3787 3788 /* 3789 * Set appropriate blockmap flags in chain. 3790 */ 3791 atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED); 3792 3793 /* 3794 * Update stats and zero the entry 3795 */ 3796 parent->bref.data_count += elm->data_count; 3797 parent->bref.data_count += (hammer2_off_t)1 << 3798 (int)(elm->data_off & HAMMER2_OFF_MASK_RADIX); 3799 parent->bref.inode_count += elm->inode_count; 3800 if (elm->type == HAMMER2_BREF_TYPE_INODE) 3801 parent->bref.inode_count += 1; 3802 3803 3804 /* 3805 * We can only optimize parent->core.live_zero for live chains. 3806 */ 3807 if (i == count && parent->core.live_zero < count) { 3808 i = parent->core.live_zero++; 3809 base[i] = *elm; 3810 return; 3811 } 3812 3813 xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1; 3814 if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) { 3815 hammer2_spin_unex(&parent->core.spin); 3816 panic("insert base %p overlapping elements at %d elm %p\n", 3817 base, i, elm); 3818 } 3819 3820 /* 3821 * Try to find an empty slot before or after. 3822 */ 3823 j = i; 3824 k = i; 3825 while (j > 0 || k < count) { 3826 --j; 3827 if (j >= 0 && base[j].type == 0) { 3828 if (j == i - 1) { 3829 base[j] = *elm; 3830 } else { 3831 bcopy(&base[j+1], &base[j], 3832 (i - j - 1) * sizeof(*base)); 3833 base[i - 1] = *elm; 3834 } 3835 goto validate; 3836 } 3837 ++k; 3838 if (k < count && base[k].type == 0) { 3839 bcopy(&base[i], &base[i+1], 3840 (k - i) * sizeof(hammer2_blockref_t)); 3841 base[i] = *elm; 3842 3843 /* 3844 * We can only update parent->core.live_zero for live 3845 * chains. 3846 */ 3847 if (parent->core.live_zero <= k) 3848 parent->core.live_zero = k + 1; 3849 u = 2; 3850 goto validate; 3851 } 3852 } 3853 panic("hammer2_base_insert: no room!"); 3854 3855 /* 3856 * Debugging 3857 */ 3858 validate: 3859 key_next = 0; 3860 for (l = 0; l < count; ++l) { 3861 if (base[l].type) { 3862 key_next = base[l].key + 3863 ((hammer2_key_t)1 << base[l].keybits) - 1; 3864 break; 3865 } 3866 } 3867 while (++l < count) { 3868 if (base[l].type) { 3869 if (base[l].key <= key_next) 3870 panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l); 3871 key_next = base[l].key + 3872 ((hammer2_key_t)1 << base[l].keybits) - 1; 3873 3874 } 3875 } 3876 3877 } 3878 3879 #if 0 3880 3881 /* 3882 * Sort the blockref array for the chain. Used by the flush code to 3883 * sort the blockref[] array. 3884 * 3885 * The chain must be exclusively locked AND spin-locked. 3886 */ 3887 typedef hammer2_blockref_t *hammer2_blockref_p; 3888 3889 static 3890 int 3891 hammer2_base_sort_callback(const void *v1, const void *v2) 3892 { 3893 hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1; 3894 hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2; 3895 3896 /* 3897 * Make sure empty elements are placed at the end of the array 3898 */ 3899 if (bref1->type == 0) { 3900 if (bref2->type == 0) 3901 return(0); 3902 return(1); 3903 } else if (bref2->type == 0) { 3904 return(-1); 3905 } 3906 3907 /* 3908 * Sort by key 3909 */ 3910 if (bref1->key < bref2->key) 3911 return(-1); 3912 if (bref1->key > bref2->key) 3913 return(1); 3914 return(0); 3915 } 3916 3917 void 3918 hammer2_base_sort(hammer2_chain_t *chain) 3919 { 3920 hammer2_blockref_t *base; 3921 int count; 3922 3923 switch(chain->bref.type) { 3924 case HAMMER2_BREF_TYPE_INODE: 3925 /* 3926 * Special shortcut for embedded data returns the inode 3927 * itself. Callers must detect this condition and access 3928 * the embedded data (the strategy code does this for us). 3929 * 3930 * This is only applicable to regular files and softlinks. 3931 */ 3932 if (chain->data->ipdata.meta.op_flags & 3933 HAMMER2_OPFLAG_DIRECTDATA) { 3934 return; 3935 } 3936 base = &chain->data->ipdata.u.blockset.blockref[0]; 3937 count = HAMMER2_SET_COUNT; 3938 break; 3939 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 3940 case HAMMER2_BREF_TYPE_INDIRECT: 3941 /* 3942 * Optimize indirect blocks in the INITIAL state to avoid 3943 * I/O. 3944 */ 3945 KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0); 3946 base = &chain->data->npdata[0]; 3947 count = chain->bytes / sizeof(hammer2_blockref_t); 3948 break; 3949 case HAMMER2_BREF_TYPE_VOLUME: 3950 base = &chain->data->voldata.sroot_blockset.blockref[0]; 3951 count = HAMMER2_SET_COUNT; 3952 break; 3953 case HAMMER2_BREF_TYPE_FREEMAP: 3954 base = &chain->data->blkset.blockref[0]; 3955 count = HAMMER2_SET_COUNT; 3956 break; 3957 default: 3958 kprintf("hammer2_chain_lookup: unrecognized " 3959 "blockref(A) type: %d", 3960 chain->bref.type); 3961 while (1) 3962 tsleep(&base, 0, "dead", 0); 3963 panic("hammer2_chain_lookup: unrecognized " 3964 "blockref(A) type: %d", 3965 chain->bref.type); 3966 base = NULL; /* safety */ 3967 count = 0; /* safety */ 3968 } 3969 kqsort(base, count, sizeof(*base), hammer2_base_sort_callback); 3970 } 3971 3972 #endif 3973 3974 /* 3975 * Chain memory management 3976 */ 3977 void 3978 hammer2_chain_wait(hammer2_chain_t *chain) 3979 { 3980 tsleep(chain, 0, "chnflw", 1); 3981 } 3982 3983 const hammer2_media_data_t * 3984 hammer2_chain_rdata(hammer2_chain_t *chain) 3985 { 3986 KKASSERT(chain->data != NULL); 3987 return (chain->data); 3988 } 3989 3990 hammer2_media_data_t * 3991 hammer2_chain_wdata(hammer2_chain_t *chain) 3992 { 3993 KKASSERT(chain->data != NULL); 3994 return (chain->data); 3995 } 3996 3997 /* 3998 * Set the check data for a chain. This can be a heavy-weight operation 3999 * and typically only runs on-flush. For file data check data is calculated 4000 * when the logical buffers are flushed. 4001 */ 4002 void 4003 hammer2_chain_setcheck(hammer2_chain_t *chain, void *bdata) 4004 { 4005 chain->bref.flags &= ~HAMMER2_BREF_FLAG_ZERO; 4006 4007 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4008 case HAMMER2_CHECK_NONE: 4009 break; 4010 case HAMMER2_CHECK_DISABLED: 4011 break; 4012 case HAMMER2_CHECK_ISCSI32: 4013 chain->bref.check.iscsi32.value = 4014 hammer2_icrc32(bdata, chain->bytes); 4015 break; 4016 case HAMMER2_CHECK_CRC64: 4017 chain->bref.check.crc64.value = 0; 4018 /* XXX */ 4019 break; 4020 case HAMMER2_CHECK_SHA192: 4021 { 4022 SHA256_CTX hash_ctx; 4023 union { 4024 uint8_t digest[SHA256_DIGEST_LENGTH]; 4025 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4026 } u; 4027 4028 SHA256_Init(&hash_ctx); 4029 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4030 SHA256_Final(u.digest, &hash_ctx); 4031 u.digest64[2] ^= u.digest64[3]; 4032 bcopy(u.digest, 4033 chain->bref.check.sha192.data, 4034 sizeof(chain->bref.check.sha192.data)); 4035 } 4036 break; 4037 case HAMMER2_CHECK_FREEMAP: 4038 chain->bref.check.freemap.icrc32 = 4039 hammer2_icrc32(bdata, chain->bytes); 4040 break; 4041 default: 4042 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4043 chain->bref.methods); 4044 break; 4045 } 4046 } 4047 4048 int 4049 hammer2_chain_testcheck(hammer2_chain_t *chain, void *bdata) 4050 { 4051 int r; 4052 4053 if (chain->bref.flags & HAMMER2_BREF_FLAG_ZERO) 4054 return 1; 4055 4056 switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 4057 case HAMMER2_CHECK_NONE: 4058 r = 1; 4059 break; 4060 case HAMMER2_CHECK_DISABLED: 4061 r = 1; 4062 break; 4063 case HAMMER2_CHECK_ISCSI32: 4064 r = (chain->bref.check.iscsi32.value == 4065 hammer2_icrc32(bdata, chain->bytes)); 4066 break; 4067 case HAMMER2_CHECK_CRC64: 4068 r = (chain->bref.check.crc64.value == 0); 4069 /* XXX */ 4070 break; 4071 case HAMMER2_CHECK_SHA192: 4072 { 4073 SHA256_CTX hash_ctx; 4074 union { 4075 uint8_t digest[SHA256_DIGEST_LENGTH]; 4076 uint64_t digest64[SHA256_DIGEST_LENGTH/8]; 4077 } u; 4078 4079 SHA256_Init(&hash_ctx); 4080 SHA256_Update(&hash_ctx, bdata, chain->bytes); 4081 SHA256_Final(u.digest, &hash_ctx); 4082 u.digest64[2] ^= u.digest64[3]; 4083 if (bcmp(u.digest, 4084 chain->bref.check.sha192.data, 4085 sizeof(chain->bref.check.sha192.data)) == 0) { 4086 r = 1; 4087 } else { 4088 r = 0; 4089 } 4090 } 4091 break; 4092 case HAMMER2_CHECK_FREEMAP: 4093 r = (chain->bref.check.freemap.icrc32 == 4094 hammer2_icrc32(bdata, chain->bytes)); 4095 if (r == 0) { 4096 kprintf("freemap.icrc %08x icrc32 %08x (%d)\n", 4097 chain->bref.check.freemap.icrc32, 4098 hammer2_icrc32(bdata, chain->bytes), chain->bytes); 4099 if (chain->dio) 4100 kprintf("dio %p buf %016jx,%d bdata %p/%p\n", 4101 chain->dio, chain->dio->bp->b_loffset, chain->dio->bp->b_bufsize, bdata, chain->dio->bp->b_data); 4102 } 4103 4104 break; 4105 default: 4106 kprintf("hammer2_chain_setcheck: unknown check type %02x\n", 4107 chain->bref.methods); 4108 r = 1; 4109 break; 4110 } 4111 return r; 4112 } 4113 4114 /* 4115 * The caller presents a shared-locked (parent, chain) where the chain 4116 * is of type HAMMER2_OBJTYPE_HARDLINK. The caller must hold the ip 4117 * structure representing the inode locked to prevent 4118 * consolidation/deconsolidation races. 4119 * 4120 * We locate the hardlink in the current or a common parent directory. 4121 * 4122 * If we are unable to locate the hardlink, EIO is returned and 4123 * (*chainp) is unlocked and dropped. 4124 */ 4125 int 4126 hammer2_chain_hardlink_find(hammer2_inode_t *dip, 4127 hammer2_chain_t **parentp, 4128 hammer2_chain_t **chainp, 4129 int flags) 4130 { 4131 hammer2_chain_t *parent; 4132 hammer2_chain_t *rchain; 4133 hammer2_key_t key_dummy; 4134 hammer2_key_t lhc; 4135 int cache_index = -1; 4136 4137 /* 4138 * Obtain the key for the hardlink from *chainp. 4139 */ 4140 rchain = *chainp; 4141 lhc = rchain->data->ipdata.meta.inum; 4142 hammer2_chain_unlock(rchain); 4143 hammer2_chain_drop(rchain); 4144 rchain = NULL; 4145 4146 for (;;) { 4147 int nloops; 4148 rchain = hammer2_chain_lookup(parentp, &key_dummy, 4149 lhc, lhc, 4150 &cache_index, flags); 4151 if (rchain) 4152 break; 4153 4154 /* 4155 * Iterate parents, handle parent rename races by retrying 4156 * the operation. 4157 */ 4158 nloops = -1; 4159 while (nloops) { 4160 --nloops; 4161 parent = *parentp; 4162 if (nloops < 0 && 4163 parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 4164 nloops = 1; 4165 } 4166 if (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) 4167 goto done; 4168 if (parent->parent == NULL) 4169 goto done; 4170 parent = parent->parent; 4171 hammer2_chain_ref(parent); 4172 hammer2_chain_unlock(*parentp); 4173 hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS | 4174 flags); 4175 if ((*parentp)->parent == parent) { 4176 hammer2_chain_drop(*parentp); 4177 *parentp = parent; 4178 } else { 4179 hammer2_chain_unlock(parent); 4180 hammer2_chain_drop(parent); 4181 hammer2_chain_lock(*parentp, 4182 HAMMER2_RESOLVE_ALWAYS | 4183 flags); 4184 parent = NULL; /* safety */ 4185 /* retry */ 4186 } 4187 } 4188 } 4189 done: 4190 4191 *chainp = rchain; 4192 return (rchain ? EINVAL : 0); 4193 } 4194 4195 /* 4196 * Used by the bulkscan code to snapshot the synchronized storage for 4197 * a volume, allowing it to be scanned concurrently against normal 4198 * operation. 4199 */ 4200 hammer2_chain_t * 4201 hammer2_chain_bulksnap(hammer2_chain_t *chain) 4202 { 4203 hammer2_chain_t *copy; 4204 4205 copy = hammer2_chain_alloc(chain->hmp, chain->pmp, &chain->bref); 4206 switch(chain->bref.type) { 4207 case HAMMER2_BREF_TYPE_VOLUME: 4208 copy->data = kmalloc(sizeof(copy->data->voldata), 4209 chain->hmp->mchain, 4210 M_WAITOK | M_ZERO); 4211 hammer2_spin_ex(&chain->core.spin); 4212 copy->data->voldata = chain->data->voldata; 4213 hammer2_spin_unex(&chain->core.spin); 4214 break; 4215 case HAMMER2_BREF_TYPE_FREEMAP: 4216 copy->data = kmalloc(sizeof(hammer2_blockset_t), 4217 chain->hmp->mchain, 4218 M_WAITOK | M_ZERO); 4219 hammer2_spin_ex(&chain->core.spin); 4220 copy->data->blkset = chain->data->blkset; 4221 hammer2_spin_unex(&chain->core.spin); 4222 break; 4223 default: 4224 break; 4225 } 4226 return copy; 4227 } 4228 4229 void 4230 hammer2_chain_bulkdrop(hammer2_chain_t *copy) 4231 { 4232 switch(copy->bref.type) { 4233 case HAMMER2_BREF_TYPE_VOLUME: 4234 case HAMMER2_BREF_TYPE_FREEMAP: 4235 KKASSERT(copy->data); 4236 kfree(copy->data, copy->hmp->mchain); 4237 copy->data = NULL; 4238 default: 4239 break; 4240 } 4241 hammer2_chain_drop(copy); 4242 } 4243 4244 /* 4245 * Create a snapshot of the specified {parent, ochain} with the specified 4246 * label. The originating hammer2_inode must be exclusively locked for 4247 * safety. 4248 * 4249 * The ioctl code has already synced the filesystem. 4250 */ 4251 int 4252 hammer2_chain_snapshot(hammer2_chain_t *chain, hammer2_ioc_pfs_t *pmp, 4253 hammer2_tid_t mtid) 4254 { 4255 hammer2_dev_t *hmp; 4256 const hammer2_inode_data_t *ripdata; 4257 hammer2_inode_data_t *wipdata; 4258 hammer2_chain_t *nchain; 4259 hammer2_inode_t *nip; 4260 size_t name_len; 4261 hammer2_key_t lhc; 4262 struct vattr vat; 4263 #if 0 4264 uuid_t opfs_clid; 4265 #endif 4266 int error; 4267 4268 kprintf("snapshot %s\n", pmp->name); 4269 4270 name_len = strlen(pmp->name); 4271 lhc = hammer2_dirhash(pmp->name, name_len); 4272 4273 /* 4274 * Get the clid 4275 */ 4276 ripdata = &chain->data->ipdata; 4277 #if 0 4278 opfs_clid = ripdata->meta.pfs_clid; 4279 #endif 4280 hmp = chain->hmp; 4281 4282 /* 4283 * Create the snapshot directory under the super-root 4284 * 4285 * Set PFS type, generate a unique filesystem id, and generate 4286 * a cluster id. Use the same clid when snapshotting a PFS root, 4287 * which theoretically allows the snapshot to be used as part of 4288 * the same cluster (perhaps as a cache). 4289 * 4290 * Copy the (flushed) blockref array. Theoretically we could use 4291 * chain_duplicate() but it becomes difficult to disentangle 4292 * the shared core so for now just brute-force it. 4293 */ 4294 VATTR_NULL(&vat); 4295 vat.va_type = VDIR; 4296 vat.va_mode = 0755; 4297 nip = hammer2_inode_create(hmp->spmp->iroot, &vat, proc0.p_ucred, 4298 pmp->name, name_len, 0, 4299 1, 0, 0, 4300 HAMMER2_INSERT_PFSROOT, &error); 4301 4302 if (nip) { 4303 hammer2_inode_modify(nip); 4304 nchain = hammer2_inode_chain(nip, 0, HAMMER2_RESOLVE_ALWAYS); 4305 hammer2_chain_modify(nchain, mtid, 0, 0); 4306 wipdata = &nchain->data->ipdata; 4307 4308 nip->meta.pfs_type = HAMMER2_PFSTYPE_MASTER; 4309 nip->meta.pfs_subtype = HAMMER2_PFSSUBTYPE_SNAPSHOT; 4310 nip->meta.op_flags |= HAMMER2_OPFLAG_PFSROOT; 4311 kern_uuidgen(&nip->meta.pfs_fsid, 1); 4312 4313 /* 4314 * Give the snapshot its own private cluster id. As a 4315 * snapshot no further synchronization with the original 4316 * cluster will be done. 4317 */ 4318 #if 0 4319 if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) 4320 nip->meta.pfs_clid = opfs_clid; 4321 else 4322 kern_uuidgen(&nip->meta.pfs_clid, 1); 4323 #endif 4324 kern_uuidgen(&nip->meta.pfs_clid, 1); 4325 nchain->bref.flags |= HAMMER2_BREF_FLAG_PFSROOT; 4326 4327 /* XXX hack blockset copy */ 4328 /* XXX doesn't work with real cluster */ 4329 wipdata->meta = nip->meta; 4330 wipdata->u.blockset = ripdata->u.blockset; 4331 hammer2_flush(nchain, 1); 4332 hammer2_chain_unlock(nchain); 4333 hammer2_chain_drop(nchain); 4334 hammer2_inode_unlock(nip); 4335 } 4336 return (error); 4337 } 4338