1 /* 2 * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 34 */ 35 /* 36 * This subsystem implements most of the core support functions for 37 * the hammer2_chain structure. 
 *
 * Chains are the in-memory version of media objects (volume header, inodes,
 * indirect blocks, data blocks, etc).  Chains represent a portion of the
 * HAMMER2 topology.
 *
 * A chain is topologically stable once it has been inserted into the
 * in-memory topology.  Modifications which copy, move, or resize the chain
 * are handled via the DELETE-DUPLICATE mechanic where the original chain
 * stays intact but is marked deleted and a new chain is allocated which
 * shares the old chain's children.
 *
 * This sharing is handled via the hammer2_chain_core structure.
 *
 * The DELETE-DUPLICATE mechanism allows the same topological level to contain
 * many overloadings.  However, our RBTREE mechanics require that there be
 * no overlaps so we accomplish the overloading by moving conflicting chains
 * with smaller or equal radii into a sub-RBTREE under the chain being
 * overloaded.
 *
 * DELETE-DUPLICATE is also used when a modification to a chain crosses a
 * flush synchronization boundary, allowing the flush code to continue
 * flushing the older version of the topology and not be disrupted by new
 * frontend operations.
 *
 * LIVE VS FLUSH VIEW
 *
 * All lookup and iterate operations and most modifications are done on the
 * live view.  During flushes lookups are not normally done and modifications
 * may be run on the flush view.  However, flushes often need to allocate
 * blocks and the freemap_alloc/free code issues lookups.  This code is
 * special cased to use the live view when called from a flush.
 *
 * General chain lookup/iteration functions are NOT aware of the flush view,
 * they only know about live views.
 */
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/kern_syscall.h>
#include <sys/uuid.h>

#include "hammer2.h"

static int hammer2_indirect_optimize;	/* XXX SYSCTL */

static hammer2_chain_t *hammer2_chain_create_indirect(
		hammer2_trans_t *trans, hammer2_chain_t *parent,
		hammer2_key_t key, int keybits, int for_type, int *errorp);
static void hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop);
static void adjreadcounter(hammer2_blockref_t *bref, size_t bytes);
static hammer2_chain_t *hammer2_combined_find(
		hammer2_chain_t *parent,
		hammer2_blockref_t *base, int count,
		int *cache_indexp, hammer2_key_t *key_nextp,
		hammer2_key_t key_beg, hammer2_key_t key_end,
		hammer2_blockref_t **bresp);

/*
 * Basic RBTree for chains (core->rbtree and core->dbtree).  Chains cannot
 * overlap in the RB trees.  Deleted chains are moved from rbtree to either
 * dbtree or to dbq.
 *
 * Chains in delete-duplicate sequences can always iterate through core_entry
 * to locate the live version of the chain.
 */
RB_GENERATE(hammer2_chain_tree, hammer2_chain, rbnode, hammer2_chain_cmp);

/*
 * RB comparison function.  Chains are ordered by the key range implied by
 * (bref.key, bref.keybits).  Returns -1 if chain1 lies entirely to the
 * left of chain2, 1 if entirely to the right, and 0 on any overlap.
 */
int
hammer2_chain_cmp(hammer2_chain_t *chain1, hammer2_chain_t *chain2)
{
	hammer2_key_t c1_beg;
	hammer2_key_t c1_end;
	hammer2_key_t c2_beg;
	hammer2_key_t c2_end;

	/*
	 * Compare chains.  Overlaps are not supposed to happen, but so
	 * that we catch any software issues early we count overlaps as
	 * a match.
	 */
	c1_beg = chain1->bref.key;
	c1_end = c1_beg + ((hammer2_key_t)1 << chain1->bref.keybits) - 1;
	c2_beg = chain2->bref.key;
	c2_end = c2_beg + ((hammer2_key_t)1 << chain2->bref.keybits) - 1;

	if (c1_end < c2_beg)	/* fully to the left */
		return(-1);
	if (c1_beg > c2_end)	/* fully to the right */
		return(1);
	return(0);		/* overlap (must not cross edge boundary) */
}

/*
 * Returns non-zero if the chain's blockref type participates in read
 * clustering (indirect, inode, or data blocks) and clustering is enabled
 * via the hammer2_cluster_enable global.
 */
static __inline
int
hammer2_isclusterable(hammer2_chain_t *chain)
{
	if (hammer2_cluster_enable) {
		if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
		    chain->bref.type == HAMMER2_BREF_TYPE_INODE ||
		    chain->bref.type == HAMMER2_BREF_TYPE_DATA) {
			return(1);
		}
	}
	return(0);
}

/*
 * Recursively set update_hi starting at chain up through to the root.
 *
 * This controls top-down visibility for flushes.  The child has just one
 * 'above' core, but the core itself can be multi-homed with parents iterated
 * via core->ownerq.  The last parent is the 'live' parent (all others had to
 * have been delete-duplicated).  We always propagate upward through the live
 * parent.
 *
 * This function is not used during a flush (except when the flush is
 * allocating which requires the live tree).  The flush keeps track of its
 * recursion itself.
 *
 * XXX SMP races
 */
void
hammer2_chain_setsubmod(hammer2_trans_t *trans, hammer2_chain_t *chain)
{
	hammer2_chain_core_t *above;

	if (chain->update_hi < trans->sync_tid)
		chain->update_hi = trans->sync_tid;

	/*
	 * Walk upward via each core's live (last) owner, bumping update_hi
	 * to at least the transaction's sync_tid at every level.
	 */
	while ((above = chain->above) != NULL) {
		spin_lock(&above->cst.spin);
		chain = TAILQ_LAST(&above->ownerq, h2_core_list);
		if (chain->update_hi < trans->sync_tid)
			chain->update_hi = trans->sync_tid;
		spin_unlock(&above->cst.spin);
	}
}

/*
 * Allocate a new disconnected chain element representing the specified
 * bref.  chain->refs is set to 1 and the passed bref is copied to
 * chain->bref.  chain->bytes is derived from the bref.
 *
 * chain->core is NOT allocated and the media data and bp pointers are left
 * NULL.  The caller must call chain_core_alloc() to allocate or associate
 * a core with the chain.
 *
 * NOTE: Returns a referenced but unlocked (because there is no core) chain.
 */
hammer2_chain_t *
hammer2_chain_alloc(hammer2_mount_t *hmp, hammer2_pfsmount_t *pmp,
		    hammer2_trans_t *trans, hammer2_blockref_t *bref)
{
	hammer2_chain_t *chain;
	u_int bytes = 1U << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);

	/*
	 * Construct the appropriate system structure.
	 */
	switch(bref->type) {
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		/*
		 * Chain's are really only associated with the hmp but we
		 * maintain a pmp association for per-mount memory tracking
		 * purposes.  The pmp can be NULL.
		 */
		chain = kmalloc(sizeof(*chain), hmp->mchain, M_WAITOK | M_ZERO);
		if (pmp)
			chain->pmp = pmp;
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		chain = NULL;
		panic("hammer2_chain_alloc volume type illegal for op");
		/* NOT REACHED */
	default:
		chain = NULL;
		panic("hammer2_chain_alloc: unrecognized blockref type: %d",
		      bref->type);
		/* NOT REACHED */
	}

	chain->hmp = hmp;
	chain->bref = *bref;
	chain->bytes = bytes;
	chain->refs = 1;
	chain->flags = HAMMER2_CHAIN_ALLOCATED;
	chain->delete_tid = HAMMER2_MAX_TID;

	/*
	 * Set modify_tid if a transaction is creating the inode.
	 * Enforce update_lo = 0 so nearby transactions do not think
	 * it has been flushed when it hasn't.
	 *
	 * NOTE: When loading a chain from backing store or creating a
	 *	 snapshot, trans will be NULL and the caller is responsible
	 *	 for setting these fields.
	 */
	if (trans) {
		chain->modify_tid = trans->sync_tid;
		chain->update_lo = 0;
	}

	return (chain);
}

/*
 * Associate an existing core with the chain or allocate a new core.
 *
 * The core is not locked.  No additional refs on the chain are made.
 * (trans) must not be NULL if (core) is not NULL.
 *
 * When chains are delete-duplicated during flushes we insert nchain on
 * the ownerq after ochain instead of at the end in order to give the
 * drop code visibility in the correct order, otherwise drops can be missed.
 */
void
hammer2_chain_core_alloc(hammer2_trans_t *trans,
			 hammer2_chain_t *nchain, hammer2_chain_t *ochain)
{
	hammer2_chain_core_t *core;

	KKASSERT(nchain->core == NULL);

	if (ochain == NULL) {
		/*
		 * Fresh core under nchain (no multi-homing of ochain's
		 * sub-tree).
		 */
		core = kmalloc(sizeof(*core), nchain->hmp->mchain,
			       M_WAITOK | M_ZERO);
		TAILQ_INIT(&core->ownerq);
		TAILQ_INIT(&core->dbq);
		RB_INIT(&core->rbtree);	/* live chains */
		RB_INIT(&core->dbtree);	/* deleted original (bmapped) chains */
		core->sharecnt = 1;
		core->good = 0x1234;	/* magic for sanity checking */
		nchain->core = core;
		ccms_cst_init(&core->cst, nchain);
		TAILQ_INSERT_TAIL(&core->ownerq, nchain, core_entry);
	} else {
		/*
		 * Propagate the PFSROOT flag which we set on all subdirs
		 * under the super-root.
		 */
		atomic_set_int(&nchain->flags,
			       ochain->flags & HAMMER2_CHAIN_PFSROOT);

		/*
		 * Duplicating ochain -> nchain.  Set the DUPLICATED flag on
		 * ochain if nchain is not a snapshot.
		 *
		 * It is possible for the DUPLICATED flag to already be
		 * set when called via a flush operation because flush
		 * operations may have to work on elements with delete_tid's
		 * beyond the flush sync_tid.  In this situation we must
		 * ensure that nchain is placed just after ochain in the
		 * ownerq and that the DUPLICATED flag is set on nchain so
		 * 'live' operations skip past it to the correct chain.
		 *
		 * The flusher understands the blockref synchronization state
		 * for any stale chains by observing bref.mirror_tid, which
		 * delete-duplicate replicates.
		 *
		 * WARNING! However, the case is disallowed when the flusher
		 *	    is allocating freemap space because this entails
		 *	    more than just adjusting a block table.
		 */
		if (ochain->flags & HAMMER2_CHAIN_DUPLICATED) {
			KKASSERT((trans->flags &
				  (HAMMER2_TRANS_ISFLUSH |
				   HAMMER2_TRANS_ISALLOCATING)) ==
				 HAMMER2_TRANS_ISFLUSH);
			atomic_set_int(&nchain->flags,
				       HAMMER2_CHAIN_DUPLICATED);
		}
		if ((nchain->flags & HAMMER2_CHAIN_SNAPSHOT) == 0) {
			atomic_set_int(&ochain->flags,
				       HAMMER2_CHAIN_DUPLICATED);
		}
		core = ochain->core;
		atomic_add_int(&core->sharecnt, 1);

		spin_lock(&core->cst.spin);
		nchain->core = core;

		/*
		 * Maintain ordering for refactor test so we don't skip over
		 * a snapshot.  Also, during flushes, delete-duplications
		 * for block-table updates can occur on ochains already
		 * deleted (delete-duplicated by a later transaction), or
		 * on forward-indexed ochains.  We must properly insert
		 * nchain relative to ochain.
		 */
		if (trans && trans->sync_tid < ochain->modify_tid) {
			TAILQ_INSERT_BEFORE(ochain, nchain, core_entry);
		} else {
			TAILQ_INSERT_AFTER(&core->ownerq, ochain,
					   nchain, core_entry);
		}
		spin_unlock(&core->cst.spin);
	}
}

/*
 * Add a reference to a chain element, preventing its destruction.
 */
void
hammer2_chain_ref(hammer2_chain_t *chain)
{
	atomic_add_int(&chain->refs, 1);
}

/*
 * Insert the chain in the core rbtree.
 *
 * Normal insertions are placed in the live rbtree.  Insertion of a deleted
 * chain is a special case used by the flush code that is placed on the
 * unstaged deleted list to avoid confusing the live view.
 */
#define HAMMER2_CHAIN_INSERT_SPIN	0x0001	/* acquire above->cst.spin */
#define HAMMER2_CHAIN_INSERT_LIVE	0x0002	/* bump above->live_count */
#define HAMMER2_CHAIN_INSERT_RACE	0x0004	/* EAGAIN on generation chg */

static
int
hammer2_chain_insert(hammer2_chain_core_t *above,
		     hammer2_chain_t *ochain, hammer2_chain_t *nchain,
		     int flags, int generation)
{
	hammer2_chain_t *xchain;
	int error = 0;

	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		spin_lock(&above->cst.spin);

	/*
	 * Interlocked by spinlock, check for race
	 */
	if ((flags & HAMMER2_CHAIN_INSERT_RACE) &&
	    above->generation != generation) {
		error = EAGAIN;
		goto failed;
	}

	/*
	 * Insert nchain.  Deleted chains go on the dbtree (if the original
	 * was block-mapped) or the dbq; live chains go on the rbtree.
	 */
	if (nchain->flags & HAMMER2_CHAIN_DELETED) {
		if (ochain && (ochain->flags & HAMMER2_CHAIN_BMAPPED)) {
			if (ochain->flags & HAMMER2_CHAIN_ONDBTREE) {
				RB_REMOVE(hammer2_chain_tree,
					  &above->dbtree, ochain);
				atomic_clear_int(&ochain->flags,
						 HAMMER2_CHAIN_ONDBTREE);
				TAILQ_INSERT_TAIL(&above->dbq,
						  ochain, db_entry);
				atomic_set_int(&ochain->flags,
						HAMMER2_CHAIN_ONDBQ);
			}
			/* clear BMAPPED (DBTREE, sometimes RBTREE) */
			atomic_clear_int(&ochain->flags, HAMMER2_CHAIN_BMAPPED);

			xchain = RB_INSERT(hammer2_chain_tree,
					   &above->dbtree, nchain);
			KKASSERT(xchain == NULL);
			atomic_set_int(&nchain->flags,
				       HAMMER2_CHAIN_ONDBTREE |
				       HAMMER2_CHAIN_BMAPPED);
		} else {
			TAILQ_INSERT_TAIL(&above->dbq, nchain, db_entry);
			atomic_set_int(&nchain->flags, HAMMER2_CHAIN_ONDBQ);
		}
	} else {
		xchain = RB_INSERT(hammer2_chain_tree, &above->rbtree, nchain);
		KASSERT(xchain == NULL,
			("hammer2_chain_insert: collision %p", nchain));
		atomic_set_int(&nchain->flags, HAMMER2_CHAIN_ONRBTREE);
	}

	nchain->above = above;
	++above->chain_count;
	++above->generation;

	/*
	 * We have to keep track of the effective live-view blockref count
	 * so the create code knows when to push an indirect block.
	 */
	if (flags & HAMMER2_CHAIN_INSERT_LIVE)
		atomic_add_int(&above->live_count, 1);
failed:
	if (flags & HAMMER2_CHAIN_INSERT_SPIN)
		spin_unlock(&above->cst.spin);
	return error;
}

/*
 * Drop the caller's reference to the chain.  When the ref count drops to
 * zero this function will try to disassociate the chain from its parent and
 * deallocate it, then recursively drop the parent using the implied ref
 * from the chain's chain->parent.
 */
static hammer2_chain_t *hammer2_chain_lastdrop(hammer2_chain_t *chain,
					       struct h2_core_list *delayq);

void
hammer2_chain_drop(hammer2_chain_t *chain)
{
	struct h2_core_list delayq;
	hammer2_chain_t *scan;
	u_int refs;
	u_int need = 0;

	if (hammer2_debug & 0x200000)
		Debugger("drop");

	/*
	 * Sanity check: flags which each imply a ref must leave at least
	 * one ref remaining after this drop.
	 */
	if (chain->flags & HAMMER2_CHAIN_FLUSH_CREATE)
		++need;
	if (chain->flags & HAMMER2_CHAIN_FLUSH_DELETE)
		++need;
	if (chain->flags & HAMMER2_CHAIN_MODIFIED)
		++need;
	KKASSERT(chain->refs > need);

	TAILQ_INIT(&delayq);

	while (chain) {
		refs = chain->refs;
		cpu_ccfence();
		KKASSERT(refs > 0);

		if (refs == 1) {
			chain = hammer2_chain_lastdrop(chain, &delayq);
		} else {
			if (atomic_cmpset_int(&chain->refs, refs, refs - 1))
				break;
			/* retry the same chain */
		}

		/*
		 * When we've exhausted lastdrop chaining pull off of delayq.
		 * chains on delayq are dead but are used to placehold other
		 * chains which we added a ref to for the purpose of dropping.
		 */
		if (chain == NULL) {
			hammer2_mount_t *hmp;

			if ((scan = TAILQ_FIRST(&delayq)) != NULL) {
				/* scan->data placeholds the chain to drop */
				chain = (void *)scan->data;
				TAILQ_REMOVE(&delayq, scan, core_entry);
				scan->flags &= ~HAMMER2_CHAIN_ALLOCATED;
				hmp = scan->hmp;
				scan->hmp = NULL;
				kfree(scan, hmp->mchain);
			}
		}
	}
}

/*
 * Safe handling of the 1->0 transition on chain.  Returns a chain for
 * recursive drop or NULL, possibly returning the same chain if the atomic
 * op fails.
 *
 * When two chains need to be recursively dropped we use the chain
 * we would otherwise free to placehold the additional chain.  It's a bit
 * convoluted but we can't just recurse without potentially blowing out
 * the kernel stack.
 *
 * The chain cannot be freed if it has a non-empty core (children) or
 * it is not at the head of ownerq.
 *
 * The cst spinlock is allowed to nest child-to-parent
 * (not parent-to-child).
 */
static
hammer2_chain_t *
hammer2_chain_lastdrop(hammer2_chain_t *chain, struct h2_core_list *delayq)
{
	hammer2_pfsmount_t *pmp;
	hammer2_mount_t *hmp;
	hammer2_chain_core_t *above;
	hammer2_chain_core_t *core;
	hammer2_chain_t *rdrop1;
	hammer2_chain_t *rdrop2;

	/*
	 * Spinlock the core and check to see if it is empty.  If it is
	 * not empty we leave chain intact with refs == 0.  The elements
	 * in core->rbtree are associated with other chains contemporary
	 * with ours but not with our chain directly.
	 */
	if ((core = chain->core) != NULL) {
		spin_lock(&core->cst.spin);

		/*
		 * We can't free non-stale chains with children until we are
		 * able to free the children because there might be a flush
		 * dependency.  Flushes of stale children (which should also
		 * have their deleted flag set) short-cut recursive flush
		 * dependencies and can be freed here.  Any flushes which run
		 * through stale children due to the flush synchronization
		 * point should have a FLUSH_* bit set in the chain and not
		 * reach lastdrop at this time.
		 *
		 * NOTE: We return (chain) on failure to retry.
		 */
		if (core->chain_count &&
		    (chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0) {
			if (atomic_cmpset_int(&chain->refs, 1, 0))
				chain = NULL;	/* success */
			spin_unlock(&core->cst.spin);
			return(chain);
		}
		/* no chains left under us */

		/*
		 * Various parts of the code might be holding a ref on a
		 * stale chain as a placemarker which must be iterated to
		 * locate a later non-stale (live) chain.  We must be sure
		 * NOT to free the later non-stale chain (which might have
		 * no refs).  Otherwise mass confusion may result.
		 *
		 * The DUPLICATED flag tells us whether the chain is stale
		 * or not, so the rule is that any chain whose DUPLICATED flag
		 * is NOT set must also be at the head of the ownerq.
		 *
		 * Note that the DELETED flag is not involved.  That is, a
		 * live chain can represent a deletion that has not yet been
		 * flushed (or still has refs).
		 */
#if 0
		if (TAILQ_NEXT(chain, core_entry) == NULL &&
		    TAILQ_FIRST(&core->ownerq) != chain) {
#endif
		if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 &&
		    TAILQ_FIRST(&core->ownerq) != chain) {
			if (atomic_cmpset_int(&chain->refs, 1, 0))
				chain = NULL;	/* success */
			spin_unlock(&core->cst.spin);
			return(chain);
		}
	}

	/*
	 * chain->core has no children left so no accessors can get to our
	 * chain from there.  Now we have to lock the above core to interlock
	 * remaining possible accessors that might bump chain's refs before
	 * we can safely drop chain's refs with intent to free the chain.
	 */
	hmp = chain->hmp;
	pmp = chain->pmp;	/* can be NULL (not otherwise used here) */
	rdrop1 = NULL;
	rdrop2 = NULL;

	/*
	 * Spinlock the parent and try to drop the last ref on chain.
	 * On success remove chain from its parent, otherwise return NULL.
	 *
	 * (normal core locks are top-down recursive but we define core
	 * spinlocks as bottom-up recursive, so this is safe).
	 */
	if ((above = chain->above) != NULL) {
		spin_lock(&above->cst.spin);
		if (atomic_cmpset_int(&chain->refs, 1, 0) == 0) {
			/* 1->0 transition failed */
			spin_unlock(&above->cst.spin);
			if (core)
				spin_unlock(&core->cst.spin);
			return(chain);	/* retry */
		}

		/*
		 * 1->0 transition successful, remove chain from its
		 * above core.  The chain is on exactly one of the
		 * rbtree/dbtree/dbq structures.
		 */
		switch (chain->flags & (HAMMER2_CHAIN_ONRBTREE |
					HAMMER2_CHAIN_ONDBTREE |
					HAMMER2_CHAIN_ONDBQ)) {
		case HAMMER2_CHAIN_ONRBTREE:
			RB_REMOVE(hammer2_chain_tree, &above->rbtree, chain);
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE);
			break;
		case HAMMER2_CHAIN_ONDBTREE:
			RB_REMOVE(hammer2_chain_tree, &above->dbtree, chain);
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONDBTREE);
			break;
		case HAMMER2_CHAIN_ONDBQ:
			TAILQ_REMOVE(&above->dbq, chain, db_entry);
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONDBQ);
			break;
		default:
			panic("hammer2_chain_lastdrop: chain %p badflags %08x",
			      chain, chain->flags);
			break;
		}

		--above->chain_count;
		chain->above = NULL;

		/*
		 * If our chain was the last chain in the parent's core the
		 * core is now empty and its parents might now be droppable.
		 * Try to drop the first multi-homed parent by gaining a
		 * ref on it here and then dropping it below.
		 */
		if (above->chain_count == 0) {
			rdrop1 = TAILQ_FIRST(&above->ownerq);
			if (rdrop1 &&
			    atomic_cmpset_int(&rdrop1->refs, 0, 1) == 0) {
				rdrop1 = NULL;
			}
		}
		spin_unlock(&above->cst.spin);
		above = NULL;	/* safety */
	}

	/*
	 * Successful 1->0 transition and the chain can be destroyed now.
	 *
	 * We still have the core spinlock (if core is non-NULL), and core's
	 * chain_count is 0.  The above spinlock is gone.
	 *
	 * Remove chain from ownerq.  Once core has no more owners (and no
	 * children which is already the case) we can destroy core.
	 *
	 * If core has more owners we may be able to continue a bottom-up
	 * drop with our next sibling.
	 */
	if (core) {
		chain->core = NULL;

		TAILQ_REMOVE(&core->ownerq, chain, core_entry);
		rdrop2 = TAILQ_FIRST(&core->ownerq);
		if (rdrop2 && atomic_cmpset_int(&rdrop2->refs, 0, 1) == 0)
			rdrop2 = NULL;
		spin_unlock(&core->cst.spin);

		/*
		 * We can do the final 1->0 transition with an atomic op
		 * after releasing core's spinlock.
		 */
		if (atomic_fetchadd_int(&core->sharecnt, -1) == 1) {
			/*
			 * On the 1->0 transition of core we can destroy
			 * it.
			 */
			KKASSERT(TAILQ_EMPTY(&core->ownerq));
			KKASSERT(RB_EMPTY(&core->rbtree) &&
				 RB_EMPTY(&core->dbtree) &&
				 TAILQ_EMPTY(&core->dbq) &&
				 core->chain_count == 0);
			KKASSERT(core->cst.count == 0);
			KKASSERT(core->cst.upgrade == 0);
			core->good = 0x5678;	/* poison magic */
			kfree(core, hmp->mchain);
		}
		core = NULL;	/* safety */
	}

	/*
	 * All spin locks are gone, finish freeing stuff.
	 */
	KKASSERT((chain->flags & (HAMMER2_CHAIN_FLUSH_CREATE |
				  HAMMER2_CHAIN_FLUSH_DELETE |
				  HAMMER2_CHAIN_MODIFIED)) == 0);
	hammer2_chain_drop_data(chain, 1);

	KKASSERT(chain->dio == NULL);

	/*
	 * Once chain resources are gone we can use the now dead chain
	 * structure to placehold what might otherwise require a recursive
	 * drop, because we have potentially two things to drop and can only
	 * return one directly.
	 */
	if (rdrop1 && rdrop2) {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ALLOCATED);
		chain->data = (void *)rdrop1;
		TAILQ_INSERT_TAIL(delayq, chain, core_entry);
		rdrop1 = NULL;
	} else if (chain->flags & HAMMER2_CHAIN_ALLOCATED) {
		chain->flags &= ~HAMMER2_CHAIN_ALLOCATED;
		chain->hmp = NULL;
		kfree(chain, hmp->mchain);
	}

	/*
	 * Either or both can be NULL.  We already handled the case where
	 * both might not have been NULL.
	 */
	if (rdrop1)
		return(rdrop1);
	else
		return(rdrop2);
}

/*
 * Release the chain's data reference.  Called on either last lock release
 * or last drop.  Only the embedded (volume header / freemap) types carry
 * a data pointer at this point; all others must already be NULL.
 */
static void
hammer2_chain_drop_data(hammer2_chain_t *chain, int lastdrop)
{
	/*hammer2_mount_t *hmp = chain->hmp;*/

	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		if (lastdrop)
			chain->data = NULL;
		break;
	default:
		KKASSERT(chain->data == NULL);
		break;
	}
}

/*
 * Ref and lock a chain element, acquiring its data with I/O if necessary,
 * and specify how you would like the data to be resolved.
 *
 * Returns 0 on success or an error code if the data could not be acquired.
 * The chain element is locked on return regardless of whether an error
 * occurred or not.
 *
 * The lock is allowed to recurse, multiple locking ops will aggregate
 * the requested resolve types.  Once data is assigned it will not be
 * removed until the last unlock.
 *
 * HAMMER2_RESOLVE_NEVER - Do not resolve the data element.
 *			   (typically used to avoid device/logical buffer
 *			    aliasing for data)
 *
 * HAMMER2_RESOLVE_MAYBE - Do not resolve data elements for chains in
 *			   the INITIAL-create state (indirect blocks only).
 *
 *			   Do not resolve data elements for DATA chains.
 *			   (typically used to avoid device/logical buffer
 *			    aliasing for data)
 *
 * HAMMER2_RESOLVE_ALWAYS- Always resolve the data element.
 *
 * HAMMER2_RESOLVE_SHARED- (flag) The chain is locked shared, otherwise
 *			   it will be locked exclusive.
 *
 * NOTE: Embedded elements (volume header, inodes) are always resolved
 *	 regardless.
 *
 * NOTE: Specifying HAMMER2_RESOLVE_ALWAYS on a newly-created non-embedded
 *	 element will instantiate and zero its buffer, and flush it on
 *	 release.
 *
 * NOTE: (data) elements are normally locked RESOLVE_NEVER or RESOLVE_MAYBE
 *	 so as not to instantiate a device buffer, which could alias against
 *	 a logical file buffer.  However, if ALWAYS is specified the
 *	 device buffer will be instantiated anyway.
 *
 * WARNING! If data must be fetched a shared lock will temporarily be
 *	    upgraded to exclusive.  However, a deadlock can occur if
 *	    the caller owns more than one shared lock.
 */
int
hammer2_chain_lock(hammer2_chain_t *chain, int how)
{
	hammer2_mount_t *hmp;
	hammer2_chain_core_t *core;
	hammer2_blockref_t *bref;
	ccms_state_t ostate;
	char *bdata;
	int error;

	/*
	 * Ref and lock the element.  Recursive locks are allowed.
	 */
	if ((how & HAMMER2_RESOLVE_NOREF) == 0)
		hammer2_chain_ref(chain);
	atomic_add_int(&chain->lockcnt, 1);

	hmp = chain->hmp;
	KKASSERT(hmp != NULL);

	/*
	 * Get the appropriate lock.
	 */
	core = chain->core;
	if (how & HAMMER2_RESOLVE_SHARED)
		ccms_thread_lock(&core->cst, CCMS_STATE_SHARED);
	else
		ccms_thread_lock(&core->cst, CCMS_STATE_EXCLUSIVE);

	/*
	 * If we already have a valid data pointer no further action is
	 * necessary.
	 */
	if (chain->data)
		return (0);

	/*
	 * Do we have to resolve the data?
	 */
	switch(how & HAMMER2_RESOLVE_MASK) {
	case HAMMER2_RESOLVE_NEVER:
		return(0);
	case HAMMER2_RESOLVE_MAYBE:
		if (chain->flags & HAMMER2_CHAIN_INITIAL)
			return(0);
		if (chain->bref.type == HAMMER2_BREF_TYPE_DATA)
			return(0);
#if 0
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE)
			return(0);
#endif
		if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF)
			return(0);
		/* fall through */
	case HAMMER2_RESOLVE_ALWAYS:
		break;
	}

	/*
	 * Upgrade to an exclusive lock so we can safely manipulate the
	 * buffer cache.  If another thread got to it before us we
	 * can just return.
	 */
	ostate = ccms_thread_lock_upgrade(&core->cst);
	if (chain->data) {
		ccms_thread_lock_downgrade(&core->cst, ostate);
		return (0);
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		error = hammer2_io_new(hmp, bref->data_off, chain->bytes,
				       &chain->dio);
	} else {
		error = hammer2_io_bread(hmp, bref->data_off, chain->bytes,
					 &chain->dio);
		adjreadcounter(&chain->bref, chain->bytes);
	}

	if (error) {
		kprintf("hammer2_chain_lock: I/O error %016jx: %d\n",
			(intmax_t)bref->data_off, error);
		hammer2_io_bqrelse(&chain->dio);
		ccms_thread_lock_downgrade(&core->cst, ostate);
		return (error);
	}

#if 0
	/*
	 * No need for this, always require that hammer2_chain_modify()
	 * be called before any modifying operations.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) &&
	    !hammer2_io_isdirty(chain->dio)) {
		hammer2_io_setdirty(chain->dio);
	}
#endif

	/*
	 * We can clear the INITIAL state now, we've resolved the buffer
	 * to zeros and marked it dirty with hammer2_io_new().
	 */
	bdata = hammer2_io_data(chain->dio, chain->bref.data_off);
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
	}

	/*
	 * Setup the data pointer, either pointing it to an embedded data
	 * structure and copying the data from the buffer, or pointing it
	 * into the buffer.
	 *
	 * The buffer is not retained when copying to an embedded data
	 * structure in order to avoid potential deadlocks or recursions
	 * on the same physical buffer.
	 */
	switch (bref->type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * Copy data from bp to embedded buffer
		 */
		panic("hammer2_chain_lock: called on unresolved volume header");
		break;
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	default:
		/*
		 * Point data at the device buffer and leave dio intact.
		 */
		chain->data = (void *)bdata;
		break;
	}
	ccms_thread_lock_downgrade(&core->cst, ostate);
	return (0);
}

/*
 * This basically calls hammer2_io_breadcb() but does some pre-processing
 * of the chain first to handle certain cases.
 *
 * The callback is invoked directly (with dio == NULL) when the chain's
 * data is already resolved, or after the I/O completes otherwise.
 */
void
hammer2_chain_load_async(hammer2_chain_t *chain,
			 void (*callback)(hammer2_io_t *dio,
					  hammer2_chain_t *chain,
					  void *arg_p, off_t arg_o),
			 void *arg_p, off_t arg_o)
{
	hammer2_mount_t *hmp;
	struct hammer2_io *dio;
	hammer2_blockref_t *bref;
	int error;

	if (chain->data) {
		callback(NULL, chain, arg_p, arg_o);
		return;
	}

	/*
	 * We must resolve to a device buffer, either by issuing I/O or
	 * by creating a zero-fill element.  We do not mark the buffer
	 * dirty when creating a zero-fill element (the hammer2_chain_modify()
	 * API must still be used to do that).
	 *
	 * The device buffer is variable-sized in powers of 2 down
	 * to HAMMER2_MIN_ALLOC (typically 1K).  A 64K physical storage
	 * chunk always contains buffers of the same size. (XXX)
	 *
	 * The minimum physical IO size may be larger than the variable
	 * block size.
	 */
	bref = &chain->bref;
	hmp = chain->hmp;

	/*
	 * The getblk() optimization can only be used on newly created
	 * elements if the physical block size matches the request.
	 */
	if ((chain->flags & HAMMER2_CHAIN_INITIAL) &&
	    chain->bytes == hammer2_devblksize(chain->bytes)) {
		error = hammer2_io_new(hmp, bref->data_off, chain->bytes, &dio);
		KKASSERT(error == 0);
		callback(dio, chain, arg_p, arg_o);
		return;
	}

	/*
	 * Otherwise issue a read
	 */
	adjreadcounter(&chain->bref, chain->bytes);
	hammer2_io_breadcb(hmp, bref->data_off, chain->bytes,
			   callback, chain, arg_p, arg_o);
}

/*
 * Unlock and deref a chain element.
 *
 * On the last lock release any non-embedded data (chain->dio) will be
 * retired.
 */
void
hammer2_chain_unlock(hammer2_chain_t *chain)
{
	hammer2_chain_core_t *core = chain->core;
	ccms_state_t ostate;
	long *counterp;
	u_int lockcnt;

	/*
	 * The core->cst lock can be shared across several chains so we
	 * need to track the per-chain lockcnt separately.
	 *
	 * If multiple locks are present (or being attempted) on this
	 * particular chain we can just unlock, drop refs, and return.
	 *
	 * Otherwise fall-through on the 1->0 transition.
	 *
	 * The cmpset loop retries if another thread changes lockcnt
	 * between the load and the atomic compare-and-set.
	 */
	for (;;) {
		lockcnt = chain->lockcnt;
		KKASSERT(lockcnt > 0);
		cpu_ccfence();
		if (lockcnt > 1) {
			if (atomic_cmpset_int(&chain->lockcnt,
					      lockcnt, lockcnt - 1)) {
				ccms_thread_unlock(&core->cst);
				hammer2_chain_drop(chain);
				return;
			}
		} else {
			if (atomic_cmpset_int(&chain->lockcnt, 1, 0))
				break;
		}
		/* retry */
	}

	/*
	 * On the 1->0 transition we upgrade the core lock (if necessary)
	 * to exclusive for terminal processing.  If after upgrading we find
	 * that lockcnt is non-zero, another thread is racing us and will
	 * handle the unload for us later on, so just cleanup and return
	 * leaving the data/io intact
	 *
	 * Otherwise if lockcnt is still 0 it is possible for it to become
	 * non-zero and race, but since we hold the core->cst lock
	 * exclusively all that will happen is that the chain will be
	 * reloaded after we unload it.
	 */
	ostate = ccms_thread_lock_upgrade(&core->cst);
	if (chain->lockcnt) {
		ccms_thread_unlock_upgraded(&core->cst, ostate);
		hammer2_chain_drop(chain);
		return;
	}

	/*
	 * Shortcut the case if the data is embedded or not resolved.
	 *
	 * Do NOT NULL out chain->data (e.g. inode data), it might be
	 * dirty.
	 */
	if (chain->dio == NULL) {
		if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
			hammer2_chain_drop_data(chain, 0);
		ccms_thread_unlock_upgraded(&core->cst, ostate);
		hammer2_chain_drop(chain);
		return;
	}

	/*
	 * Statistics: accumulate the number of bytes written into the
	 * per-type global write counters, but only when the dio is
	 * actually dirty.  The ioa_* counters track the IOFLUSH
	 * (async-write) case, the iod_* counters the normal case.
	 */
	if (hammer2_io_isdirty(chain->dio) == 0) {
		;
	} else if (chain->flags & HAMMER2_CHAIN_IOFLUSH) {
		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_DATA:
			counterp = &hammer2_ioa_file_write;
			break;
		case HAMMER2_BREF_TYPE_INODE:
			counterp = &hammer2_ioa_meta_write;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
			counterp = &hammer2_ioa_indr_write;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
			counterp = &hammer2_ioa_fmap_write;
			break;
		default:
			counterp = &hammer2_ioa_volu_write;
			break;
		}
		*counterp += chain->bytes;
	} else {
		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_DATA:
			counterp = &hammer2_iod_file_write;
			break;
		case HAMMER2_BREF_TYPE_INODE:
			counterp = &hammer2_iod_meta_write;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
			counterp = &hammer2_iod_indr_write;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
			counterp = &hammer2_iod_fmap_write;
			break;
		default:
			counterp = &hammer2_iod_volu_write;
			break;
		}
		*counterp += chain->bytes;
	}

	/*
	 * Clean out the dio.
	 *
	 * If a device buffer was used for data be sure to destroy the
	 * buffer when we are done to avoid aliases (XXX what about the
	 * underlying VM pages?).
	 *
	 * NOTE: Freemap leaf's use reserved blocks and thus no aliasing
	 *	 is possible.
	 *
	 * NOTE: The isdirty check tracks whether we have to bdwrite() the
	 *	 buffer or not.  The buffer might already be dirty.  The
	 *	 flag is re-set when chain_modify() is called, even if
	 *	 MODIFIED is already set, allowing the OS to retire the
	 *	 buffer independent of a hammer2 flush.
	 */
	chain->data = NULL;
	if ((chain->flags & HAMMER2_CHAIN_IOFLUSH) &&
	    hammer2_io_isdirty(chain->dio)) {
		hammer2_io_bawrite(&chain->dio);
	} else {
		hammer2_io_bqrelse(&chain->dio);
	}
	ccms_thread_unlock_upgraded(&core->cst, ostate);
	hammer2_chain_drop(chain);
}

/*
 * This counts the number of live blockrefs in a block array and
 * also calculates the point at which all remaining blockrefs are empty.
 * This routine can only be called on a live chain (DUPLICATED flag not set).
 *
 * The count is performed at most once per core; subsequent calls are
 * no-ops because the COUNTEDBREFS flag short-circuits the scan.
 *
 * NOTE: Flag is not set until after the count is complete, allowing
 *	 callers to test the flag without holding the spinlock.
 *
 * NOTE: If base is NULL the related chain is still in the INITIAL
 *	 state and there are no blockrefs to count.
 *
 * NOTE: live_count may already have some counts accumulated due to
 *	 creation and deletion and could even be initially negative.
 */
void
hammer2_chain_countbrefs(hammer2_chain_t *chain,
			 hammer2_blockref_t *base, int count)
{
	hammer2_chain_core_t *core = chain->core;

	KKASSERT((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0);

	spin_lock(&core->cst.spin);
	if ((core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) {
		if (base) {
			/* scan down from the top to find the last live bref */
			while (--count >= 0) {
				if (base[count].type)
					break;
			}
			core->live_zero = count + 1;
			/* then count all live brefs below that point */
			while (count >= 0) {
				if (base[count].type)
					atomic_add_int(&core->live_count, 1);
				--count;
			}
		} else {
			core->live_zero = 0;
		}
		/* else do not modify live_count */
		atomic_set_int(&core->flags, HAMMER2_CORE_COUNTEDBREFS);
	}
	spin_unlock(&core->cst.spin);
}

/*
 * Resize the chain's physical storage allocation in-place.
This may
 * replace the passed-in chain with a new chain.
 *
 * Chains can be resized smaller without reallocating the storage.
 * Resizing larger will reallocate the storage.
 *
 * Must be passed an exclusively locked parent and chain, returns a new
 * exclusively locked chain at the same index and unlocks the old chain.
 * Flushes the buffer if necessary.
 *
 * This function is mostly used with DATA blocks locked RESOLVE_NEVER in order
 * to avoid instantiating a device buffer that conflicts with the vnode
 * data buffer.  That is, the passed-in bp is a logical buffer, whereas
 * any chain-oriented bp would be a device buffer.
 *
 * nradix is the log2 of the new size in bytes.
 *
 * XXX return error if cannot resize.
 */
void
hammer2_chain_resize(hammer2_trans_t *trans, hammer2_inode_t *ip,
		     hammer2_chain_t *parent, hammer2_chain_t **chainp,
		     int nradix, int flags)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *chain;
	size_t obytes;
	size_t nbytes;

	chain = *chainp;
	hmp = chain->hmp;

	/*
	 * Only data and indirect blocks can be resized for now.
	 * (The volu root, inodes, and freemap elements use a fixed size).
	 */
	KKASSERT(chain != &hmp->vchain);
	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_DATA ||
		 chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT);

	/*
	 * Nothing to do if the element is already the proper size
	 *
	 * NOTE(review): 1U << nradix assumes nradix < 32 - presumably
	 * callers only pass block radices well below this; confirm.
	 */
	obytes = chain->bytes;
	nbytes = 1U << nradix;
	if (obytes == nbytes)
		return;

	/*
	 * Delete the old chain and duplicate it at the same (parent, index),
	 * returning a new chain.  This allows the old chain to still be
	 * used by the flush code.  The new chain will be returned in a
	 * modified state.
	 *
	 * The parent does not have to be locked for the delete/duplicate call,
	 * but is in this particular code path.
	 *
	 * NOTE: If we are not crossing a synchronization point the
	 *	 duplication code will simply reuse the existing chain
	 *	 structure.
	 */
	hammer2_chain_delete_duplicate(trans, &chain, 0);

	/*
	 * Relocate the block, even if making it smaller (because different
	 * block sizes may be in different regions).
	 *
	 * (data blocks only, we aren't copying the storage here).
	 */
	hammer2_freemap_alloc(trans, chain, nbytes);
	chain->bytes = nbytes;
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_FORCECOW);
	/*ip->delta_dcount += (ssize_t)(nbytes - obytes);*/ /* XXX atomic */

	/*
	 * For now just support it on DATA chains (and not on indirect
	 * blocks).
	 */
	KKASSERT(chain->dio == NULL);

	*chainp = chain;
}

/*
 * Set a chain modified, making it read-write and duplicating it if necessary.
 * This function will assign a new physical block to the chain if necessary
 *
 * Duplication of already-modified chains is possible when the modification
 * crosses a flush synchronization boundary.
 *
 * Non-data blocks - The chain should be locked to at least the RESOLVE_MAYBE
 *		     level or the COW operation will not work.
 *
 * Data blocks	   - The chain is usually locked RESOLVE_NEVER so as not to
 *		     run the data through the device buffers.
 *
 * This function may return a different chain than was passed, in which case
 * the old chain will be unlocked and the new chain will be locked.
 *
 * ip->chain may be adjusted by hammer2_chain_modify_ip().
 */
hammer2_inode_data_t *
hammer2_chain_modify_ip(hammer2_trans_t *trans, hammer2_inode_t *ip,
			hammer2_chain_t **chainp, int flags)
{
	/*
	 * Inode-flavored wrapper around hammer2_chain_modify(): flags the
	 * inode MODIFIED, runs the chain modification, repoints ip->chain
	 * if the modify replaced the chain, dirties the vnode if present,
	 * and returns a pointer to the (now writable) embedded inode data.
	 */
	atomic_set_int(&ip->flags, HAMMER2_INODE_MODIFIED);
	hammer2_chain_modify(trans, chainp, flags);
	if (ip->chain != *chainp)
		hammer2_inode_repoint(ip, NULL, *chainp);
	if (ip->vp)
		vsetisdirty(ip->vp);
	return(&ip->chain->data->ipdata);
}

void
hammer2_chain_modify(hammer2_trans_t *trans, hammer2_chain_t **chainp,
		     int flags)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *chain;
	hammer2_io_t *dio;
	int error;
	int wasinitial;
	char *bdata;

	chain = *chainp;
	hmp = chain->hmp;

	KKASSERT(chain->bref.mirror_tid != trans->sync_tid ||
		 (chain->flags & HAMMER2_CHAIN_MODIFIED));

	/*
	 * data is not optional for freemap chains (we must always be sure
	 * to copy the data on COW storage allocations).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) ||
			 (flags & HAMMER2_MODIFY_OPTDATA) == 0);
	}

	/*
	 * Determine if a delete-duplicate is needed.
	 *
	 * (a) Modify_tid is part of a prior flush
	 * (b) Transaction is concurrent with a flush (has higher tid)
	 * (c) and chain is not in the initial state (freshly created)
	 * (d) and caller didn't request an in-place modification.
	 *
	 * The freemap and volume header special chains are never D-Dd.
	 */
	if (chain->modify_tid != trans->sync_tid &&	/* cross boundary */
	    (flags & HAMMER2_MODIFY_INPLACE) == 0) {	/* from d-d */
		if (chain != &hmp->fchain && chain != &hmp->vchain) {
			KKASSERT((flags & HAMMER2_MODIFY_ASSERTNOCOPY) == 0);
			hammer2_chain_delete_duplicate(trans, chainp, 0);
			/* the D-D may have replaced the chain */
			chain = *chainp;
		}
	}

	/*
	 * Data must be resolved if already assigned unless explicitly
	 * flagged otherwise.  The lock/unlock cycle resolves chain->data
	 * as a side effect.
	 */
	if (chain->data == NULL && (flags & HAMMER2_MODIFY_OPTDATA) == 0 &&
	    (chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX)) {
		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
		hammer2_chain_unlock(chain);
	}

	/*
	 * Otherwise do initial-chain handling.  Set MODIFIED to indicate
	 * that the chain has been modified.  Set FLUSH_CREATE to flush
	 * the new blockref (the D-D set FLUSH_DELETE on the old chain to
	 * delete the old blockref).
	 *
	 * Each flag transition takes an extra ref on the chain.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
		hammer2_chain_ref(chain);
		hammer2_chain_memory_inc(chain->pmp);
	}
	if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) {
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_CREATE);
		hammer2_chain_ref(chain);
	}

	/*
	 * The modification or re-modification requires an allocation and
	 * possible COW.
	 *
	 * We normally always allocate new storage here.  If storage exists
	 * and MODIFY_NOREALLOC is passed in, we do not allocate new storage.
	 */
	if (chain != &hmp->vchain && chain != &hmp->fchain) {
		if ((chain->bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0 ||
		    ((flags & HAMMER2_MODIFY_NOREALLOC) == 0 &&
		     chain->modify_tid != trans->sync_tid)
		) {
			hammer2_freemap_alloc(trans, chain, chain->bytes);
			/* XXX failed allocation */
		} else if (chain->flags & HAMMER2_CHAIN_FORCECOW) {
			hammer2_freemap_alloc(trans, chain, chain->bytes);
			/* XXX failed allocation */
		}
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_FORCECOW);
	}

	/*
	 * Update modify_tid.  XXX special-case vchain/fchain because they
	 * are always modified in-place.  Otherwise the chain being modified
	 * must not be part of a future transaction.
	 */
	if (chain == &hmp->vchain || chain == &hmp->fchain) {
		if (chain->modify_tid <= trans->sync_tid)
			chain->modify_tid = trans->sync_tid;
	} else {
		KKASSERT(chain->modify_tid <= trans->sync_tid);
		chain->modify_tid = trans->sync_tid;
	}

	if ((flags & HAMMER2_MODIFY_NO_MODIFY_TID) == 0)
		chain->bref.modify_tid = trans->sync_tid;

	/*
	 * Do not COW BREF_TYPE_DATA when OPTDATA is set.  This is because
	 * data modifications are done via the logical buffer cache so COWing
	 * it here would result in unnecessary extra copies (and possibly extra
	 * block reallocations).  The INITIAL flag remains unchanged in this
	 * situation.
	 *
	 * (This is a bit of a hack).
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_DATA &&
	    (flags & HAMMER2_MODIFY_OPTDATA)) {
		goto skip2;
	}

	/*
	 * Clearing the INITIAL flag (for indirect blocks) indicates that
	 * we've processed the uninitialized storage allocation.
	 *
	 * If this flag is already clear we are likely in a copy-on-write
	 * situation but we have to be sure NOT to bzero the storage if
	 * no data is present.
	 */
	if (chain->flags & HAMMER2_CHAIN_INITIAL) {
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_INITIAL);
		wasinitial = 1;
	} else {
		wasinitial = 0;
	}

	/*
	 * Instantiate data buffer and possibly execute COW operation
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_VOLUME:
	case HAMMER2_BREF_TYPE_FREEMAP:
		/*
		 * The data is embedded, no copy-on-write operation is
		 * needed.
		 */
		KKASSERT(chain->dio == NULL);
		break;
	case HAMMER2_BREF_TYPE_INODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	case HAMMER2_BREF_TYPE_DATA:
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Perform the copy-on-write operation
		 *
		 * zero-fill or copy-on-write depending on whether
		 * chain->data exists or not and set the dirty state for
		 * the new buffer.  hammer2_io_new() will handle the
		 * zero-fill.
		 */
		KKASSERT(chain != &hmp->vchain && chain != &hmp->fchain);

		if (wasinitial) {
			error = hammer2_io_new(hmp, chain->bref.data_off,
					       chain->bytes, &dio);
		} else {
			error = hammer2_io_bread(hmp, chain->bref.data_off,
						 chain->bytes, &dio);
		}
		adjreadcounter(&chain->bref, chain->bytes);
		KKASSERT(error == 0);

		bdata = hammer2_io_data(dio, chain->bref.data_off);

		if (chain->data) {
			/* copy the old data into the newly allocated block */
			KKASSERT(chain->dio != NULL);
			if (chain->data != (void *)bdata) {
				bcopy(chain->data, bdata, chain->bytes);
			}
		} else if (wasinitial == 0) {
			/*
			 * We have a problem.  We were asked to COW but
			 * we don't have any data to COW with!
			 */
			panic("hammer2_chain_modify: having a COW %p\n",
			      chain);
		}

		/*
		 * Retire the old buffer, replace with the new
		 */
		if (chain->dio)
			hammer2_io_brelse(&chain->dio);
		chain->data = (void *)bdata;
		chain->dio = dio;
		hammer2_io_setdirty(dio);	/* modified by bcopy above */
		break;
	default:
		panic("hammer2_chain_modify: illegal non-embedded type %d",
		      chain->bref.type);
		break;

	}
skip2:
	/* propagate the modification upward for the flusher */
	hammer2_chain_setsubmod(trans, chain);
}

/*
 * Mark the volume as having been modified.  This short-cut version
 * does not have to lock the volume's chain, which allows the ioctl
 * code to make adjustments to connections without deadlocking.  XXX
 *
 * No ref is made on vchain when flagging it MODIFIED.
 */
void
hammer2_modify_volume(hammer2_mount_t *hmp)
{
	hammer2_voldata_lock(hmp);
	hammer2_voldata_unlock(hmp, 1);
}

/*
 * This function returns the chain at the nearest key within the specified
 * range with the highest delete_tid.  The core spinlock must be held on
 * call and the returned chain will be referenced but not locked.
 *
 * The returned chain may or may not be in a deleted state.  Note that
 * live chains have a delete_tid = MAX_TID.
 *
 * This function will recurse through chain->rbtree as necessary and will
 * return a *key_nextp suitable for iteration.  *key_nextp is only set if
 * the iteration value is less than the current value of *key_nextp.
 *
 * The caller should use (*key_nextp) to calculate the actual range of
 * the returned element, which will be (key_beg to *key_nextp - 1), because
 * there might be another element which is superior to the returned element
 * and overlaps it.
 *
 * (*key_nextp) can be passed as key_beg in an iteration only while non-NULL
 * chains continue to be returned.
On EOF (*key_nextp) may overflow since
 * it will wind up being (key_end + 1).
 */
struct hammer2_chain_find_info {
	hammer2_chain_t		*best;		/* best candidate so far */
	hammer2_key_t		key_beg;	/* inclusive search start */
	hammer2_key_t		key_end;	/* inclusive search end */
	hammer2_key_t		key_next;	/* iteration value output */
};

static int hammer2_chain_find_cmp(hammer2_chain_t *child, void *data);
static int hammer2_chain_find_callback(hammer2_chain_t *child, void *data);

static
hammer2_chain_t *
hammer2_chain_find(hammer2_chain_t *parent, hammer2_key_t *key_nextp,
		   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = *key_nextp;

	KKASSERT(parent->core->good == 0x1234);
	RB_SCAN(hammer2_chain_tree, &parent->core->rbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	*key_nextp = info.key_next;
#if 0
	kprintf("chain_find %p %016jx:%016jx next=%016jx\n",
		parent, key_beg, key_end, *key_nextp);
#endif

	return (info.best);
}

/*
 * Find a deleted chain covering a block table entry.  Be careful to deal
 * with the race condition where the block table has been updated but the
 * chain has not yet been removed from dbtree (due to multiple parents having
 * to be updated).
 */
static
hammer2_chain_t *
hammer2_chain_find_deleted(hammer2_chain_t *parent,
			   hammer2_key_t key_beg, hammer2_key_t key_end)
{
	struct hammer2_chain_find_info info;
	hammer2_chain_t *child;

	info.best = NULL;
	info.key_beg = key_beg;
	info.key_end = key_end;
	info.key_next = 0;

	KKASSERT(parent->core->good == 0x1234);
	RB_SCAN(hammer2_chain_tree, &parent->core->dbtree,
		hammer2_chain_find_cmp, hammer2_chain_find_callback,
		&info);
	if ((child = info.best) != NULL) {
		/* already visible in the parent's flushed view: ignore */
		if (child->delete_tid <= parent->update_lo)
			child = NULL;
	}
	return child;
}

/*
 * RB_SCAN comparator: -1 if the child lies entirely below the search
 * range, +1 if entirely above, 0 if it overlaps (callback is invoked).
 */
static
int
hammer2_chain_find_cmp(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_key_t child_beg;
	hammer2_key_t child_end;

	child_beg = child->bref.key;
	/*
	 * NOTE(review): a keybits value of 64 would shift out of range
	 * here - presumably children never span the full 64-bit key
	 * space at this level; confirm against the callers.
	 */
	child_end = child_beg + ((hammer2_key_t)1 << child->bref.keybits) - 1;

	if (child_end < info->key_beg)
		return(-1);
	if (child_beg > info->key_end)
		return(1);
	return(0);
}

static
int
hammer2_chain_find_callback(hammer2_chain_t *child, void *data)
{
	struct hammer2_chain_find_info *info = data;
	hammer2_chain_t *best;
	hammer2_key_t child_end;

	/*
	 * WARNING! Do not discard DUPLICATED chains, it is possible that
	 *	    we are catching an insertion half-way done.  If a
	 *	    duplicated chain turns out to be the best choice the
	 *	    caller will re-check its flags after locking it.
	 *
	 * WARNING! Layerq is scanned forwards, exact matches should keep
	 *	    the existing info->best.
	 */
	if ((best = info->best) == NULL) {
		/*
		 * No previous best.  Assign best
		 */
		info->best = child;
	} else if (best->bref.key <= info->key_beg &&
		   child->bref.key <= info->key_beg) {
		/*
		 * If our current best is flush with key_beg and child is
		 * also flush with key_beg choose based on delete_tid.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		if (child->delete_tid > best->delete_tid)
			info->best = child;
	} else if (child->bref.key < best->bref.key) {
		/*
		 * Child has a nearer key and best is not flush with key_beg.
		 * Truncate key_next to the old best key iff it had a better
		 * delete_tid.
		 */
		info->best = child;
		if (best->delete_tid >= child->delete_tid &&
		    (info->key_next > best->bref.key || info->key_next == 0))
			info->key_next = best->bref.key;
	} else if (child->bref.key == best->bref.key) {
		/*
		 * If our current best is flush with the child then choose
		 * based on delete_tid.
		 *
		 * key_next will automatically be limited to the smaller of
		 * the two end-points.
		 */
		if (child->delete_tid > best->delete_tid)
			info->best = child;
	} else {
		/*
		 * Keep the current best but truncate key_next to the child's
		 * base iff the child has a higher delete_tid.
		 *
		 * key_next will also automatically be limited to the smaller
		 * of the two end-points (probably not necessary for this case
		 * but we do it anyway).
		 */
		if (child->delete_tid >= best->delete_tid &&
		    (info->key_next > child->bref.key || info->key_next == 0))
			info->key_next = child->bref.key;
	}

	/*
	 * Always truncate key_next based on child's end-of-range.
	 */
	child_end = child->bref.key + ((hammer2_key_t)1 << child->bref.keybits);
	if (child_end && (info->key_next > child_end || info->key_next == 0))
		info->key_next = child_end;

	return(0);
}

/*
 * Retrieve the specified chain from a media blockref, creating the
 * in-memory chain structure which reflects it.  modify_tid is
 * initialized from the media bref's mirror_tid (see below), so a later
 * modification in a newer transaction will issue a delete-duplicate.
 *
 * To handle insertion races pass the INSERT_RACE flag along with the
 * generation number of the core.  NULL will be returned if the generation
 * number changes before we have a chance to insert the chain.  Insert
 * races can occur because the parent might be held shared.
 *
 * Caller must hold the parent locked shared or exclusive since we may
 * need the parent's bref array to find our block.
 */
hammer2_chain_t *
hammer2_chain_get(hammer2_chain_t *parent, int generation,
		  hammer2_blockref_t *bref)
{
	hammer2_mount_t *hmp = parent->hmp;
	hammer2_chain_core_t *above = parent->core;
	hammer2_chain_t *chain;
	int error;

	/*
	 * Allocate a chain structure representing the existing media
	 * entry.  Resulting chain has one ref and is not locked.
	 */
	chain = hammer2_chain_alloc(hmp, parent->pmp, NULL, bref);
	hammer2_chain_core_alloc(NULL, chain, NULL);
	/* ref'd chain returned */

	/*
	 * Set modify_tid and update_lo to the chain's synchronization
	 * point from the media.
	 */
	chain->modify_tid = chain->bref.mirror_tid;
	chain->update_lo = chain->bref.mirror_tid;
	atomic_set_int(&chain->flags, HAMMER2_CHAIN_BMAPPED);

	/*
	 * Link the chain into its parent.  A spinlock is required to safely
	 * access the RBTREE, and it is possible to collide with another
	 * hammer2_chain_get() operation because the caller might only hold
	 * a shared lock on the parent.
	 */
	KKASSERT(parent->refs > 0);
	error = hammer2_chain_insert(above, NULL, chain,
				     HAMMER2_CHAIN_INSERT_SPIN |
				     HAMMER2_CHAIN_INSERT_RACE,
				     generation);
	if (error) {
		/* lost the race: dispose of the chain we just built */
		KKASSERT((chain->flags & (HAMMER2_CHAIN_ONRBTREE |
					  HAMMER2_CHAIN_ONDBTREE |
					  HAMMER2_CHAIN_ONDBQ)) == 0);
		kprintf("chain %p get race\n", chain);
		hammer2_chain_drop(chain);
		chain = NULL;
	} else {
		KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE);
	}

	/*
	 * Return our new chain referenced but not locked, or NULL if
	 * a race occurred.
	 */
	return (chain);
}

/*
 * Lookup initialization/completion API
 *
 * hammer2_chain_lookup_init() locks the parent (shared if the SHARED
 * lookup flag is given, exclusive otherwise) and returns it; pair with
 * hammer2_chain_lookup_done() which unlocks it.
 */
hammer2_chain_t *
hammer2_chain_lookup_init(hammer2_chain_t *parent, int flags)
{
	if (flags & HAMMER2_LOOKUP_SHARED) {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
					   HAMMER2_RESOLVE_SHARED);
	} else {
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
	}
	return (parent);
}

void
hammer2_chain_lookup_done(hammer2_chain_t *parent)
{
	if (parent)
		hammer2_chain_unlock(parent);
}

/*
 * Replace (*parentp) with the live (non-DUPLICATED) owner of the
 * parent's core, locked per 'how'.  The old parent is unlocked and the
 * replacement is returned locked and referenced.
 */
static
hammer2_chain_t *
hammer2_chain_getparent(hammer2_chain_t **parentp, int how)
{
	hammer2_chain_t *oparent;
	hammer2_chain_t *bparent;
	hammer2_chain_t *nparent;
	hammer2_chain_core_t *above;

	oparent = *parentp;
	above = oparent->above;

	spin_lock(&above->cst.spin);
	bparent = TAILQ_FIRST(&above->ownerq);
	hammer2_chain_ref(bparent);

	/*
	 * Be careful of order, oparent must be unlocked before nparent
	 * is locked below to avoid a deadlock.  We might as well delay its
	 * unlocking until we conveniently no longer have the spinlock (instead
	 * of cycling the spinlock).
	 *
	 * Theoretically our ref on bparent should prevent elements of the
	 * following chain from going away and prevent above from going away,
	 * but we still need the spinlock to safely scan the list.
	 */
	for (;;) {
		/* skip over DUPLICATED (superseded) owners */
		nparent = bparent;
		while (nparent->flags & HAMMER2_CHAIN_DUPLICATED)
			nparent = TAILQ_NEXT(nparent, core_entry);
		hammer2_chain_ref(nparent);
		spin_unlock(&above->cst.spin);

		if (oparent) {
			hammer2_chain_unlock(oparent);
			oparent = NULL;
		}
		hammer2_chain_lock(nparent, how | HAMMER2_RESOLVE_NOREF);
		hammer2_chain_drop(bparent);

		/*
		 * We might have raced a delete-duplicate.
		 */
		if ((nparent->flags & HAMMER2_CHAIN_DUPLICATED) == 0)
			break;
		bparent = nparent;
		hammer2_chain_ref(bparent);
		hammer2_chain_unlock(nparent);
		spin_lock(&above->cst.spin);
		/* retry */
	}
	*parentp = nparent;

	return (nparent);
}

/*
 * Locate the first chain whose key range overlaps (key_beg, key_end) inclusive.
 * (*parentp) typically points to an inode but can also point to a related
 * indirect block and this function will recurse upwards and find the inode
 * again.
 *
 * (*parentp) must be exclusively locked and referenced and can be an inode
 * or an existing indirect block within the inode.
 *
 * On return (*parentp) will be modified to point at the deepest parent chain
 * element encountered during the search, as a helper for an insertion or
 * deletion.  The new (*parentp) will be locked and referenced and the old
 * will be unlocked and dereferenced (no change if they are both the same).
 *
 * The matching chain will be returned exclusively locked.  If NOLOCK is
 * requested the chain will be returned only referenced.
 *
 * NULL is returned if no match was found, but (*parentp) will still
 * potentially be adjusted.
 *
 * On return (*key_nextp) will point to an iterative value for key_beg.
 * (If NULL is returned (*key_nextp) is set to key_end).
 *
 * This function will also recurse up the chain if the key is not within the
 * current parent's range.  (*parentp) can never be set to NULL.  An iteration
 * can simply allow (*parentp) to float inside the loop.
 *
 * NOTE!  chain->data is not always resolved.  By default it will not be
 *	  resolved for BREF_TYPE_DATA, FREEMAP_NODE, or FREEMAP_LEAF.  Use
 *	  HAMMER2_LOOKUP_ALWAYS to force resolution (but be careful w/
 *	  BREF_TYPE_DATA as the device buffer can alias the logical file
 *	  buffer).
 */
hammer2_chain_t *
hammer2_chain_lookup(hammer2_chain_t **parentp, hammer2_key_t *key_nextp,
		     hammer2_key_t key_beg, hammer2_key_t key_end,
		     int *cache_indexp, int flags)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *chain;
	hammer2_blockref_t *base;
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_key_t scan_beg;
	hammer2_key_t scan_end;
	hammer2_chain_core_t *above;
	int count = 0;
	int how_always = HAMMER2_RESOLVE_ALWAYS;
	int how_maybe = HAMMER2_RESOLVE_MAYBE;
	int how;
	int generation;
	int maxloops = 300000;	/* defensive: panic on runaway iteration */
	int wasdup;

	/*
	 * Translate the LOOKUP_* flags into chain-lock resolve modes.
	 */
	if (flags & HAMMER2_LOOKUP_ALWAYS) {
		how_maybe = how_always;
		how = HAMMER2_RESOLVE_ALWAYS;
	} else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) {
		how = HAMMER2_RESOLVE_NEVER;
	} else {
		how = HAMMER2_RESOLVE_MAYBE;
	}
	if (flags & (HAMMER2_LOOKUP_SHARED | HAMMER2_LOOKUP_NOLOCK)) {
		how_maybe |= HAMMER2_RESOLVE_SHARED;
		how_always |= HAMMER2_RESOLVE_SHARED;
		how |= HAMMER2_RESOLVE_SHARED;
	}

	/*
	 * Recurse (*parentp) upward if necessary until the parent completely
	 * encloses the key range or we hit the inode.
	 *
	 * This function handles races against the flusher doing a delete-
	 * duplicate above us and re-homes the parent to the duplicate in
	 * that case, otherwise we'd wind up recursing down a stale chain.
	 */
	parent = *parentp;
	hmp = parent->hmp;

	while (parent->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	       parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		scan_beg = parent->bref.key;
		scan_end = scan_beg +
			   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
		if (key_beg >= scan_beg && key_end <= scan_end)
			break;
		parent = hammer2_chain_getparent(parentp, how_maybe);
	}

again:
	if (--maxloops == 0)
		panic("hammer2_chain_lookup: maxloops");
	/*
	 * Locate the blockref array.  Currently we do a fully associative
	 * search through the array.
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
			if (flags & HAMMER2_LOOKUP_NOLOCK)
				hammer2_chain_ref(parent);
			else
				hammer2_chain_lock(parent, how_always);
			*key_nextp = key_end + 1;
			return (parent);
		}
		base = &parent->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Handle MATCHIND on the parent
		 */
		if (flags & HAMMER2_LOOKUP_MATCHIND) {
			scan_beg = parent->bref.key;
			scan_end = scan_beg +
				   ((hammer2_key_t)1 << parent->bref.keybits) - 1;
			if (key_beg == scan_beg && key_end == scan_end) {
				chain = parent;
				hammer2_chain_lock(chain, how_maybe);
				*key_nextp = scan_end + 1;
				goto done;
			}
		}
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		if (parent->flags & HAMMER2_CHAIN_INITIAL) {
			base = NULL;
		} else {
			if (parent->data == NULL)
				panic("parent->data is NULL");
			base = &parent->data->npdata[0];
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &hmp->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &hmp->voldata.freemap_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_lookup: unrecognized blockref type: %d",
		      parent->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}

	/*
	 * Merged scan to find next candidate.
	 *
	 * hammer2_base_*() functions require the above->live_* fields
	 * to be synchronized.
	 *
	 * We need to hold the spinlock to access the block array and RB tree
	 * and to interlock chain creation.
	 */
	above = parent->core;
	if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Combined search
	 */
	spin_lock(&above->cst.spin);
	chain = hammer2_combined_find(parent, base, count,
				      cache_indexp, key_nextp,
				      key_beg, key_end,
				      &bref);
	generation = above->generation;

	/*
	 * Exhausted parent chain, iterate.
	 */
	if (bref == NULL) {
		spin_unlock(&above->cst.spin);
		if (key_beg == key_end)	/* short cut single-key case */
			return (NULL);
		return (hammer2_chain_next(parentp, NULL, key_nextp,
					   key_beg, key_end,
					   cache_indexp, flags));
	}

	/*
	 * Selected from blockref or in-memory chain.
	 */
	if (chain == NULL) {
		/*
		 * Media-only candidate: instantiate an in-memory chain
		 * from a private copy of the bref, re-verifying the bref
		 * afterward to detect a concurrent block-table update.
		 */
		bcopy = *bref;
		spin_unlock(&above->cst.spin);
		chain = hammer2_chain_get(parent, generation,
					  &bcopy);
		if (chain == NULL) {
			kprintf("retry lookup parent %p keys %016jx:%016jx\n",
				parent, key_beg, key_end);
			goto again;
		}
		if (bcmp(&bcopy, bref, sizeof(bcopy))) {
			hammer2_chain_drop(chain);
			goto again;
		}
		wasdup = 0;
	} else {
		hammer2_chain_ref(chain);
		wasdup = ((chain->flags & HAMMER2_CHAIN_DUPLICATED) != 0);
		spin_unlock(&above->cst.spin);
	}

	/*
	 * chain is referenced but not locked.  We must lock the chain
	 * to obtain definitive DUPLICATED/DELETED state
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		hammer2_chain_lock(chain, how_maybe | HAMMER2_RESOLVE_NOREF);
	} else {
		hammer2_chain_lock(chain, how | HAMMER2_RESOLVE_NOREF);
	}

	/*
	 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX)
	 *
	 * NOTE: Chain's key range is not relevant as there might be
	 *	 one-offs within the range that are not deleted.
	 *
	 * NOTE: Lookups can race delete-duplicate because
	 *	 delete-duplicate does not lock the parent's core
	 *	 (they just use the spinlock on the core).  We must
	 *	 check for races by comparing the DUPLICATED flag before
	 *	 releasing the spinlock with the flag after locking the
	 *	 chain.
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED) {
		hammer2_chain_unlock(chain);
		if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 || wasdup) {
			key_beg = *key_nextp;
			if (key_beg == 0 || key_beg > key_end)
				return(NULL);
		}
		goto again;
	}

	/*
	 * If the chain element is an indirect block it becomes the new
	 * parent and we loop on it.  We must maintain our top-down locks
	 * to prevent the flusher from interfering (i.e. doing a
	 * delete-duplicate and leaving us recursing down a deleted chain).
	 *
	 * The parent always has to be locked with at least RESOLVE_MAYBE
	 * so we can access its data.  It might need a fixup if the caller
	 * passed incompatible flags.  Be careful not to cause a deadlock
	 * as a data-load requires an exclusive lock.
	 *
	 * If HAMMER2_LOOKUP_MATCHIND is set and the indirect block's key
	 * range is within the requested key range we return the indirect
	 * block and do NOT loop.  This is usually only used to acquire
	 * freemap nodes.
	 */
	if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT ||
	    chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_NODE) {
		hammer2_chain_unlock(parent);
		*parentp = parent = chain;
		goto again;
	}
done:
	/*
	 * All done, return the chain
	 */
	return (chain);
}

/*
 * After having issued a lookup we can iterate all matching keys.
 *
 * If chain is non-NULL we continue the iteration from just after its index.
 *
 * If chain is NULL we assume the parent was exhausted and continue the
 * iteration at the next parent.
2161 * 2162 * parent must be locked on entry and remains locked throughout. chain's 2163 * lock status must match flags. Chain is always at least referenced. 2164 * 2165 * WARNING! The MATCHIND flag does not apply to this function. 2166 */ 2167 hammer2_chain_t * 2168 hammer2_chain_next(hammer2_chain_t **parentp, hammer2_chain_t *chain, 2169 hammer2_key_t *key_nextp, 2170 hammer2_key_t key_beg, hammer2_key_t key_end, 2171 int *cache_indexp, int flags) 2172 { 2173 hammer2_chain_t *parent; 2174 int how_maybe; 2175 2176 /* 2177 * Calculate locking flags for upward recursion. 2178 */ 2179 how_maybe = HAMMER2_RESOLVE_MAYBE; 2180 if (flags & (HAMMER2_LOOKUP_SHARED | HAMMER2_LOOKUP_NOLOCK)) 2181 how_maybe |= HAMMER2_RESOLVE_SHARED; 2182 2183 parent = *parentp; 2184 2185 /* 2186 * Calculate the next index and recalculate the parent if necessary. 2187 */ 2188 if (chain) { 2189 key_beg = chain->bref.key + 2190 ((hammer2_key_t)1 << chain->bref.keybits); 2191 if (flags & HAMMER2_LOOKUP_NOLOCK) 2192 hammer2_chain_drop(chain); 2193 else 2194 hammer2_chain_unlock(chain); 2195 2196 /* 2197 * Any scan where the lookup returned degenerate data embedded 2198 * in the inode has an invalid index and must terminate. 2199 */ 2200 if (chain == parent) 2201 return(NULL); 2202 if (key_beg == 0 || key_beg > key_end) 2203 return(NULL); 2204 chain = NULL; 2205 } else if (parent->bref.type != HAMMER2_BREF_TYPE_INDIRECT && 2206 parent->bref.type != HAMMER2_BREF_TYPE_FREEMAP_NODE) { 2207 /* 2208 * We reached the end of the iteration. 2209 */ 2210 return (NULL); 2211 } else { 2212 /* 2213 * Continue iteration with next parent unless the current 2214 * parent covers the range. 
2215 */ 2216 key_beg = parent->bref.key + 2217 ((hammer2_key_t)1 << parent->bref.keybits); 2218 if (key_beg == 0 || key_beg > key_end) 2219 return (NULL); 2220 parent = hammer2_chain_getparent(parentp, how_maybe); 2221 } 2222 2223 /* 2224 * And execute 2225 */ 2226 return (hammer2_chain_lookup(parentp, key_nextp, 2227 key_beg, key_end, 2228 cache_indexp, flags)); 2229 } 2230 2231 /* 2232 * The raw scan function is similar to lookup/next but does not seek to a key. 2233 * Blockrefs are iterated via first_chain = (parent, NULL) and 2234 * next_chain = (parent, chain). 2235 * 2236 * The passed-in parent must be locked and its data resolved. The returned 2237 * chain will be locked. Pass chain == NULL to acquire the first sub-chain 2238 * under parent and then iterate with the passed-in chain (which this 2239 * function will unlock). 2240 */ 2241 hammer2_chain_t * 2242 hammer2_chain_scan(hammer2_chain_t *parent, hammer2_chain_t *chain, 2243 int *cache_indexp, int flags) 2244 { 2245 hammer2_mount_t *hmp; 2246 hammer2_blockref_t *base; 2247 hammer2_blockref_t *bref; 2248 hammer2_blockref_t bcopy; 2249 hammer2_chain_core_t *above; 2250 hammer2_key_t key; 2251 hammer2_key_t next_key; 2252 int count = 0; 2253 int how_always = HAMMER2_RESOLVE_ALWAYS; 2254 int how_maybe = HAMMER2_RESOLVE_MAYBE; 2255 int how; 2256 int generation; 2257 int maxloops = 300000; 2258 int wasdup; 2259 2260 hmp = parent->hmp; 2261 2262 /* 2263 * Scan flags borrowed from lookup 2264 */ 2265 if (flags & HAMMER2_LOOKUP_ALWAYS) { 2266 how_maybe = how_always; 2267 how = HAMMER2_RESOLVE_ALWAYS; 2268 } else if (flags & (HAMMER2_LOOKUP_NODATA | HAMMER2_LOOKUP_NOLOCK)) { 2269 how = HAMMER2_RESOLVE_NEVER; 2270 } else { 2271 how = HAMMER2_RESOLVE_MAYBE; 2272 } 2273 if (flags & (HAMMER2_LOOKUP_SHARED | HAMMER2_LOOKUP_NOLOCK)) { 2274 how_maybe |= HAMMER2_RESOLVE_SHARED; 2275 how_always |= HAMMER2_RESOLVE_SHARED; 2276 how |= HAMMER2_RESOLVE_SHARED; 2277 } 2278 2279 /* 2280 * Calculate key to locate first/next 
element, unlocking the previous 2281 * element as we go. Be careful, the key calculation can overflow. 2282 */ 2283 if (chain) { 2284 key = chain->bref.key + 2285 ((hammer2_key_t)1 << chain->bref.keybits); 2286 hammer2_chain_unlock(chain); 2287 chain = NULL; 2288 if (key == 0) 2289 goto done; 2290 } else { 2291 key = 0; 2292 } 2293 2294 again: 2295 if (--maxloops == 0) 2296 panic("hammer2_chain_scan: maxloops"); 2297 /* 2298 * Locate the blockref array. Currently we do a fully associative 2299 * search through the array. 2300 */ 2301 switch(parent->bref.type) { 2302 case HAMMER2_BREF_TYPE_INODE: 2303 /* 2304 * An inode with embedded data has no sub-chains. 2305 */ 2306 if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) 2307 goto done; 2308 base = &parent->data->ipdata.u.blockset.blockref[0]; 2309 count = HAMMER2_SET_COUNT; 2310 break; 2311 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2312 case HAMMER2_BREF_TYPE_INDIRECT: 2313 /* 2314 * Optimize indirect blocks in the INITIAL state to avoid 2315 * I/O. 2316 */ 2317 if (parent->flags & HAMMER2_CHAIN_INITIAL) { 2318 base = NULL; 2319 } else { 2320 if (parent->data == NULL) 2321 panic("parent->data is NULL"); 2322 base = &parent->data->npdata[0]; 2323 } 2324 count = parent->bytes / sizeof(hammer2_blockref_t); 2325 break; 2326 case HAMMER2_BREF_TYPE_VOLUME: 2327 base = &hmp->voldata.sroot_blockset.blockref[0]; 2328 count = HAMMER2_SET_COUNT; 2329 break; 2330 case HAMMER2_BREF_TYPE_FREEMAP: 2331 base = &hmp->voldata.freemap_blockset.blockref[0]; 2332 count = HAMMER2_SET_COUNT; 2333 break; 2334 default: 2335 panic("hammer2_chain_lookup: unrecognized blockref type: %d", 2336 parent->bref.type); 2337 base = NULL; /* safety */ 2338 count = 0; /* safety */ 2339 } 2340 2341 /* 2342 * Merged scan to find next candidate. 2343 * 2344 * hammer2_base_*() functions require the above->live_* fields 2345 * to be synchronized. 
2346 * 2347 * We need to hold the spinlock to access the block array and RB tree 2348 * and to interlock chain creation. 2349 */ 2350 if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) 2351 hammer2_chain_countbrefs(parent, base, count); 2352 2353 above = parent->core; 2354 next_key = 0; 2355 spin_lock(&above->cst.spin); 2356 chain = hammer2_combined_find(parent, base, count, 2357 cache_indexp, &next_key, 2358 key, HAMMER2_MAX_KEY, 2359 &bref); 2360 generation = above->generation; 2361 2362 /* 2363 * Exhausted parent chain, we're done. 2364 */ 2365 if (bref == NULL) { 2366 spin_unlock(&above->cst.spin); 2367 KKASSERT(chain == NULL); 2368 goto done; 2369 } 2370 2371 /* 2372 * Selected from blockref or in-memory chain. 2373 */ 2374 if (chain == NULL) { 2375 bcopy = *bref; 2376 spin_unlock(&above->cst.spin); 2377 chain = hammer2_chain_get(parent, generation, &bcopy); 2378 if (chain == NULL) { 2379 kprintf("retry scan parent %p keys %016jx\n", 2380 parent, key); 2381 goto again; 2382 } 2383 if (bcmp(&bcopy, bref, sizeof(bcopy))) { 2384 hammer2_chain_drop(chain); 2385 chain = NULL; 2386 goto again; 2387 } 2388 wasdup = 0; 2389 } else { 2390 hammer2_chain_ref(chain); 2391 wasdup = ((chain->flags & HAMMER2_CHAIN_DUPLICATED) != 0); 2392 spin_unlock(&above->cst.spin); 2393 } 2394 2395 /* 2396 * chain is referenced but not locked. We must lock the chain 2397 * to obtain definitive DUPLICATED/DELETED state 2398 */ 2399 hammer2_chain_lock(chain, how | HAMMER2_RESOLVE_NOREF); 2400 2401 /* 2402 * Skip deleted chains (XXX cache 'i' end-of-block-array? XXX) 2403 * 2404 * NOTE: chain's key range is not relevant as there might be 2405 * one-offs within the range that are not deleted. 2406 * 2407 * NOTE: XXX this could create problems with scans used in 2408 * situations other than mount-time recovery. 2409 * 2410 * NOTE: Lookups can race delete-duplicate because 2411 * delete-duplicate does not lock the parent's core 2412 * (they just use the spinlock on the core). 
We must 2413 * check for races by comparing the DUPLICATED flag before 2414 * releasing the spinlock with the flag after locking the 2415 * chain. 2416 */ 2417 if (chain->flags & HAMMER2_CHAIN_DELETED) { 2418 hammer2_chain_unlock(chain); 2419 chain = NULL; 2420 2421 if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) == 0 || wasdup) { 2422 key = next_key; 2423 if (key == 0) 2424 goto done; 2425 } 2426 goto again; 2427 } 2428 2429 done: 2430 /* 2431 * All done, return the chain or NULL 2432 */ 2433 return (chain); 2434 } 2435 2436 /* 2437 * Create and return a new hammer2 system memory structure of the specified 2438 * key, type and size and insert it under (*parentp). This is a full 2439 * insertion, based on the supplied key/keybits, and may involve creating 2440 * indirect blocks and moving other chains around via delete/duplicate. 2441 * 2442 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2443 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2444 * FULL. This typically means that the caller is creating the chain after 2445 * doing a hammer2_chain_lookup(). 2446 * 2447 * (*parentp) must be exclusive locked and may be replaced on return 2448 * depending on how much work the function had to do. 2449 * 2450 * (*chainp) usually starts out NULL and returns the newly created chain, 2451 * but if the caller desires the caller may allocate a disconnected chain 2452 * and pass it in instead. (It is also possible for the caller to use 2453 * chain_duplicate() to create a disconnected chain, manipulate it, then 2454 * pass it into this function to insert it). 2455 * 2456 * This function should NOT be used to insert INDIRECT blocks. It is 2457 * typically used to create/insert inodes and data blocks. 2458 * 2459 * Caller must pass-in an exclusively locked parent the new chain is to 2460 * be inserted under, and optionally pass-in a disconnected, exclusively 2461 * locked chain to insert (else we create a new chain). 
The function will 2462 * adjust (*parentp) as necessary, create or connect the chain, and 2463 * return an exclusively locked chain in *chainp. 2464 */ 2465 int 2466 hammer2_chain_create(hammer2_trans_t *trans, hammer2_chain_t **parentp, 2467 hammer2_chain_t **chainp, 2468 hammer2_key_t key, int keybits, int type, size_t bytes) 2469 { 2470 hammer2_mount_t *hmp; 2471 hammer2_chain_t *chain; 2472 hammer2_chain_t *parent = *parentp; 2473 hammer2_chain_core_t *above; 2474 hammer2_blockref_t *base; 2475 hammer2_blockref_t dummy; 2476 int allocated = 0; 2477 int error = 0; 2478 int count; 2479 int maxloops = 300000; 2480 2481 above = parent->core; 2482 KKASSERT(ccms_thread_lock_owned(&above->cst)); 2483 hmp = parent->hmp; 2484 chain = *chainp; 2485 2486 if (chain == NULL) { 2487 /* 2488 * First allocate media space and construct the dummy bref, 2489 * then allocate the in-memory chain structure. Set the 2490 * INITIAL flag for fresh chains which do not have embedded 2491 * data. 2492 */ 2493 bzero(&dummy, sizeof(dummy)); 2494 dummy.type = type; 2495 dummy.key = key; 2496 dummy.keybits = keybits; 2497 dummy.data_off = hammer2_getradix(bytes); 2498 dummy.methods = parent->bref.methods; 2499 chain = hammer2_chain_alloc(hmp, parent->pmp, trans, &dummy); 2500 hammer2_chain_core_alloc(trans, chain, NULL); 2501 2502 /* 2503 * Lock the chain manually, chain_lock will load the chain 2504 * which we do NOT want to do. (note: chain->refs is set 2505 * to 1 by chain_alloc() for us, but lockcnt is not). 2506 */ 2507 chain->lockcnt = 1; 2508 ccms_thread_lock(&chain->core->cst, CCMS_STATE_EXCLUSIVE); 2509 allocated = 1; 2510 2511 /* 2512 * We do NOT set INITIAL here (yet). INITIAL is only 2513 * used for indirect blocks. 2514 * 2515 * Recalculate bytes to reflect the actual media block 2516 * allocation. 
2517 */ 2518 bytes = (hammer2_off_t)1 << 2519 (int)(chain->bref.data_off & HAMMER2_OFF_MASK_RADIX); 2520 chain->bytes = bytes; 2521 2522 switch(type) { 2523 case HAMMER2_BREF_TYPE_VOLUME: 2524 case HAMMER2_BREF_TYPE_FREEMAP: 2525 panic("hammer2_chain_create: called with volume type"); 2526 break; 2527 case HAMMER2_BREF_TYPE_INDIRECT: 2528 panic("hammer2_chain_create: cannot be used to" 2529 "create indirect block"); 2530 break; 2531 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2532 panic("hammer2_chain_create: cannot be used to" 2533 "create freemap root or node"); 2534 break; 2535 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2536 KKASSERT(bytes == sizeof(chain->data->bmdata)); 2537 /* fall through */ 2538 case HAMMER2_BREF_TYPE_INODE: 2539 case HAMMER2_BREF_TYPE_DATA: 2540 default: 2541 /* 2542 * leave chain->data NULL, set INITIAL 2543 */ 2544 KKASSERT(chain->data == NULL); 2545 atomic_set_int(&chain->flags, HAMMER2_CHAIN_INITIAL); 2546 break; 2547 } 2548 } else { 2549 /* 2550 * We are reattaching a chain that has been duplicated and 2551 * left disconnected under a DIFFERENT parent with potentially 2552 * different key/keybits. 2553 * 2554 * The chain must be modified in the current transaction 2555 * (the duplication code should have done that for us), 2556 * and it's modify_tid should be greater than the parent's 2557 * bref.mirror_tid. This should cause it to be created under 2558 * the new parent. 2559 * 2560 * If deleted in the same transaction, the create/delete TIDs 2561 * will be the same and effective the chain will not have 2562 * existed at all from the point of view of the parent. 2563 * 2564 * Do NOT mess with the current state of the INITIAL flag. 2565 */ 2566 KKASSERT(chain->modify_tid == trans->sync_tid); 2567 chain->bref.key = key; 2568 chain->bref.keybits = keybits; 2569 KKASSERT(chain->above == NULL); 2570 } 2571 2572 /* 2573 * Calculate how many entries we have in the blockref array and 2574 * determine if an indirect block is required. 
2575 */ 2576 again: 2577 if (--maxloops == 0) 2578 panic("hammer2_chain_create: maxloops"); 2579 above = parent->core; 2580 2581 switch(parent->bref.type) { 2582 case HAMMER2_BREF_TYPE_INODE: 2583 KKASSERT((parent->data->ipdata.op_flags & 2584 HAMMER2_OPFLAG_DIRECTDATA) == 0); 2585 KKASSERT(parent->data != NULL); 2586 base = &parent->data->ipdata.u.blockset.blockref[0]; 2587 count = HAMMER2_SET_COUNT; 2588 break; 2589 case HAMMER2_BREF_TYPE_INDIRECT: 2590 case HAMMER2_BREF_TYPE_FREEMAP_NODE: 2591 if (parent->flags & HAMMER2_CHAIN_INITIAL) 2592 base = NULL; 2593 else 2594 base = &parent->data->npdata[0]; 2595 count = parent->bytes / sizeof(hammer2_blockref_t); 2596 break; 2597 case HAMMER2_BREF_TYPE_VOLUME: 2598 KKASSERT(parent->data != NULL); 2599 base = &hmp->voldata.sroot_blockset.blockref[0]; 2600 count = HAMMER2_SET_COUNT; 2601 break; 2602 case HAMMER2_BREF_TYPE_FREEMAP: 2603 KKASSERT(parent->data != NULL); 2604 base = &hmp->voldata.freemap_blockset.blockref[0]; 2605 count = HAMMER2_SET_COUNT; 2606 break; 2607 default: 2608 panic("hammer2_chain_create: unrecognized blockref type: %d", 2609 parent->bref.type); 2610 base = NULL; 2611 count = 0; 2612 break; 2613 } 2614 2615 /* 2616 * Make sure we've counted the brefs 2617 */ 2618 if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) 2619 hammer2_chain_countbrefs(parent, base, count); 2620 2621 KKASSERT(above->live_count >= 0 && above->live_count <= count); 2622 2623 /* 2624 * If no free blockref could be found we must create an indirect 2625 * block and move a number of blockrefs into it. With the parent 2626 * locked we can safely lock each child in order to delete+duplicate 2627 * it without causing a deadlock. 2628 * 2629 * This may return the new indirect block or the old parent depending 2630 * on where the key falls. NULL is returned on error. 
2631 */ 2632 if (above->live_count == count) { 2633 hammer2_chain_t *nparent; 2634 2635 nparent = hammer2_chain_create_indirect(trans, parent, 2636 key, keybits, 2637 type, &error); 2638 if (nparent == NULL) { 2639 if (allocated) 2640 hammer2_chain_drop(chain); 2641 chain = NULL; 2642 goto done; 2643 } 2644 if (parent != nparent) { 2645 hammer2_chain_unlock(parent); 2646 parent = *parentp = nparent; 2647 } 2648 goto again; 2649 } 2650 2651 /* 2652 * Link the chain into its parent. 2653 */ 2654 if (chain->above != NULL) 2655 panic("hammer2: hammer2_chain_create: chain already connected"); 2656 KKASSERT(chain->above == NULL); 2657 hammer2_chain_insert(above, NULL, chain, 2658 HAMMER2_CHAIN_INSERT_SPIN | 2659 HAMMER2_CHAIN_INSERT_LIVE, 2660 0); 2661 2662 if (allocated) { 2663 /* 2664 * Mark the newly created chain modified. This will cause 2665 * FLUSH_CREATE to be set. 2666 * 2667 * Device buffers are not instantiated for DATA elements 2668 * as these are handled by logical buffers. 2669 * 2670 * Indirect and freemap node indirect blocks are handled 2671 * by hammer2_chain_create_indirect() and not by this 2672 * function. 2673 * 2674 * Data for all other bref types is expected to be 2675 * instantiated (INODE, LEAF). 2676 */ 2677 switch(chain->bref.type) { 2678 case HAMMER2_BREF_TYPE_DATA: 2679 case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 2680 case HAMMER2_BREF_TYPE_INODE: 2681 hammer2_chain_modify(trans, &chain, 2682 HAMMER2_MODIFY_OPTDATA | 2683 HAMMER2_MODIFY_ASSERTNOCOPY); 2684 break; 2685 default: 2686 /* 2687 * Remaining types are not supported by this function. 2688 * In particular, INDIRECT and LEAF_NODE types are 2689 * handled by create_indirect(). 2690 */ 2691 panic("hammer2_chain_create: bad type: %d", 2692 chain->bref.type); 2693 /* NOT REACHED */ 2694 break; 2695 } 2696 } else { 2697 /* 2698 * When reconnecting a chain we must set FLUSH_CREATE and 2699 * setsubmod so the flush recognizes that it must update 2700 * the bref in the parent. 
2701 */ 2702 if ((chain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) { 2703 hammer2_chain_ref(chain); 2704 atomic_set_int(&chain->flags, 2705 HAMMER2_CHAIN_FLUSH_CREATE); 2706 } 2707 } 2708 hammer2_chain_setsubmod(trans, chain); 2709 2710 done: 2711 *chainp = chain; 2712 2713 return (error); 2714 } 2715 2716 /* 2717 * Replace (*chainp) with a duplicate in-memory chain structure which shares 2718 * the same core and media state as the orignal. The original *chainp is 2719 * unlocked and the replacement will be returned locked. The duplicated 2720 * chain is inserted under (*parentp). 2721 * 2722 * THE CALLER MUST HAVE ALREADY PROPERLY SEEKED (*parentp) TO THE INSERTION 2723 * POINT SANS ANY REQUIRED INDIRECT BLOCK CREATIONS DUE TO THE ARRAY BEING 2724 * FULL. This typically means that the caller is creating the chain after 2725 * doing a hammer2_chain_lookup(). 2726 * 2727 * The old chain must be in a DELETED state unless snapshot is non-zero. 2728 * 2729 * The new chain will be live (i.e. not deleted), and modified. 2730 * 2731 * If (parent) is non-NULL then the new duplicated chain is inserted under 2732 * the parent. 2733 * 2734 * If (parent) is NULL then the newly duplicated chain is not inserted 2735 * anywhere, similar to if it had just been chain_alloc()'d (suitable for 2736 * passing into hammer2_chain_create() after this function returns). 2737 * 2738 * WARNING! This function cannot take snapshots all by itself. The caller 2739 * needs to do other massaging for snapshots. 2740 * 2741 * WARNING! This function calls create which means it can insert indirect 2742 * blocks. Callers may have to refactor locked chains held across 2743 * the call (other than the ones passed into the call). 
 */
void
hammer2_chain_duplicate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
			hammer2_chain_t **chainp, hammer2_blockref_t *bref,
			int snapshot, int duplicate_reason)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *parent;
	hammer2_chain_t *ochain;	/* original chain being replaced */
	hammer2_chain_t *nchain;	/* new duplicate chain */
	hammer2_chain_core_t *above;
	size_t bytes;

	/* NOTE(review): duplicate_reason is not referenced in this body;
	 * presumably a debugging aid — confirm against callers. */

	/*
	 * We want nchain to be our go-to live chain, but ochain may be in
	 * a MODIFIED state within the current flush synchronization segment.
	 * Force any further modifications of ochain to do another COW
	 * operation even if modify_tid indicates that one is not needed.
	 *
	 * We don't want to set FORCECOW on nchain simply as an optimization,
	 * as many duplication calls simply move chains into ichains and
	 * then delete the original.
	 *
	 * WARNING!  We should never resolve DATA to device buffers
	 *	     (XXX allow it if the caller did?), and since
	 *	     we currently do not have the logical buffer cache
	 *	     buffer in-hand to fix its cached physical offset
	 *	     we also force the modify code to not COW it. XXX
	 */
	ochain = *chainp;
	hmp = ochain->hmp;
	/* ochain must already be deleted unless this is a snapshot */
	KKASSERT(snapshot == 1 || (ochain->flags & HAMMER2_CHAIN_DELETED));

	/*
	 * Now create a duplicate of the chain structure, associating
	 * it with the same core, making it the same size, pointing it
	 * to the same bref (the same media block).
	 *
	 * Give nchain the same modify_tid that we previously ensured was
	 * sufficiently advanced to trigger a block table insertion on flush.
	 *
	 * nchain copies ochain's data and must inherit ochain->update_lo.
	 *
	 * NOTE: bref.mirror_tid duplicated by virtue of bref copy in
	 *	 hammer2_chain_alloc()
	 */
	if (bref == NULL)
		bref = &ochain->bref;
	if (snapshot) {
		/* snapshots are allocated without a pmp association */
		nchain = hammer2_chain_alloc(hmp, NULL, trans, bref);
		atomic_set_int(&nchain->flags, HAMMER2_CHAIN_SNAPSHOT);
	} else {
		nchain = hammer2_chain_alloc(hmp, ochain->pmp, trans, bref);
	}
	hammer2_chain_core_alloc(trans, nchain, ochain);
	bytes = (hammer2_off_t)1 <<
		(int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	nchain->bytes = bytes;
	nchain->modify_tid = ochain->modify_tid;
	nchain->update_lo = ochain->update_lo;
	nchain->inode_reason = ochain->inode_reason + 0x100000;
	atomic_set_int(&nchain->flags,
		       ochain->flags & (HAMMER2_CHAIN_INITIAL |
					HAMMER2_CHAIN_FORCECOW |
					HAMMER2_CHAIN_UNLINKED));
	/*
	 * If ochain was modified in this transaction any further change
	 * to it must COW to avoid disturbing the duplicate's view.
	 */
	if (ochain->modify_tid == trans->sync_tid)
		atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW);

	/*
	 * Switch from ochain to nchain
	 */
	hammer2_chain_lock(nchain, HAMMER2_RESOLVE_NEVER |
				   HAMMER2_RESOLVE_NOREF);
	/* nchain has 1 ref */
	hammer2_chain_unlock(ochain);

	/*
	 * Place nchain in the modified state, instantiate media data
	 * if necessary.  Because modify_tid is already completely
	 * synchronized this should not result in a delete-duplicate.
	 *
	 * We want nchain at the target to look like a new insertion.
	 * Forcing the modification to be INPLACE accomplishes this
	 * because we get the same nchain with an updated modify_tid.
	 */
	if (nchain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		hammer2_chain_modify(trans, &nchain,
				     HAMMER2_MODIFY_OPTDATA |
				     HAMMER2_MODIFY_NOREALLOC |
				     HAMMER2_MODIFY_INPLACE);
	} else if (nchain->flags & HAMMER2_CHAIN_INITIAL) {
		hammer2_chain_modify(trans, &nchain,
				     HAMMER2_MODIFY_OPTDATA |
				     HAMMER2_MODIFY_INPLACE);
	} else {
		hammer2_chain_modify(trans, &nchain,
				     HAMMER2_MODIFY_INPLACE);
	}

	/*
	 * If parent is not NULL the duplicated chain will be entered under
	 * the parent and the FLUSH_CREATE bit set to tell flush to update
	 * the blockref.
	 *
	 * Having both chains locked is extremely important for atomicy.
	 */
	if (parentp && (parent = *parentp) != NULL) {
		above = parent->core;
		KKASSERT(ccms_thread_lock_owned(&above->cst));
		KKASSERT((nchain->flags & HAMMER2_CHAIN_DELETED) == 0);
		KKASSERT(parent->refs > 0);

		hammer2_chain_create(trans, parentp, &nchain,
				     nchain->bref.key, nchain->bref.keybits,
				     nchain->bref.type, nchain->bytes);
		parent = NULL;

		KKASSERT(nchain->flags & HAMMER2_CHAIN_FLUSH_CREATE);
		hammer2_chain_setsubmod(trans, nchain);
	}

	*chainp = nchain;
}

/*
 * Helper function for deleting chains.
 *
 * The chain is removed from the live view (the RBTREE).
 *
 * If appropriate, the chain is added to the shadow topology and FLUSH_DELETE
 * is set for flusher visibility.  The caller is responsible for calling
 * setsubmod on chain, so we do not adjust update_hi here.
2876 */ 2877 static void 2878 _hammer2_chain_delete_helper(hammer2_trans_t *trans, 2879 hammer2_chain_core_t *above, 2880 hammer2_chain_t *chain) 2881 { 2882 hammer2_mount_t *hmp; 2883 hammer2_chain_t *xchain; 2884 2885 KKASSERT(chain->flags & HAMMER2_CHAIN_ONRBTREE); 2886 KKASSERT(trans->sync_tid >= chain->modify_tid); 2887 KKASSERT((chain->flags & (HAMMER2_CHAIN_DELETED | 2888 HAMMER2_CHAIN_ONDBQ | 2889 HAMMER2_CHAIN_ONDBTREE | 2890 HAMMER2_CHAIN_FLUSH_DELETE)) == 0); 2891 2892 /* 2893 * Flag as deleted, reduce live_count and bump the above core's 2894 * generation. 2895 */ 2896 chain->delete_tid = trans->sync_tid; 2897 atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELETED); 2898 atomic_add_int(&above->live_count, -1); 2899 ++above->generation; 2900 hmp = chain->hmp; 2901 2902 /* 2903 * Remove from live tree 2904 */ 2905 RB_REMOVE(hammer2_chain_tree, &above->rbtree, chain); 2906 atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONRBTREE); 2907 2908 if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 2909 /* 2910 * If the chain was originally bmapped we must place on the 2911 * deleted tree and set FLUSH_DELETE (+ref) to prevent 2912 * destruction of the chain until the flush can reconcile 2913 * the parent's block table. 2914 * 2915 * NOTE! DBTREE is only representitive of the live view, 2916 * the flush must check both DBTREE and DBQ. 2917 */ 2918 xchain = RB_INSERT(hammer2_chain_tree, &above->dbtree, chain); 2919 KKASSERT(xchain == NULL); 2920 atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONDBTREE); 2921 2922 atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_DELETE); 2923 hammer2_chain_ref(chain); 2924 } else { 2925 /* 2926 * If the chain no longer (and never had) an actual blockmap 2927 * entry we must place it on the dbq list and set FLUSH_DELETE 2928 * (+ref) to prevent destruction of the chain until the flush 2929 * can reconcile the parent's block table. 2930 * 2931 * NOTE! DBTREE is only representitive of the live view, 2932 * the flush must check both DBTREE and DBQ. 
 */
		TAILQ_INSERT_TAIL(&above->dbq, chain, db_entry);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_ONDBQ);

		atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSH_DELETE);
		hammer2_chain_ref(chain);
	}
}

/*
 * Special in-place delete-duplicate sequence which does not require a
 * locked parent.  (*chainp) is marked DELETED and atomically replaced
 * with a duplicate.  Atomicity is at the very-fine spin-lock level in
 * order to ensure that lookups do not race us.
 *
 * The flush code will sometimes call this function with a deleted chain.
 * In this situation the old chain's memory is reallocated without
 * duplicating it.
 *
 * The new chain will be marked modified for the current transaction.
 *
 * On return *chainp points at the new chain (nchain), which is returned
 * locked with one ref; the old chain has been unlocked.
 */
void
hammer2_chain_delete_duplicate(hammer2_trans_t *trans, hammer2_chain_t **chainp,
			       int flags)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *ochain;	/* original chain being replaced */
	hammer2_chain_t *nchain;	/* in-place replacement */
	hammer2_chain_core_t *above;	/* shared parent core */
	size_t bytes;

	if (hammer2_debug & 0x20000)
		Debugger("dd");

	/*
	 * Note that we do not have to call setsubmod on ochain, calling it
	 * on nchain is sufficient.
	 */
	ochain = *chainp;
	hmp = ochain->hmp;

	/* An inode chain must have its media data resolved before we copy */
	if (ochain->bref.type == HAMMER2_BREF_TYPE_INODE) {
		KKASSERT(ochain->data);
	}

	/*
	 * First create a duplicate of the chain structure.
	 * (nchain is allocated with one ref).
	 *
	 * In the case where nchain inherits ochain's core, nchain is
	 * effectively locked due to ochain being locked (and sharing the
	 * core), until we can give nchain its own official lock.
	 *
	 * WARNING! Flusher concurrency can create two cases.  The first is
	 *	    that the flusher might be working on a chain that has
	 *	    been deleted in the live view but is live in the flusher's
	 *	    view.  In the second case the flusher may be duplicating
	 *	    a forward-transacted chain.  In both situations nchain
	 *	    must be marked deleted.
	 *
	 * WARNING! hammer2_chain_core_alloc() also acts on these issues.
	 */
	nchain = hammer2_chain_alloc(hmp, ochain->pmp, trans, &ochain->bref);
	if ((ochain->flags & HAMMER2_CHAIN_DELETED) ||
	    (ochain->modify_tid > trans->sync_tid)) {
		atomic_set_int(&nchain->flags, HAMMER2_CHAIN_DELETED);
	}
	if (flags & HAMMER2_DELDUP_RECORE)
		hammer2_chain_core_alloc(trans, nchain, NULL);
	else
		hammer2_chain_core_alloc(trans, nchain, ochain);
	above = ochain->above;

	/* byte size is encoded as a radix in the low bits of data_off */
	bytes = (hammer2_off_t)1 <<
		(int)(ochain->bref.data_off & HAMMER2_OFF_MASK_RADIX);
	nchain->bytes = bytes;

	/*
	 * nchain inherits ochain's live state including its modification
	 * state.  This function disposes of the original.  Because we are
	 * doing this in-place under the same parent the block array
	 * inserted/deleted state does not change.
	 *
	 * nchain copies ochain's data and must inherit ochain->update_lo.
	 *
	 * If ochain was previously marked FORCECOW we also flag nchain
	 * FORCECOW (used during hardlink splits).  FORCECOW forces a
	 * reallocation of the block when we modify the chain a little later,
	 * it does not force another delete-duplicate.
	 *
	 * NOTE: bref.mirror_tid duplicated by virtue of bref copy in
	 *	 hammer2_chain_alloc()
	 */
	nchain->data_count += ochain->data_count;
	nchain->inode_count += ochain->inode_count;
	atomic_set_int(&nchain->flags,
		       ochain->flags & (HAMMER2_CHAIN_INITIAL |
					HAMMER2_CHAIN_FORCECOW |
					HAMMER2_CHAIN_UNLINKED));
	if (ochain->modify_tid == trans->sync_tid)
		atomic_set_int(&ochain->flags, HAMMER2_CHAIN_FORCECOW);
	nchain->inode_reason = ochain->inode_reason + 0x1000;
	nchain->update_lo = ochain->update_lo;
	nchain->dsrc = ochain->bref;			/* DEBUG */
	nchain->dsrc_dupfromat = trans->sync_tid;	/* DEBUG */
	nchain->dsrc_dupfromflags = trans->flags;	/* DEBUG */
	nchain->dsrc_reason = ochain->inode_reason;	/* DEBUG */
	nchain->dsrc_ninserts = ochain->ninserts;	/* DEBUG */
	nchain->dsrc_flags = ochain->flags;		/* DEBUG */
	nchain->dsrc_modify = ochain->modify_tid;	/* DEBUG */
	nchain->dsrc_delete = ochain->delete_tid;	/* DEBUG */
	nchain->dsrc_update_lo = ochain->update_lo;	/* DEBUG */
	nchain->dsrc_original = ochain;			/* DEBUG */

	/*
	 * Lock nchain so both chains are now locked (extremely important
	 * for atomicity).  The shared core allows us to unlock ochain
	 * without actually unlocking ochain.
	 */
	hammer2_chain_lock(nchain, HAMMER2_RESOLVE_NEVER);
	/* extra ref still present from original allocation */

	KKASSERT(ochain->flags & (HAMMER2_CHAIN_ONRBTREE |
				  HAMMER2_CHAIN_ONDBTREE |
				  HAMMER2_CHAIN_ONDBQ));
	spin_lock(&above->cst.spin);

	nchain->modify_tid = ochain->modify_tid;
	nchain->delete_tid = HAMMER2_MAX_TID;

	if (nchain->flags & HAMMER2_CHAIN_DELETED) {
		/*
		 * Special case, used by the flush code only in two cases:
		 *
		 * (1) The flush must operate on a chain that is visible to
		 *     the flush but deleted in the live view.
		 *
		 * (2) The flush must operate on a forward-indexed chain in
		 *     the live view, which typically has
		 *     modify_tid > trans->sync_tid (see the else-if branch
		 *     below).  NOTE(review): the original comment was
		 *     truncated at this point; confirm intent.
		 *
		 * In these situations nchain will be marked deleted and
		 * insert before ochain.  nchain must inherit certain features
		 * of ochain such as the BMAPPED state.
		 */
		KKASSERT(trans->flags & HAMMER2_TRANS_ISFLUSH);
		KKASSERT(ochain->modify_tid < trans->sync_tid);
		KKASSERT(ochain->delete_tid > trans->sync_tid);
		atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_TEMPORARY);
		hammer2_chain_insert(above, ochain, nchain, 0, 0);

		if ((ochain->flags & HAMMER2_CHAIN_DELETED) &&
		    ochain->modify_tid < trans->sync_tid) {
			nchain->delete_tid = ochain->delete_tid;
			ochain->delete_tid = trans->sync_tid;
		} else if (ochain->modify_tid > trans->sync_tid) {
			nchain->delete_tid = ochain->modify_tid;
		}
	} else {
		/*
		 * Normal case, delete-duplicate deletes ochain and nchain
		 * is the new live chain.
		 */
		_hammer2_chain_delete_helper(trans, above, ochain);
		hammer2_chain_insert(above, ochain, nchain,
				     HAMMER2_CHAIN_INSERT_LIVE, 0);
	}
	spin_unlock(&above->cst.spin);

	/*
	 * ochain must be unlocked because ochain and nchain might share
	 * a buffer cache buffer, so we need to release it so nchain can
	 * potentially obtain it.
	 */
	hammer2_chain_setsubmod(trans, ochain);
	hammer2_chain_unlock(ochain);

	/*
	 * Finish fixing up nchain.  A new block will be allocated if
	 * crossing a synchronization point (meta-data only).
	 *
	 * Calling hammer2_chain_modify() will update modify_tid to
	 * (typically) trans->sync_tid.
	 */
	if (nchain->bref.type == HAMMER2_BREF_TYPE_DATA) {
		hammer2_chain_modify(trans, &nchain,
				     HAMMER2_MODIFY_OPTDATA |
				     HAMMER2_MODIFY_NOREALLOC |
				     HAMMER2_MODIFY_INPLACE);
	} else if (nchain->flags & HAMMER2_CHAIN_INITIAL) {
		hammer2_chain_modify(trans, &nchain,
				     HAMMER2_MODIFY_OPTDATA |
				     HAMMER2_MODIFY_INPLACE);
	} else {
		hammer2_chain_modify(trans, &nchain,
				     HAMMER2_MODIFY_INPLACE);
	}
	hammer2_chain_drop(nchain);

	/*
	 * Unconditionally set FLUSH_CREATE to force the parent blockrefs to
	 * update as the chain_modify() above won't necessarily do it.
	 * The flag carries an extra ref (dropped by the flusher).
	 */
	if ((nchain->flags & HAMMER2_CHAIN_FLUSH_CREATE) == 0) {
		atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_CREATE);
		hammer2_chain_ref(nchain);
	}

	/*
	 * If nchain is in a DELETED state we must set FLUSH_DELETE.
	 * The KKASSERT verifies it is not already set (nchain was just
	 * created above).
	 */
	if (nchain->flags & HAMMER2_CHAIN_DELETED)
		KKASSERT((nchain->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0);
#if 1
	if ((nchain->flags & HAMMER2_CHAIN_FLUSH_DELETE) == 0 &&
	    (nchain->flags & HAMMER2_CHAIN_DELETED)) {
		atomic_set_int(&nchain->flags, HAMMER2_CHAIN_FLUSH_DELETE);
		hammer2_chain_ref(nchain);
	}
#endif
	hammer2_chain_setsubmod(trans, nchain);
	*chainp = nchain;
}

/*
 * Create a snapshot of the specified {parent, ochain} with the specified
 * label.  The originating hammer2_inode must be exclusively locked for
 * safety.
 *
 * The ioctl code has already synced the filesystem.
 */
int
hammer2_chain_snapshot(hammer2_trans_t *trans, hammer2_chain_t **ochainp,
		       hammer2_ioc_pfs_t *pfs)
{
	hammer2_mount_t *hmp;
	hammer2_chain_t *ochain = *ochainp;	/* chain being snapshotted */
	hammer2_chain_t *nchain;		/* new snapshot inode chain */
	hammer2_inode_data_t *ipdata;
	hammer2_inode_t *nip;
	size_t name_len;
	hammer2_key_t lhc;
	struct vattr vat;
	uuid_t opfs_clid;
	int error;

	kprintf("snapshot %s ochain->refs %d ochain->flags %08x\n",
		pfs->name, ochain->refs, ochain->flags);

	name_len = strlen(pfs->name);
	/* NOTE(review): lhc is computed but not used below */
	lhc = hammer2_dirhash(pfs->name, name_len);

	hmp = ochain->hmp;
	opfs_clid = ochain->data->ipdata.pfs_clid;

	/* NOTE(review): no-op store-back; ochain is not replaced here */
	*ochainp = ochain;

	/*
	 * Create the snapshot directory under the super-root
	 *
	 * Set PFS type, generate a unique filesystem id, and generate
	 * a cluster id.  Use the same clid when snapshotting a PFS root,
	 * which theoretically allows the snapshot to be used as part of
	 * the same cluster (perhaps as a cache).
	 *
	 * Copy the (flushed) ochain's blockref array.  Theoretically we
	 * could use chain_duplicate() but it becomes difficult to
	 * disentangle the shared core so for now just brute-force it.
	 */
	VATTR_NULL(&vat);
	vat.va_type = VDIR;
	vat.va_mode = 0755;
	nchain = NULL;
	nip = hammer2_inode_create(trans, hmp->sroot, &vat, proc0.p_ucred,
				   pfs->name, name_len, &nchain, &error);

	if (nip) {
		ipdata = hammer2_chain_modify_ip(trans, nip, &nchain, 0);
		ipdata->pfs_type = HAMMER2_PFSTYPE_SNAPSHOT;
		kern_uuidgen(&ipdata->pfs_fsid, 1);
		if (ochain->flags & HAMMER2_CHAIN_PFSROOT)
			ipdata->pfs_clid = opfs_clid;
		else
			kern_uuidgen(&ipdata->pfs_clid, 1);
		atomic_set_int(&nchain->flags, HAMMER2_CHAIN_PFSROOT);
		/* brute-force copy of the top-level blockref set */
		ipdata->u.blockset = ochain->data->ipdata.u.blockset;

		hammer2_inode_unlock_ex(nip, nchain);
	}
	/*
	 * error is assumed set by hammer2_inode_create() on both success
	 * and failure paths -- TODO(review): confirm against its contract.
	 */
	return (error);
}

/*
 * Create an indirect block that covers one or more of the elements in the
 * current parent.  Either returns the existing parent with no locking or
 * ref changes or returns the new indirect block locked and referenced
 * and leaving the original parent lock/ref intact as well.
 *
 * If an error occurs, NULL is returned and *errorp is set to the error.
 *
 * The returned chain depends on where the specified key falls.
 *
 * The key/keybits for the indirect mode only needs to follow these rules:
 *
 * (1) That all elements underneath it fit within its key space and
 *
 * (2) That all elements outside it are outside its key space.
 *
 * (3) When creating the new indirect block any elements in the current
 *     parent that fit within the new indirect block's keyspace must be
 *     moved into the new indirect block.
 *
 * (4) The keyspace chosen for the inserted indirect block CAN cover a wider
 *     keyspace than the current parent, but lookup/iteration rules will
 *     ensure (and must ensure) that rule (2) for all parents leading up
 *     to the nearest inode or the root volume header is adhered to.
 *     This is accomplished by always recursing through matching keyspaces
 *     in the hammer2_chain_lookup() and hammer2_chain_next() API.
 *
 * The current implementation calculates the current worst-case keyspace by
 * iterating the current parent and then divides it into two halves, choosing
 * whichever half has the most elements (not necessarily the half containing
 * the requested key).
 *
 * We can also opt to use the half with the least number of elements.  This
 * causes lower-numbered keys (aka logical file offsets) to recurse through
 * fewer indirect blocks and higher-numbered keys to recurse through more.
 * This also has the risk of not moving enough elements to the new indirect
 * block and being forced to create several indirect blocks before the element
 * can be inserted.
 *
 * Must be called with an exclusively locked parent.
 */
static int hammer2_chain_indkey_freemap(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count);
static int hammer2_chain_indkey_normal(hammer2_chain_t *parent,
				hammer2_key_t *keyp, int keybits,
				hammer2_blockref_t *base, int count);
static
hammer2_chain_t *
hammer2_chain_create_indirect(hammer2_trans_t *trans, hammer2_chain_t *parent,
			      hammer2_key_t create_key, int create_bits,
			      int for_type, int *errorp)
{
	hammer2_mount_t *hmp;
	hammer2_chain_core_t *above;	/* parent's core (spinlock owner) */
	hammer2_chain_core_t *icore;	/* new indirect block's core */
	hammer2_blockref_t *base;	/* parent's media blockref array */
	hammer2_blockref_t *bref;
	hammer2_blockref_t bcopy;
	hammer2_chain_t *chain;
	hammer2_chain_t *ichain;	/* the new indirect block */
	hammer2_chain_t dummy;		/* template for chain allocation */
	hammer2_key_t key = create_key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int keybits = create_bits;
	int count;
	int nbytes;
	int cache_index;
	int loops;
	int reason;			/* diagnostic for the loop panic */
	int generation;
	int maxloops = 300000;
	int retry_same;			/* re-scan same key after a race */
	int wasdup;			/* DUPLICATED state before locking */

	/*
	 * Calculate the base blockref pointer or NULL if the chain
	 * is known to be empty.  We need to calculate the array count
	 * for RB lookups either way.
	 */
	hmp = parent->hmp;
	*errorp = 0;
	KKASSERT(ccms_thread_lock_owned(&parent->core->cst));
	above = parent->core;

	/*hammer2_chain_modify(trans, &parent, HAMMER2_MODIFY_OPTDATA);*/
	if (parent->flags & HAMMER2_CHAIN_INITIAL) {
		/* INITIAL chains have no media data to point into */
		base = NULL;

		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_create_indirect: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			count = 0;
			break;
		}
	} else {
		switch(parent->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
			base = &parent->data->ipdata.u.blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
			base = &parent->data->npdata[0];
			count = parent->bytes / sizeof(hammer2_blockref_t);
			break;
		case HAMMER2_BREF_TYPE_VOLUME:
			base = &hmp->voldata.sroot_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		case HAMMER2_BREF_TYPE_FREEMAP:
			base = &hmp->voldata.freemap_blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
			break;
		default:
			panic("hammer2_chain_create_indirect: "
			      "unrecognized blockref type: %d",
			      parent->bref.type);
			count = 0;
			break;
		}
	}

	/*
	 * dummy used in later chain allocation (no longer used for lookups).
	 */
	bzero(&dummy, sizeof(dummy));
	dummy.delete_tid = HAMMER2_MAX_TID;

	/*
	 * When creating an indirect block for a freemap node or leaf
	 * the key/keybits must be fitted to static radix levels because
	 * particular radix levels use particular reserved blocks in the
	 * related zone.
	 *
	 * This routine calculates the key/radix of the indirect block
	 * we need to create, and whether it is on the high-side or the
	 * low-side.
	 */
	if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		keybits = hammer2_chain_indkey_freemap(parent, &key, keybits,
						       base, count);
	} else {
		keybits = hammer2_chain_indkey_normal(parent, &key, keybits,
						      base, count);
	}

	/*
	 * Normalize the key for the radix being represented, keeping the
	 * high bits and throwing away the low bits.
	 */
	key &= ~(((hammer2_key_t)1 << keybits) - 1);

	/*
	 * How big should our new indirect block be?  It has to be at least
	 * as large as its parent.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE)
		nbytes = HAMMER2_IND_BYTES_MIN;
	else
		nbytes = HAMMER2_IND_BYTES_MAX;
	if (nbytes < count * sizeof(hammer2_blockref_t))
		nbytes = count * sizeof(hammer2_blockref_t);

	/*
	 * Ok, create our new indirect block
	 */
	if (for_type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
	    for_type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
		dummy.bref.type = HAMMER2_BREF_TYPE_FREEMAP_NODE;
	} else {
		dummy.bref.type = HAMMER2_BREF_TYPE_INDIRECT;
	}
	dummy.bref.key = key;
	dummy.bref.keybits = keybits;
	dummy.bref.data_off = hammer2_getradix(nbytes);
	dummy.bref.methods = parent->bref.methods;

	ichain = hammer2_chain_alloc(hmp, parent->pmp, trans, &dummy.bref);
	atomic_set_int(&ichain->flags, HAMMER2_CHAIN_INITIAL);
	hammer2_chain_core_alloc(trans, ichain, NULL);
	icore = ichain->core;
	hammer2_chain_lock(ichain, HAMMER2_RESOLVE_MAYBE);
	hammer2_chain_drop(ichain);	/* excess ref from alloc */

	/*
	 * We have to mark it modified to allocate its block, but use
	 * OPTDATA to allow it to remain in the INITIAL state.  Otherwise
	 * it won't be acted upon by the flush code.
	 */
	hammer2_chain_modify(trans, &ichain, HAMMER2_MODIFY_OPTDATA);

	/*
	 * Iterate the original parent and move the matching brefs into
	 * the new indirect block.
	 *
	 * XXX handle flushes.
	 */
	key_beg = 0;
	key_end = HAMMER2_MAX_KEY;
	cache_index = 0;
	spin_lock(&above->cst.spin);
	loops = 0;
	reason = 0;
	retry_same = 0;

	for (;;) {
		if (++loops > 100000) {
			spin_unlock(&above->cst.spin);
			panic("excessive loops r=%d p=%p base/count %p:%d %016jx\n",
			      reason, parent, base, count, key_next);
		}

		/*
		 * NOTE: spinlock stays intact, returned chain (if not NULL)
		 *	 is not referenced or locked which means that we
		 *	 cannot safely check its flagged / deletion status
		 *	 until we lock it.
		 */
		chain = hammer2_combined_find(parent, base, count,
					      &cache_index, &key_next,
					      key_beg, key_end,
					      &bref);
		generation = above->generation;
		if (bref == NULL)
			break;
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/*
		 * Skip keys that are not within the key/radix of the new
		 * indirect block.  They stay in the parent.
		 */
		if ((~(((hammer2_key_t)1 << keybits) - 1) &
		    (key ^ bref->key)) != 0) {
			goto next_key_spinlocked;
		}

		/*
		 * Load the new indirect block by acquiring the related
		 * chains (potentially from media as it might not be
		 * in-memory).  Then move it to the new parent (ichain)
		 * via DELETE-DUPLICATE.
		 *
		 * chain is referenced but not locked.  We must lock the
		 * chain to obtain definitive DUPLICATED/DELETED state
		 */
		if (chain) {
			/*
			 * Use chain already present in the RBTREE
			 */
			hammer2_chain_ref(chain);
			wasdup = ((chain->flags &
				   HAMMER2_CHAIN_DUPLICATED) != 0);
			spin_unlock(&above->cst.spin);
			hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER |
						  HAMMER2_RESOLVE_NOREF);
		} else {
			/*
			 * Get chain for blockref element.  _get returns NULL
			 * on insertion race.
			 */
			bcopy = *bref;
			spin_unlock(&above->cst.spin);
			chain = hammer2_chain_get(parent, generation, &bcopy);
			if (chain == NULL) {
				reason = 1;
				spin_lock(&above->cst.spin);
				continue;
			}
			if (bcmp(&bcopy, bref, sizeof(bcopy))) {
				/* bref changed under us, retry */
				reason = 2;
				hammer2_chain_drop(chain);
				spin_lock(&above->cst.spin);
				continue;
			}
			hammer2_chain_lock(chain, HAMMER2_RESOLVE_NEVER |
						  HAMMER2_RESOLVE_NOREF);
			wasdup = 0;
		}

		/*
		 * This is always live so if the chain has been delete-
		 * duplicated we raced someone and we have to retry.
		 *
		 * NOTE: Lookups can race delete-duplicate because
		 *	 delete-duplicate does not lock the parent's core
		 *	 (they just use the spinlock on the core).  We must
		 *	 check for races by comparing the DUPLICATED flag
		 *	 before releasing the spinlock with the flag after
		 *	 locking the chain.
		 *
		 *	 (note reversed logic for this one)
		 */
		if (chain->flags & HAMMER2_CHAIN_DELETED) {
			hammer2_chain_unlock(chain);
			if ((chain->flags & HAMMER2_CHAIN_DUPLICATED) &&
			    wasdup == 0) {
				retry_same = 1;
			}
			goto next_key;
		}

		/*
		 * Shift the chain to the indirect block.
		 *
		 * WARNING! Can cause held-over chains to require a refactor.
		 *	    Fortunately we have none (our locked chains are
		 *	    passed into and modified by the call).
		 */
		hammer2_chain_delete(trans, chain, 0);
		hammer2_chain_duplicate(trans, &ichain, &chain, NULL, 0, 1);
		hammer2_chain_unlock(chain);
		KKASSERT(parent->refs > 0);
		chain = NULL;
next_key:
		spin_lock(&above->cst.spin);
next_key_spinlocked:
		if (--maxloops == 0)
			panic("hammer2_chain_create_indirect: maxloops");
		reason = 4;
		if (retry_same == 0) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
		}
		/* loop */
	}
	spin_unlock(&above->cst.spin);

	/*
	 * Insert the new indirect block into the parent now that we've
	 * cleared out some entries in the parent.  We calculated a good
	 * insertion index in the loop above (ichain->index).
	 *
	 * We don't have to set FLUSH_CREATE here because we mark ichain
	 * modified down below (so the normal modified -> flush -> set-moved
	 * sequence applies).
	 *
	 * The insertion shouldn't race as this is a completely new block
	 * and the parent is locked.
	 */
	KKASSERT((ichain->flags & HAMMER2_CHAIN_ONRBTREE) == 0);
	hammer2_chain_insert(above, NULL, ichain,
			     HAMMER2_CHAIN_INSERT_SPIN |
			     HAMMER2_CHAIN_INSERT_LIVE,
			     0);

	/*
	 * Mark the new indirect block modified after insertion, which
	 * will propagate up through parent all the way to the root and
	 * also allocate the physical block in ichain for our caller,
	 * and assign ichain->data to a pre-zero'd space (because there
	 * is not prior data to copy into it).
	 */
	/*hammer2_chain_modify(trans, &ichain, HAMMER2_MODIFY_OPTDATA);*/
	hammer2_chain_setsubmod(trans, ichain);

	/*
	 * Figure out what to return.
	 */
	if (~(((hammer2_key_t)1 << keybits) - 1) &
	    (create_key ^ key)) {
		/*
		 * Key being created is outside the key range,
		 * return the original parent.
		 */
		hammer2_chain_unlock(ichain);
	} else {
		/*
		 * Otherwise its in the range, return the new parent.
		 * (leave both the new and old parent locked).
		 */
		parent = ichain;
	}

	return(parent);
}

/*
 * Calculate the keybits and highside/lowside of the freemap node the
 * caller is creating.
 *
 * This routine will specify the next higher-level freemap key/radix
 * representing the lowest-ordered set.  By doing so, eventually all
 * low-ordered sets will be moved one level down.
 *
 * We have to be careful here because the freemap reserves a limited
 * number of blocks for a limited number of levels.  So we can't just
 * push indiscriminately.
 *
 * NOTE(review): forward-declared 'static' above but defined without
 * 'static' here -- the declarations should agree.
 */
int
hammer2_chain_indkey_freemap(hammer2_chain_t *parent, hammer2_key_t *keyp,
			     int keybits, hammer2_blockref_t *base, int count)
{
	hammer2_chain_core_t *above;
	hammer2_chain_t *chain;
	hammer2_blockref_t *bref;
	hammer2_key_t key;
	hammer2_key_t key_beg;
	hammer2_key_t key_end;
	hammer2_key_t key_next;
	int cache_index;
	int locount;
	int hicount;
	int maxloops = 300000;

	key = *keyp;
	above = parent->core;
	locount = 0;
	hicount = 0;
	keybits = 64;		/* caller's keybits is recalculated below */

	/*
	 * Calculate the range of keys in the array being careful to skip
	 * slots which are overridden with a deletion.
	 */
	key_beg = 0;
	key_end = HAMMER2_MAX_KEY;
	cache_index = 0;
	spin_lock(&above->cst.spin);

	for (;;) {
		if (--maxloops == 0) {
			panic("indkey_freemap shit %p %p:%d\n",
			      parent, base, count);
		}
		chain = hammer2_combined_find(parent, base, count,
					      &cache_index, &key_next,
					      key_beg, key_end,
					      &bref);

		/*
		 * Exhausted search
		 */
		if (bref == NULL)
			break;

		/*
		 * NOTE: No need to check DUPLICATED here because we do
		 *	 not release the spinlock.
		 */
		if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) {
			if (key_next == 0 || key_next > key_end)
				break;
			key_beg = key_next;
			continue;
		}

		/*
		 * Use the full live (not deleted) element for the scan
		 * iteration.  HAMMER2 does not allow partial replacements.
		 *
		 * XXX should be built into hammer2_combined_find().
		 */
		key_next = bref->key + ((hammer2_key_t)1 << bref->keybits);

		/* track the smallest radix / lowest key seen */
		if (keybits > bref->keybits) {
			key = bref->key;
			keybits = bref->keybits;
		} else if (keybits == bref->keybits && bref->key < key) {
			key = bref->key;
		}
		if (key_next == 0)
			break;
		key_beg = key_next;
	}
	spin_unlock(&above->cst.spin);

	/*
	 * Return the keybits for a higher-level FREEMAP_NODE covering
	 * this node.
	 */
	switch(keybits) {
	case HAMMER2_FREEMAP_LEVEL0_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL1_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL1_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL2_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL2_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL3_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL3_RADIX:
		keybits = HAMMER2_FREEMAP_LEVEL4_RADIX;
		break;
	case HAMMER2_FREEMAP_LEVEL4_RADIX:
		panic("hammer2_chain_indkey_freemap: level too high");
		break;
	default:
		panic("hammer2_chain_indkey_freemap: bad radix");
		break;
	}
	*keyp = key;

	return (keybits);
}

/*
 * Calculate the keybits and highside/lowside of the indirect block the
 * caller is creating.
3742 */ 3743 static int 3744 hammer2_chain_indkey_normal(hammer2_chain_t *parent, hammer2_key_t *keyp, 3745 int keybits, hammer2_blockref_t *base, int count) 3746 { 3747 hammer2_chain_core_t *above; 3748 hammer2_blockref_t *bref; 3749 hammer2_chain_t *chain; 3750 hammer2_key_t key_beg; 3751 hammer2_key_t key_end; 3752 hammer2_key_t key_next; 3753 hammer2_key_t key; 3754 int nkeybits; 3755 int locount; 3756 int hicount; 3757 int cache_index; 3758 int maxloops = 300000; 3759 3760 key = *keyp; 3761 above = parent->core; 3762 locount = 0; 3763 hicount = 0; 3764 3765 /* 3766 * Calculate the range of keys in the array being careful to skip 3767 * slots which are overridden with a deletion. Once the scan 3768 * completes we will cut the key range in half and shift half the 3769 * range into the new indirect block. 3770 */ 3771 key_beg = 0; 3772 key_end = HAMMER2_MAX_KEY; 3773 cache_index = 0; 3774 spin_lock(&above->cst.spin); 3775 3776 for (;;) { 3777 if (--maxloops == 0) { 3778 panic("indkey_freemap shit %p %p:%d\n", 3779 parent, base, count); 3780 } 3781 chain = hammer2_combined_find(parent, base, count, 3782 &cache_index, &key_next, 3783 key_beg, key_end, 3784 &bref); 3785 3786 /* 3787 * Exhausted search 3788 */ 3789 if (bref == NULL) 3790 break; 3791 3792 /* 3793 * NOTE: No need to check DUPLICATED here because we do 3794 * not release the spinlock. 3795 */ 3796 if (chain && (chain->flags & HAMMER2_CHAIN_DELETED)) { 3797 if (key_next == 0 || key_next > key_end) 3798 break; 3799 key_beg = key_next; 3800 continue; 3801 } 3802 3803 /* 3804 * Use the full live (not deleted) element for the scan 3805 * iteration. HAMMER2 does not allow partial replacements. 3806 * 3807 * XXX should be built into hammer2_combined_find(). 3808 */ 3809 key_next = bref->key + ((hammer2_key_t)1 << bref->keybits); 3810 3811 /* 3812 * Expand our calculated key range (key, keybits) to fit 3813 * the scanned key. 
nkeybits represents the full range 3814 * that we will later cut in half (two halves @ nkeybits - 1). 3815 */ 3816 nkeybits = keybits; 3817 if (nkeybits < bref->keybits) { 3818 if (bref->keybits > 64) { 3819 kprintf("bad bref chain %p bref %p\n", 3820 chain, bref); 3821 Debugger("fubar"); 3822 } 3823 nkeybits = bref->keybits; 3824 } 3825 while (nkeybits < 64 && 3826 (~(((hammer2_key_t)1 << nkeybits) - 1) & 3827 (key ^ bref->key)) != 0) { 3828 ++nkeybits; 3829 } 3830 3831 /* 3832 * If the new key range is larger we have to determine 3833 * which side of the new key range the existing keys fall 3834 * under by checking the high bit, then collapsing the 3835 * locount into the hicount or vise-versa. 3836 */ 3837 if (keybits != nkeybits) { 3838 if (((hammer2_key_t)1 << (nkeybits - 1)) & key) { 3839 hicount += locount; 3840 locount = 0; 3841 } else { 3842 locount += hicount; 3843 hicount = 0; 3844 } 3845 keybits = nkeybits; 3846 } 3847 3848 /* 3849 * The newly scanned key will be in the lower half or the 3850 * upper half of the (new) key range. 3851 */ 3852 if (((hammer2_key_t)1 << (nkeybits - 1)) & bref->key) 3853 ++hicount; 3854 else 3855 ++locount; 3856 3857 if (key_next == 0) 3858 break; 3859 key_beg = key_next; 3860 } 3861 spin_unlock(&above->cst.spin); 3862 bref = NULL; /* now invalid (safety) */ 3863 3864 /* 3865 * Adjust keybits to represent half of the full range calculated 3866 * above (radix 63 max) 3867 */ 3868 --keybits; 3869 3870 /* 3871 * Select whichever half contains the most elements. Theoretically 3872 * we can select either side as long as it contains at least one 3873 * element (in order to ensure that a free slot is present to hold 3874 * the indirect block). 3875 */ 3876 if (hammer2_indirect_optimize) { 3877 /* 3878 * Insert node for least number of keys, this will arrange 3879 * the first few blocks of a large file or the first few 3880 * inodes in a directory with fewer indirect blocks when 3881 * created linearly. 
3882 */ 3883 if (hicount < locount && hicount != 0) 3884 key |= (hammer2_key_t)1 << keybits; 3885 else 3886 key &= ~(hammer2_key_t)1 << keybits; 3887 } else { 3888 /* 3889 * Insert node for most number of keys, best for heavily 3890 * fragmented files. 3891 */ 3892 if (hicount > locount) 3893 key |= (hammer2_key_t)1 << keybits; 3894 else 3895 key &= ~(hammer2_key_t)1 << keybits; 3896 } 3897 *keyp = key; 3898 3899 return (keybits); 3900 } 3901 3902 /* 3903 * Sets CHAIN_DELETED and CHAIN_FLUSH_DELETE in the chain being deleted and 3904 * set chain->delete_tid. The chain is not actually marked possibly-free 3905 * in the freemap until the deletion is completely flushed out (because 3906 * a flush which doesn't cover the entire deletion is flushing the deleted 3907 * chain as if it were live). 3908 * 3909 * This function does NOT generate a modification to the parent. It 3910 * would be nearly impossible to figure out which parent to modify anyway. 3911 * Such modifications are handled top-down by the flush code and are 3912 * properly merged using the flush synchronization point. 3913 * 3914 * The find/get code will properly overload the RBTREE check on top of 3915 * the bref check to detect deleted entries. 3916 * 3917 * This function is NOT recursive. Any entity already pushed into the 3918 * chain (such as an inode) may still need visibility into its contents, 3919 * as well as the ability to read and modify the contents. For example, 3920 * for an unlinked file which is still open. 3921 * 3922 * NOTE: This function does NOT set chain->modify_tid, allowing future 3923 * code to distinguish between live and deleted chains by testing 3924 * trans->sync_tid vs chain->modify_tid and chain->delete_tid. 3925 * 3926 * NOTE: Deletions normally do not occur in the middle of a duplication 3927 * chain but we use a trick for hardlink migration that refactors 3928 * the originating inode without deleting it, so we make no assumptions 3929 * here. 
 */
void
hammer2_chain_delete(hammer2_trans_t *trans, hammer2_chain_t *chain, int flags)
{
	KKASSERT(ccms_thread_lock_owned(&chain->core->cst));

	/*
	 * Nothing to do if already marked.
	 */
	if (chain->flags & HAMMER2_CHAIN_DELETED)
		return;

	/*
	 * The setting of DELETED causes finds, lookups, and _next iterations
	 * to no longer recognize the chain.  RB_SCAN()s will still have
	 * visibility (needed for flush serialization points).
	 *
	 * We need the spinlock on the core whose RBTREE contains chain
	 * to protect against races.
	 */
	spin_lock(&chain->above->cst.spin);
	_hammer2_chain_delete_helper(trans, chain->above, chain);
	spin_unlock(&chain->above->cst.spin);

	hammer2_chain_setsubmod(trans, chain);
}

/*
 * Returns the index of the nearest element in the blockref array >= elm.
 * Returns (count) if no element could be found.  If delete_filter is
 * non-zero the scan filters out any blockrefs which match deleted chains
 * on dbtree.
 *
 * Sets *key_nextp to the next key for loop purposes but does not modify
 * it if the next key would be higher than the current value of *key_nextp.
 * Note that *key_nextp can overflow to 0, which should be tested by the
 * caller.
 *
 * (*cache_indexp) is a heuristic and can be any value without effecting
 * the result.
 *
 * The spin lock on the related chain must be held.
 */
int
hammer2_base_find(hammer2_chain_t *parent,
		  hammer2_blockref_t *base, int count,
		  int *cache_indexp, hammer2_key_t *key_nextp,
		  hammer2_key_t key_beg, hammer2_key_t key_end,
		  int delete_filter)
{
	hammer2_chain_core_t *core = parent->core;
	hammer2_blockref_t *scan;
	hammer2_key_t scan_end;
	int i;
	int limit;	/* last meaningful slot (count or live_zero) */

	/*
	 * Require the live chains to already have their core's counted
	 * so we can optimize operations.
	 */
	KKASSERT((parent->flags & HAMMER2_CHAIN_DUPLICATED) ||
		 core->flags & HAMMER2_CORE_COUNTEDBREFS);

	/*
	 * Degenerate case
	 */
	if (count == 0 || base == NULL)
		return(count);

	/*
	 * Sequential optimization using *cache_indexp.  This is the most
	 * likely scenario.
	 *
	 * We can avoid trailing empty entries on live chains, otherwise
	 * we might have to check the whole block array.
	 */
	i = *cache_indexp;
	cpu_ccfence();
	if (parent->flags & HAMMER2_CHAIN_DUPLICATED)
		limit = count;
	else
		limit = core->live_zero;
	if (i >= limit)
		i = limit - 1;
	if (i < 0)
		i = 0;
	KKASSERT(i < count);

	/*
	 * Search backwards
	 */
	scan = &base[i];
	while (i > 0 && (scan->type == 0 || scan->key > key_beg)) {
		--scan;
		--i;
	}
	*cache_indexp = i;

	/*
	 * Search forwards, stop when we find a scan element which
	 * encloses the key or until we know that there are no further
	 * elements.
	 */
	while (i < count) {
		if (scan->type != 0) {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits) - 1;
			if (scan->key > key_beg || scan_end >= key_beg) {
				/*
				 * Check to see if the entry is covered by
				 * a deleted chain and ignore the entry if
				 * it is and delete_filter != 0.
				 */
				if (delete_filter == 0)
					break;
				if (hammer2_chain_find_deleted(
					parent, scan->key, scan_end) == NULL) {
					break;
				}
			}
		}
		if (i >= limit)
			return (count);
		++scan;
		++i;
	}
	if (i != count) {
		*cache_indexp = i;
		if (i >= limit) {
			i = count;
		} else {
			scan_end = scan->key +
				   ((hammer2_key_t)1 << scan->keybits);
			if (scan_end && (*key_nextp > scan_end ||
					 *key_nextp == 0)) {
				*key_nextp = scan_end;
			}
		}
	}
	return (i);
}

/*
 * Do a combined search and return the next match either from the blockref
 * array or from the in-memory chain.  Sets *bresp to the returned bref in
 * both cases, or sets it to NULL if the search exhausted.  Only returns
 * a non-NULL chain if the search matched from the in-memory chain.
 *
 * When no in-memory chain has been found a non-NULL bref is returned
 * in *bresp pointing at the media blockref.
 *
 * Must be called with above's spinlock held.  Spinlock remains held
 * through the operation.
 *
 * The returned chain is not locked or referenced.  Use the returned bref
 * to determine if the search exhausted or not.  Iterate if the base find
 * is chosen but matches a deleted chain.
 */
static hammer2_chain_t *
hammer2_combined_find(hammer2_chain_t *parent,
		      hammer2_blockref_t *base, int count,
		      int *cache_indexp, hammer2_key_t *key_nextp,
		      hammer2_key_t key_beg, hammer2_key_t key_end,
		      hammer2_blockref_t **bresp)
{
	hammer2_blockref_t *bref;
	hammer2_chain_t *chain;
	int i;

	/*
	 * Lookup in block array and in rbtree.
	 */
	*key_nextp = key_end + 1;
	i = hammer2_base_find(parent, base, count, cache_indexp,
			      key_nextp, key_beg, key_end, 1);
	chain = hammer2_chain_find(parent, key_nextp, key_beg, key_end);

	/*
	 * Neither matched
	 */
	if (i == count && chain == NULL) {
		*bresp = NULL;
		return(NULL);
	}

	/*
	 * Only chain matched.
	 */
	if (i == count) {
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Only blockref matched.
	 */
	if (chain == NULL) {
		bref = &base[i];
		goto found;
	}

	/*
	 * Both in-memory and blockref matched, select the nearer element.
	 *
	 * If both are flush with the left-hand side or both are the
	 * same distance away, select the chain.  In this situation the
	 * chain must have been loaded from the matching blockmap.
	 */
	if ((chain->bref.key <= key_beg && base[i].key <= key_beg) ||
	    chain->bref.key == base[i].key) {
		KKASSERT(chain->bref.key == base[i].key);
		if ((chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) {
			/* diagnostics before the assertion below fires */
			kprintf("chain not bmapped %p.%d %08x\n",
				chain, chain->bref.type, chain->flags);
			kprintf("in chain mod/del %016jx %016jx\n",
				chain->modify_tid, chain->delete_tid);
			kprintf("and updlo/hi %016jx %016jx\n",
				chain->update_lo, chain->update_hi);
		}
		KKASSERT(chain->flags & HAMMER2_CHAIN_BMAPPED);
		bref = &chain->bref;
		goto found;
	}

	/*
	 * Select the nearer key
	 */
	if (chain->bref.key < base[i].key) {
		bref = &chain->bref;
	} else {
		bref = &base[i];
		chain = NULL;
	}

	/*
	 * If the bref is out of bounds we've exhausted our search.
	 */
found:
	if (bref->key > key_end) {
		*bresp = NULL;
		chain = NULL;
	} else {
		*bresp = bref;
	}
	return(chain);
}

/*
 * Locate the specified block array element and delete it.  The element
 * must exist.
4176 * 4177 * The spin lock on the related chain must be held. 4178 * 4179 * NOTE: live_count was adjusted when the chain was deleted, so it does not 4180 * need to be adjusted when we commit the media change. 4181 */ 4182 void 4183 hammer2_base_delete(hammer2_trans_t *trans, hammer2_chain_t *parent, 4184 hammer2_blockref_t *base, int count, 4185 int *cache_indexp, hammer2_chain_t *child) 4186 { 4187 hammer2_blockref_t *elm = &child->bref; 4188 hammer2_chain_core_t *core = parent->core; 4189 hammer2_key_t key_next; 4190 int i; 4191 4192 /* 4193 * Delete element. Expect the element to exist. 4194 * 4195 * XXX see caller, flush code not yet sophisticated enough to prevent 4196 * re-flushed in some cases. 4197 */ 4198 key_next = 0; /* max range */ 4199 i = hammer2_base_find(parent, base, count, cache_indexp, 4200 &key_next, elm->key, elm->key, 0); 4201 if (i == count || base[i].type == 0 || 4202 base[i].key != elm->key || base[i].keybits != elm->keybits) { 4203 spin_unlock(&core->cst.spin); 4204 panic("delete base %p element not found at %d/%d elm %p\n" 4205 "child ino_reason=%08x\n", 4206 base, i, count, elm, 4207 child->inode_reason); 4208 return; 4209 } 4210 bzero(&base[i], sizeof(*base)); 4211 base[i].mirror_tid = (intptr_t)parent; /* MEDIA DEBUG */ 4212 base[i].modify_tid = (intptr_t)child; /* MEDIA DEBUG */ 4213 base[i].check.debug.sync_tid = trans->sync_tid; /* MEDIA DEBUG */ 4214 ++parent->nremoves; /* DEBUG */ 4215 4216 /* 4217 * We can only optimize core->live_zero for live chains. 4218 */ 4219 if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) { 4220 if (core->live_zero == i + 1) { 4221 while (--i >= 0 && base[i].type == 0) 4222 ; 4223 core->live_zero = i + 1; 4224 } 4225 } 4226 } 4227 4228 /* 4229 * Insert the specified element. The block array must not already have the 4230 * element and must have space available for the insertion. 4231 * 4232 * The spin lock on the related chain must be held. 
 *
 * NOTE: live_count was adjusted when the chain was deleted, so it does not
 *	 need to be adjusted when we commit the media change.
 *	 (NOTE(review): "deleted" reads like a copy/paste from
 *	 hammer2_base_delete — presumably means the earlier in-memory
 *	 insertion; verify.)
 */
void
hammer2_base_insert(hammer2_trans_t *trans __unused, hammer2_chain_t *parent,
		    hammer2_blockref_t *base, int count,
		    int *cache_indexp, hammer2_chain_t *child)
{
	hammer2_blockref_t *elm = &child->bref;
	hammer2_chain_core_t *core = parent->core;
	hammer2_key_t key_next;
	hammer2_key_t xkey;
	int i;
	int j;		/* backward empty-slot scan index */
	int k;		/* forward empty-slot scan index */
	int l;		/* validation loop index */
	int u = 1;	/* which insertion path was taken (debug panic msg) */

	/*
	 * Insert new element.  Expect the element to not already exist
	 * unless we are replacing it.
	 *
	 * XXX see caller, flush code not yet sophisticated enough to prevent
	 *     re-flushed in some cases.
	 */
	key_next = 0; /* max range */
	i = hammer2_base_find(parent, base, count, cache_indexp,
			      &key_next, elm->key, elm->key, 0);

	/*
	 * Shortcut fill optimization, typical ordered insertion(s) may not
	 * require a search.
	 */
	KKASSERT(i >= 0 && i <= count);

	/*
	 * We can only optimize core->live_zero for live chains: append
	 * directly at the live_zero boundary when nothing sorts after elm.
	 */
	if (i == count && core->live_zero < count) {
		if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) {
			i = core->live_zero++;
			base[i] = *elm;
			++parent->ninserts;	/* DEBUG */
			return;
		}
	}

	/* inclusive end of elm's key range; overlap with base[i] is fatal */
	xkey = elm->key + ((hammer2_key_t)1 << elm->keybits) - 1;
	if (i != count && (base[i].key < elm->key || xkey >= base[i].key)) {
		if (child->flags & HAMMER2_CHAIN_FLUSH_TEMPORARY) {
			kprintf("child %p special replace\n", child);
			base[i] = *elm;
			return;
		} else {
			/* drop spinlock so the panic can proceed cleanly */
			spin_unlock(&core->cst.spin);
			panic("insert base %p overlapping "
			      "elements at %d elm %p\n",
			      base, i, elm);
		}
	}

	/*
	 * Try to find an empty slot before or after, expanding the scan
	 * one slot in each direction per iteration and shifting the
	 * intervening elements toward the hole that is found.
	 */
	j = i;
	k = i;
	while (j > 0 || k < count) {
		--j;
		if (j >= 0 && base[j].type == 0) {
			if (j == i - 1) {
				/* hole is directly adjacent, no shift */
				base[j] = *elm;
			} else {
				/* shift [j+1, i) down one, insert at i-1 */
				bcopy(&base[j+1], &base[j],
				      (i - j - 1) * sizeof(*base));
				base[i - 1] = *elm;
			}
			++parent->ninserts;	/* DEBUG */
			goto validate;
		}
		++k;
		if (k < count && base[k].type == 0) {
			/* shift [i, k) up one, insert at i */
			bcopy(&base[i], &base[i+1],
			      (k - i) * sizeof(hammer2_blockref_t));
			base[i] = *elm;

			/*
			 * We can only update core->live_zero for live
			 * chains.
			 */
			if ((parent->flags & HAMMER2_CHAIN_DUPLICATED) == 0) {
				if (core->live_zero <= k)
					core->live_zero = k + 1;
			}
			u = 2;
			++parent->ninserts;	/* DEBUG */
			goto validate;
		}
	}
	panic("hammer2_base_insert: no room!");

	/*
	 * Debugging: verify the array is still strictly sorted with no
	 * overlapping key ranges after the insertion.
	 */
validate:
	key_next = 0;
	for (l = 0; l < count; ++l) {
		if (base[l].type) {
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;
			break;
		}
	}
	while (++l < count) {
		if (base[l].type) {
			if (base[l].key <= key_next)
				panic("base_insert %d %d,%d,%d fail %p:%d", u, i, j, k, base, l);
			key_next = base[l].key +
				   ((hammer2_key_t)1 << base[l].keybits) - 1;

		}
	}

}

#if 0

/*
 * Sort the blockref array for the chain.  Used by the flush code to
 * sort the blockref[] array.
 *
 * The chain must be exclusively locked AND spin-locked.
 */
typedef hammer2_blockref_t *hammer2_blockref_p;

/*
 * kqsort comparator for an array of blockref pointers: empty elements
 * (type == 0) sort to the end, live elements sort ascending by key.
 */
static
int
hammer2_base_sort_callback(const void *v1, const void *v2)
{
	hammer2_blockref_p bref1 = *(const hammer2_blockref_p *)v1;
	hammer2_blockref_p bref2 = *(const hammer2_blockref_p *)v2;

	/*
	 * Make sure empty elements are placed at the end of the array
	 */
	if (bref1->type == 0) {
		if (bref2->type == 0)
			return(0);
		return(1);
	} else if (bref2->type == 0) {
		return(-1);
	}

	/*
	 * Sort by key
	 */
	if (bref1->key < bref2->key)
		return(-1);
	if (bref1->key > bref2->key)
		return(1);
	return(0);
}

/*
 * Sort the chain's blockref[] array in-place.  The base/count pair is
 * resolved from the chain's bref type, mirroring the lookup code.
 */
void
hammer2_base_sort(hammer2_chain_t *chain)
{
	hammer2_blockref_t *base;
	int count;

	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * Special shortcut for embedded data returns the inode
		 * itself.  Callers must detect this condition and access
		 * the embedded data (the strategy code does this for us).
		 *
		 * This is only applicable to regular files and softlinks.
		 */
		if (chain->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)
			return;
		base = &chain->data->ipdata.u.blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Optimize indirect blocks in the INITIAL state to avoid
		 * I/O.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_INITIAL) == 0);
		base = &chain->data->npdata[0];
		count = chain->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &chain->hmp->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &chain->hmp->voldata.freemap_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		panic("hammer2_chain_lookup: unrecognized blockref type: %d",
		      chain->bref.type);
		base = NULL;	/* safety */
		count = 0;	/* safety */
	}
	kqsort(base, count, sizeof(*base), hammer2_base_sort_callback);
}

#endif

/*
 * Chain memory management
 *
 * Passively sleep for up to one tick using the chain pointer as the
 * wakeup channel ("chnflw").
 */
void
hammer2_chain_wait(hammer2_chain_t *chain)
{
	tsleep(chain, 0, "chnflw", 1);
}

/*
 * Manage excessive memory resource use for chain and related
 * structures.
 *
 * Throttles the caller when the mount has too many dirty chains
 * in-memory, blocking until the syncer flushes some out.
 */
void
hammer2_chain_memory_wait(hammer2_pfsmount_t *pmp)
{
	long waiting;
	long count;
	long limit;
#if 0
	static int zzticks;
#endif

	/*
	 * Atomic check condition and wait.  Also do an early speedup of
	 * the syncer to try to avoid hitting the wait.
	 */
	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		/* low bits are the dirty count, high bit is WAITING flag */
		count = waiting & HAMMER2_DIRTYCHAIN_MASK;

		/*
		 * Scale the limit with the vnode population but never
		 * below the tunable or a 1000-chain floor.
		 */
		limit = pmp->mp->mnt_nvnodelistsize / 10;
		if (limit < hammer2_limit_dirty_chains)
			limit = hammer2_limit_dirty_chains;
		if (limit < 1000)
			limit = 1000;

#if 0
		if ((int)(ticks - zzticks) > hz) {
			zzticks = ticks;
			kprintf("count %ld %ld\n", count, limit);
		}
#endif

		/*
		 * Block if there are too many dirty chains present, wait
		 * for the flush to clean some out.
		 */
		if (count > limit) {
			/*
			 * Interlocked sleep: set the WAITING flag with a
			 * cmpset so a concurrent wakeup is not lost; on
			 * cmpset failure just retry the loop.
			 */
			tsleep_interlock(&pmp->inmem_dirty_chains, 0);
			if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
					       waiting,
				       waiting | HAMMER2_DIRTYCHAIN_WAITING)) {
				speedup_syncer(pmp->mp);
				tsleep(&pmp->inmem_dirty_chains, PINTERLOCKED,
				       "chnmem", hz);
			}
			continue;	/* loop on success or fail */
		}

		/*
		 * Try to start an early flush before we are forced to block.
		 */
		if (count > limit * 7 / 10)
			speedup_syncer(pmp->mp);
		break;
	}
}

/*
 * Account for one more dirty chain on the mount (NULL pmp is a no-op).
 */
void
hammer2_chain_memory_inc(hammer2_pfsmount_t *pmp)
{
	if (pmp)
		atomic_add_long(&pmp->inmem_dirty_chains, 1);
}

/*
 * Account for one less dirty chain on the mount, clearing the WAITING
 * flag and waking any throttled threads (see hammer2_chain_memory_wait).
 */
void
hammer2_chain_memory_wakeup(hammer2_pfsmount_t *pmp)
{
	long waiting;

	if (pmp == NULL)
		return;

	/* cmpset loop: decrement the count and strip WAITING atomically */
	for (;;) {
		waiting = pmp->inmem_dirty_chains;
		cpu_ccfence();
		if (atomic_cmpset_long(&pmp->inmem_dirty_chains,
				       waiting,
				       (waiting - 1) &
					~HAMMER2_DIRTYCHAIN_WAITING)) {
			break;
		}
	}

	if (waiting & HAMMER2_DIRTYCHAIN_WAITING)
		wakeup(&pmp->inmem_dirty_chains);
}

/*
 * Add 'bytes' to the per-type read statistics counter selected by the
 * blockref type (file data, inode meta, indirect, freemap, or volume).
 */
static
void
adjreadcounter(hammer2_blockref_t *bref, size_t bytes)
{
	long *counterp;

	switch(bref->type) {
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_read;
		break;
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_read;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_read;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_read;
		break;
	default:
		counterp = &hammer2_iod_volu_read;
		break;
	}
	*counterp += bytes;
}