1 /* 2 * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in 16 * the documentation and/or other materials provided with the 17 * distribution. 18 * 3. Neither the name of The DragonFly Project nor the names of its 19 * contributors may be used to endorse or promote products derived 20 * from this software without specific, prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 25 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 26 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 27 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 28 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 29 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 30 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 31 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 32 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 33 * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/types.h>
#include <sys/lock.h>
#include <sys/uuid.h>

#include "hammer2.h"

/*
 * Recursively flush the specified chain.  The chain is locked and
 * referenced by the caller and will remain so on return.  The chain
 * will remain referenced throughout but can temporarily lose its
 * lock during the recursion to avoid unnecessarily stalling user
 * processes.
 *
 * This structure carries the state of one recursive flush down the
 * chain topology and back up again.
 */
struct hammer2_flush_info {
	hammer2_chain_t *parent;	/* parent of chain under scan */
	hammer2_trans_t *trans;		/* transaction driving the flush */
	int		depth;		/* recursion depth, capped by
					   HAMMER2_FLUSH_DEPTH_LIMIT */
	int		diddeferral;	/* non-zero if children deferred */
	int		pass;		/* scan2 pass: 1=deletions, 2=adds */
	int		cache_index;
	struct h2_flush_deferral_list flush_list; /* too-deep chains, redone
						     after the stack pops */
	hammer2_tid_t	sync_tid;	/* flush synchronization point */
	hammer2_tid_t	mirror_tid;	/* collect mirror TID updates */
};

typedef struct hammer2_flush_info hammer2_flush_info_t;

static void hammer2_chain_flush_core(hammer2_flush_info_t *info,
				hammer2_chain_t *chain);
static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data);
static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data);
static void hammer2_rollup_stats(hammer2_chain_t *parent,
				hammer2_chain_t *child, int how);

#if 0
/*
 * (disabled) Accumulate inode/data statistics for a blockref being
 * added (how > 0) or removed (how < 0).
 */
static __inline
void
hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref,
		    int how)
{
	hammer2_key_t bytes;

	if (bref->type != 0) {
		bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX);
		if (bref->type == HAMMER2_BREF_TYPE_INODE)
			info->inode_count += how;
		if (how < 0)
			info->data_count -= bytes;
		else
			info->data_count += bytes;
	}
}
#endif

/*
 * Transaction support functions for writing to the filesystem.
 *
 * Initializing a new transaction allocates a transaction ID.  We
 * don't bother marking the volume header MODIFIED.  Instead, the volume
 * will be synchronized at a later time as part of a larger flush sequence.
 *
 * Non-flush transactions can typically run concurrently.  However if
 * there are non-flush transaction both before AND after a flush trans,
 * the transactions after stall until the ones before finish.
 *
 * Non-flush transactions occurring after a flush pointer can run
 * concurrently with that flush.  They only have to wait for transactions
 * prior to the flush trans to complete before they unstall.
 *
 * WARNING! Transaction ids are only allocated when the transaction becomes
 *	    active, which allows other transactions to insert ahead of us
 *	    if we are forced to block (only bioq transactions do that).
 *
 * WARNING! Modifications to the root volume cannot dup the root volume
 *	    header to handle synchronization points, so alloc_tid can
 *	    wind up (harmlessly) more advanced on flush.
 *
 * WARNING! Operations which might call inode_duplicate()/chain_duplicate()
 *	    depend heavily on having a unique sync_tid to avoid duplication
 *	    collisions (which key off of delete_tid).
 */
void
hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags)
{
	hammer2_mount_t *hmp;
	hammer2_trans_t *scan;

	bzero(trans, sizeof(*trans));
	trans->pmp = pmp;
	hmp = pmp->cluster.chains[0]->hmp;	/* XXX first cluster element */

	/* queue/tid manipulation below is serialized by the voldata lock */
	hammer2_voldata_lock(hmp);
	trans->flags = flags;
	trans->td = curthread;
	/*trans->delete_gen = 0;*/	/* multiple deletions within trans */

	if (flags & HAMMER2_TRANS_ISFLUSH) {
		/*
		 * If multiple flushes are trying to run we have to
		 * wait until it is our turn, then set curflush to
		 * indicate that a flush is now pending (but not
		 * necessarily active yet).
		 *
		 * NOTE: Do not set trans->blocked here.
		 */
		++hmp->flushcnt;
		while (hmp->curflush != NULL) {
			lksleep(&hmp->curflush, &hmp->voldatalk,
				0, "h2multf", hz);
		}
		hmp->curflush = trans;
		TAILQ_INSERT_TAIL(&hmp->transq, trans, entry);

		/*
		 * If we are a flush we have to wait for all transactions
		 * prior to our flush synchronization point to complete
		 * before we can start our flush.
		 *
		 * Most importantly, this includes bioq flushes.
		 *
		 * NOTE: Do not set trans->blocked here.
		 */
		while (TAILQ_FIRST(&hmp->transq) != trans) {
			lksleep(&trans->sync_tid, &hmp->voldatalk,
				0, "h2syncw", hz);
		}

		/*
		 * don't assign sync_tid until we become the running
		 * flush.  topo_flush_tid is used to control when
		 * chain modifications in concurrent transactions are
		 * required to delete-duplicate (so as not to disturb
		 * the state of what is being currently flushed).
		 */
		trans->sync_tid = hmp->voldata.alloc_tid++;
		hmp->topo_flush_tid = trans->sync_tid;

		/*
		 * Once we become the running flush we can wakeup anyone
		 * who blocked on us, up to the next flush.  That is,
		 * our flush can run concurrent with frontend operations.
		 */
		scan = trans;
		while ((scan = TAILQ_NEXT(scan, entry)) != NULL) {
			if (scan->flags & HAMMER2_TRANS_ISFLUSH)
				break;
			if (scan->blocked == 0)
				break;
			scan->blocked = 0;
			wakeup(&scan->blocked);
		}
	} else if ((flags & HAMMER2_TRANS_BUFCACHE) && hmp->curflush) {
		/*
		 * We cannot block if we are the bioq thread.  When a
		 * flush is not pending we can operate normally but
		 * if a flush IS pending the bioq thread's transaction
		 * must be placed either before or after curflush.
		 *
		 * If the current flush is waiting the bioq thread's
		 * transaction is placed before.  If it is running the
		 * bioq thread's transaction is placed after.
		 */
		scan = TAILQ_FIRST(&hmp->transq);
		if (scan != hmp->curflush) {
			TAILQ_INSERT_BEFORE(hmp->curflush, trans, entry);
		} else {
			TAILQ_INSERT_TAIL(&hmp->transq, trans, entry);
		}
		trans->sync_tid = hmp->voldata.alloc_tid++;
	} else {
		/*
		 * If this is a normal transaction and not a flush, or
		 * if this is a bioq transaction and no flush is pending,
		 * we can queue normally.
		 *
		 * Normal transactions must block while a pending flush is
		 * waiting for prior transactions to complete.  Once the
		 * pending flush becomes active we can run concurrently
		 * with it.
		 */
		TAILQ_INSERT_TAIL(&hmp->transq, trans, entry);
		scan = TAILQ_FIRST(&hmp->transq);
		if (hmp->curflush && hmp->curflush != scan) {
			trans->blocked = 1;
			while (trans->blocked) {
				lksleep(&trans->blocked, &hmp->voldatalk,
					0, "h2trans", hz);
			}
		}
		trans->sync_tid = hmp->voldata.alloc_tid++;
	}
	hammer2_voldata_unlock(hmp, 0);
}

/*
 * Terminate a transaction: remove it from the mount's transaction queue
 * and wake up whichever waiters (other flushes, or normal transactions
 * stalled behind a pending flush) its departure unblocks.
 */
void
hammer2_trans_done(hammer2_trans_t *trans)
{
	hammer2_mount_t *hmp;
	hammer2_trans_t *scan;

	hmp = trans->pmp->cluster.chains[0]->hmp;

	hammer2_voldata_lock(hmp);
	TAILQ_REMOVE(&hmp->transq, trans, entry);
	if (trans->flags & HAMMER2_TRANS_ISFLUSH) {
		--hmp->flushcnt;
		if (hmp->flushcnt) {
			/*
			 * If we were a flush then wakeup anyone waiting on
			 * curflush (i.e. other flushes that want to run).
			 * Leave topo_flush_tid set (I think we could probably
			 * clear it to zero here).
			 */
			hmp->curflush = NULL;
			wakeup(&hmp->curflush);
		} else {
			/*
			 * Theoretically we don't have to clear
			 * topo_flush_tid here since the flush will have
			 * synchronized all operations <= that tid already.
			 * But for now zero it.
			 */
			hmp->curflush = NULL;
			hmp->topo_flush_tid = 0;
		}
	} else {
		/*
		 * If we are not a flush but a flush is now at the head
		 * of the queue and we were previously blocking it,
		 * we can now unblock it.
		 */
		if (hmp->flushcnt &&
		    (scan = TAILQ_FIRST(&hmp->transq)) != NULL &&
		    trans->sync_tid < scan->sync_tid &&
		    (scan->flags & HAMMER2_TRANS_ISFLUSH)) {
			wakeup(&scan->sync_tid);
		}
	}
	hammer2_voldata_unlock(hmp, 0);
}

/*
 * Flush the chain and all modified sub-chains through the specified
 * synchronization point (sync_tid), propagating parent chain modifications
 * and mirror_tid updates back up as needed.  Since we are recursing downward
 * we do not have to deal with the complexities of multi-homed chains (chains
 * with multiple parents).
 *
 * Caller must have interlocked against any non-flush-related modifying
 * operations in progress whose modify_tid values are less than or equal
 * to the passed sync_tid.
 *
 * Caller must have already vetted synchronization points to ensure they
 * are properly flushed.  Only snapshots and cluster flushes can create
 * these sorts of synchronization points.
 *
 * This routine can be called from several places but the most important
 * is from the hammer2_vop_reclaim() function.  We want to try to completely
 * clean out the inode structure to prevent disconnected inodes from
 * building up and blowing out the kmalloc pool.  However, it is not actually
 * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery
 * capability.
 *
 * chain is locked on call and will remain locked on return.  If a flush
 * occurred, the chain's MOVED bit will be set indicating that its parent
 * (which is not part of the flush) should be updated.
305 */ 306 void 307 hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t *chain) 308 { 309 hammer2_chain_t *scan; 310 hammer2_chain_core_t *core; 311 hammer2_flush_info_t info; 312 313 /* 314 * Execute the recursive flush and handle deferrals. 315 * 316 * Chains can be ridiculously long (thousands deep), so to 317 * avoid blowing out the kernel stack the recursive flush has a 318 * depth limit. Elements at the limit are placed on a list 319 * for re-execution after the stack has been popped. 320 */ 321 bzero(&info, sizeof(info)); 322 TAILQ_INIT(&info.flush_list); 323 info.trans = trans; 324 info.sync_tid = trans->sync_tid; 325 info.mirror_tid = 0; 326 info.cache_index = -1; 327 328 core = chain->core; 329 330 for (;;) { 331 /* 332 * Unwind deep recursions which had been deferred. This 333 * can leave MOVED set for these chains, which will be 334 * handled when we [re]flush chain after the unwind. 335 */ 336 while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) { 337 KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); 338 TAILQ_REMOVE(&info.flush_list, scan, flush_node); 339 atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED); 340 341 /* 342 * Now that we've popped back up we can do a secondary 343 * recursion on the deferred elements. 344 */ 345 if (hammer2_debug & 0x0040) 346 kprintf("defered flush %p\n", scan); 347 hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE); 348 hammer2_chain_flush(trans, scan); 349 hammer2_chain_unlock(scan); 350 hammer2_chain_drop(scan); /* ref from deferral */ 351 } 352 353 /* 354 * Flush pass1 on root. 355 */ 356 info.diddeferral = 0; 357 hammer2_chain_flush_core(&info, chain); 358 #if FLUSH_DEBUG 359 kprintf("flush_core_done parent=<base> chain=%p.%d %08x\n", 360 chain, chain->bref.type, chain->flags); 361 #endif 362 363 /* 364 * Only loop if deep recursions have been deferred. 365 */ 366 if (TAILQ_EMPTY(&info.flush_list)) 367 break; 368 } 369 } 370 371 /* 372 * This is the core of the chain flushing code. 
 * The chain is locked by the
 * caller and remains locked on return.  This function is keyed off of
 * the SUBMODIFIED bit but must make fine-grained choices based on the
 * synchronization point we are flushing to.
 *
 * If the flush accomplished any work chain will be flagged MOVED
 * indicating a copy-on-write propagation back up is required.
 * Deep sub-nodes may also have been entered onto the deferral list.
 * MOVED is never set on the volume root.
 *
 * NOTE: modify_tid is different from MODIFIED.  modify_tid is updated
 *	 only when a chain is specifically modified, and not updated
 *	 for copy-on-write propagations.  MODIFIED is set on any
 *	 modification including copy-on-write propagations.
 */
static void
hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain)
{
	hammer2_mount_t *hmp;
	hammer2_blockref_t *bref;
	hammer2_off_t pbase;		/* device block base offset */
	hammer2_off_t pmask;		/* device block offset mask */
	hammer2_tid_t saved_sync;
#if 0
	hammer2_trans_t *trans = info->trans;
#endif
	hammer2_chain_core_t *core;
	size_t psize;			/* device block size */
	size_t boff;			/* offset of data within device blk */
	char *bdata;
	struct buf *bp;
	int error;
	int wasmodified;
	int diddeferral = 0;

	hmp = chain->hmp;

#if FLUSH_DEBUG
	if (info->parent)
		kprintf("flush_core %p->%p.%d %08x (%s)\n",
			info->parent, chain, chain->bref.type,
			chain->flags,
			((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ?
				chain->data->ipdata.filename : "?"));
	else
		kprintf("flush_core NULL->%p.%d %08x (%s)\n",
			chain, chain->bref.type,
			chain->flags,
			((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ?
				chain->data->ipdata.filename : "?"));
#endif
	/*
	 * Ignore chains modified beyond the current flush point.  These
	 * will be treated as if they did not exist.
	 */
	if (chain->modify_tid > info->sync_tid)
		return;

#if 0
	/*
	 * Deleted chains which have not been destroyed must be retained,
	 * and we probably have to recurse to clean-up any sub-trees.
	 * However, restricted flushes can stop processing here because
	 * the chain cleanup will be handled by a later normal flush.
	 *
	 * The MODIFIED bit can likely be cleared in this situation and we
	 * will do so later on in this procedure.
	 */
	if (chain->delete_tid <= info->sync_tid) {
		if (trans->flags & HAMMER2_TRANS_RESTRICTED)
			return;
	}
#endif

	saved_sync = info->sync_tid;
	core = chain->core;

	/*
	 * If SUBMODIFIED is set we recurse the flush and adjust the
	 * blockrefs accordingly.
	 *
	 * NOTE: Looping on SUBMODIFIED can prevent a flush from ever
	 *	 finishing in the face of filesystem activity.
	 */
	if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
		hammer2_chain_t *saved_parent;
		hammer2_tid_t saved_mirror;
		hammer2_chain_layer_t *layer;

		/*
		 * Clear SUBMODIFIED to catch races.  Note that any child
		 * with MODIFIED, DELETED, or MOVED set during scan2, or
		 * which tries to lastdrop but can't free its structures,
		 * or which gets deferred, will cause SUBMODIFIED to be set
		 * again.
		 *
		 * We don't want to set our chain to MODIFIED gratuitously.
		 *
		 * We need an extra ref on chain because we are going to
		 * release its lock temporarily in our child loop.
		 */
		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
		hammer2_chain_ref(chain);

		/*
		 * Run two passes.  The first pass handles MODIFIED and
		 * SUBMODIFIED chains and recurses while the second pass
		 * handles MOVED chains on the way back up.
		 *
		 * If the stack gets too deep we defer scan1, but must
		 * be sure to still run scan2 if on the next loop the
		 * deferred chain has been flushed and now needs MOVED
		 * handling on the way back up.
		 *
		 * Scan1 is recursive.
		 *
		 * NOTE: The act of handling a modified/submodified chain can
		 *	 cause the MOVED Flag to be set.  It can also be set
		 *	 via hammer2_chain_delete() and in other situations.
		 *
		 * NOTE: RB_SCAN() must be used instead of RB_FOREACH()
		 *	 because children can be physically removed during
		 *	 the scan.
		 */
		saved_parent = info->parent;
		saved_mirror = info->mirror_tid;
		info->parent = chain;
		info->mirror_tid = chain->bref.mirror_tid;

		if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) {
			/* too deep: defer this chain for a later pass */
			if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) {
				hammer2_chain_ref(chain);
				TAILQ_INSERT_TAIL(&info->flush_list,
						  chain, flush_node);
				atomic_set_int(&chain->flags,
					       HAMMER2_CHAIN_DEFERRED);
			}
			diddeferral = 1;
		} else {
			info->diddeferral = 0;
			spin_lock(&core->cst.spin);
			KKASSERT(core->good == 0x1234 && core->sharecnt > 0);
			TAILQ_FOREACH_REVERSE(layer, &core->layerq,
					      h2_layer_list, entry) {
				/* layer ref prevents ripout while scanning */
				++layer->refs;
				KKASSERT(layer->good == 0xABCD);
				RB_SCAN(hammer2_chain_tree, &layer->rbtree,
					NULL, hammer2_chain_flush_scan1, info);
				--layer->refs;
				diddeferral += info->diddeferral;
			}
			spin_unlock(&core->cst.spin);
		}

		/*
		 * Handle successfully flushed children who are in the MOVED
		 * state on the way back up the recursion.  This can have
		 * the side-effect of clearing MOVED.
		 *
		 * Scan2 is non-recursive.
		 */
		if (diddeferral) {
			/* re-flag for the retry after deferrals complete */
			atomic_set_int(&chain->flags,
				       HAMMER2_CHAIN_SUBMODIFIED);
			spin_lock(&core->cst.spin);
		} else {
			spin_lock(&core->cst.spin);
			KKASSERT(core->good == 0x1234 && core->sharecnt > 0);
			TAILQ_FOREACH_REVERSE(layer, &core->layerq,
					      h2_layer_list, entry) {
				/* pass 1 deletes, pass 2 inserts */
				info->pass = 1;
				++layer->refs;
				KKASSERT(layer->good == 0xABCD);
				RB_SCAN(hammer2_chain_tree, &layer->rbtree,
					NULL, hammer2_chain_flush_scan2, info);
				info->pass = 2;
				RB_SCAN(hammer2_chain_tree, &layer->rbtree,
					NULL, hammer2_chain_flush_scan2, info);
				/*diddeferral += info->diddeferral; n/a*/
				--layer->refs;
			}
		}
		hammer2_chain_layer_check_locked(chain->hmp, core);
		spin_unlock(&core->cst.spin);

		chain->bref.mirror_tid = info->mirror_tid;
		info->mirror_tid = saved_mirror;
		info->parent = saved_parent;
		KKASSERT(chain->refs > 1);
		hammer2_chain_drop(chain);
	}

	/*
	 * Restore sync_tid in case it was restricted by a delete/duplicate.
	 */
	info->sync_tid = saved_sync;

	/*
	 * Rollup diddeferral for caller.  Note direct assignment, not +=.
	 */
	info->diddeferral = diddeferral;

	/*
	 * Do not flush chain if there were any deferrals.  It will be
	 * retried later after the deferrals are independently handled.
	 */
	if (diddeferral) {
		if (hammer2_debug & 0x0008) {
			kprintf("%*.*s} %p/%d %04x (deferred)",
				info->depth, info->depth, "",
				chain, chain->refs, chain->flags);
		}
		return;
	}

	/*
	 * If we encounter a deleted chain within our flush we can clear
	 * the MODIFIED bit and avoid flushing it whether it has been
	 * destroyed or not.  We must make sure that the chain is flagged
	 * MOVED in this situation so the parent picks up the deletion.
	 *
	 * Note that scan2 has already executed above so statistics have
	 * already been rolled up.
	 */
	if (chain->delete_tid <= info->sync_tid) {
		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
			if (chain->bp) {
				if (chain->bytes == chain->bp->b_bufsize)
					chain->bp->b_flags |= B_INVAL|B_RELBUF;
			}
			if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) {
				hammer2_chain_ref(chain);
				atomic_set_int(&chain->flags,
					       HAMMER2_CHAIN_MOVED);
			}
			/* drop the ref associated with the MODIFIED bit */
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			hammer2_chain_drop(chain);
		}
		return;
	}
#if 0
	if ((chain->flags & HAMMER2_CHAIN_DESTROYED) &&
	    (chain->flags & HAMMER2_CHAIN_DELETED) &&
	    (trans->flags & HAMMER2_TRANS_RESTRICTED) == 0) {
		/*
		 * Throw-away the MODIFIED flag
		 */
		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
			if (chain->bp) {
				if (chain->bytes == chain->bp->b_bufsize)
					chain->bp->b_flags |= B_INVAL|B_RELBUF;
			}
			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
			hammer2_chain_drop(chain);
		}
		return;
	}
#endif

	/*
	 * A degenerate flush might not have flushed anything and thus not
	 * processed modified blocks on the way back up.  Detect the case.
	 *
	 * Note that MOVED can be set without MODIFIED being set due to
	 * a deletion, in which case it is handled by Scan2 later on.
	 *
	 * Both bits can be set along with DELETED due to a deletion if
	 * modified data within the synchronization zone and the chain
	 * was then deleted beyond the zone, in which case we still have
	 * to flush for synchronization point consistency.  Otherwise though
	 * DELETED and MODIFIED are treated as separate flags.
	 */
	if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0)
		return;

	/*
	 * Issue flush.
	 *
	 * A DESTROYED node that reaches this point must be flushed for
	 * synchronization point consistency.
	 */

	/*
	 * Update mirror_tid, clear MODIFIED, and set MOVED.
	 *
	 * The caller will update the parent's reference to this chain
	 * by testing MOVED as long as the modification was in-bounds.
	 *
	 * MOVED is never set on the volume root as there is no parent
	 * to adjust.
	 */
	if (chain->bref.mirror_tid < info->sync_tid)
		chain->bref.mirror_tid = info->sync_tid;
	wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0;
	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
	if (chain == &hmp->vchain)
		kprintf("(FLUSHED VOLUME HEADER)\n");
	if (chain == &hmp->fchain)
		kprintf("(FLUSHED FREEMAP HEADER)\n");

	if ((chain->flags & HAMMER2_CHAIN_MOVED) ||
	    chain == &hmp->vchain ||
	    chain == &hmp->fchain) {
		/*
		 * Drop the ref from the MODIFIED bit we cleared.
		 * Net is -0 or -1 ref depending.
		 */
		if (wasmodified)
			hammer2_chain_drop(chain);
	} else {
		/*
		 * Drop the ref from the MODIFIED bit we cleared and
		 * set a ref for the MOVED bit we are setting.  Net
		 * is +0 or +1 ref depending.
		 */
		if (wasmodified == 0)
			hammer2_chain_ref(chain);
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
	}

	/*
	 * If this is part of a recursive flush we can go ahead and write
	 * out the buffer cache buffer and pass a new bref back up the chain
	 * via the MOVED bit.
	 *
	 * Volume headers are NOT flushed here as they require special
	 * processing.
	 */
	switch(chain->bref.type) {
	case HAMMER2_BREF_TYPE_FREEMAP:
		hammer2_modify_volume(hmp);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		/*
		 * We should flush the free block table before we calculate
		 * CRCs and copy voldata -> volsync.
		 *
		 * To prevent SMP races, fchain must remain locked until
		 * voldata is copied to volsync.
		 */
		hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
		if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED |
					 HAMMER2_CHAIN_SUBMODIFIED)) {
			/* this will modify vchain as a side effect */
			hammer2_chain_flush(info->trans, &hmp->fchain);
		}

		/*
		 * The volume header is flushed manually by the syncer, not
		 * here.  All we do is adjust the crc's.
		 */
		KKASSERT(chain->data != NULL);
		KKASSERT(chain->bp == NULL);
		kprintf("volume header mirror_tid %jd\n",
			hmp->voldata.mirror_tid);

		hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
			hammer2_icrc32(
				(char *)&hmp->voldata +
				 HAMMER2_VOLUME_ICRC1_OFF,
				HAMMER2_VOLUME_ICRC1_SIZE);
		hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
			hammer2_icrc32(
				(char *)&hmp->voldata +
				 HAMMER2_VOLUME_ICRC0_OFF,
				HAMMER2_VOLUME_ICRC0_SIZE);
		hmp->voldata.icrc_volheader =
			hammer2_icrc32(
				(char *)&hmp->voldata +
				 HAMMER2_VOLUME_ICRCVH_OFF,
				HAMMER2_VOLUME_ICRCVH_SIZE);
		hmp->volsync = hmp->voldata;
		atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC);
		hammer2_chain_unlock(&hmp->fchain);
		break;
	case HAMMER2_BREF_TYPE_DATA:
		/*
		 * Data elements have already been flushed via the logical
		 * file buffer cache.  Their hash was set in the bref by
		 * the vop_write code.
		 *
		 * Make sure any device buffer(s) have been flushed out here.
		 * (there aren't usually any to flush).
		 */
		psize = hammer2_devblksize(chain->bytes);
		pmask = (hammer2_off_t)psize - 1;
		pbase = chain->bref.data_off & ~pmask;
		boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask);

		bp = getblk(hmp->devvp, pbase, psize, GETBLK_NOWAIT, 0);
		if (bp) {
			if ((bp->b_flags & (B_CACHE | B_DIRTY)) ==
			    (B_CACHE | B_DIRTY)) {
				cluster_awrite(bp);
			} else {
				bp->b_flags |= B_RELBUF;
				brelse(bp);
			}
		}
		break;
#if 0
	case HAMMER2_BREF_TYPE_INDIRECT:
		/*
		 * Indirect blocks may be in an INITIAL state.  Use the
		 * chain_lock() call to ensure that the buffer has been
		 * instantiated (even though it is already locked the buffer
		 * might not have been instantiated).
		 *
		 * Only write the buffer out if it is dirty, it is possible
		 * the operating system had already written out the buffer.
		 */
		hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
		KKASSERT(chain->bp != NULL);

		bp = chain->bp;
		if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) ||
		    (bp->b_flags & B_DIRTY)) {
			bdwrite(chain->bp);
		} else {
			brelse(chain->bp);
		}
		chain->bp = NULL;
		chain->data = NULL;
		hammer2_chain_unlock(chain);
		break;
#endif
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		/*
		 * Device-backed.  Buffer will be flushed by the sync
		 * code XXX.
		 */
		KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
	default:
		/*
		 * Embedded elements have to be flushed out.
		 * (Basically just BREF_TYPE_INODE).
		 */
		KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED);
		KKASSERT(chain->data != NULL);
		KKASSERT(chain->bp == NULL);
		bref = &chain->bref;

		KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0);
		KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) ==
			 HAMMER2_CHECK_ISCSI32 ||
			 HAMMER2_DEC_CHECK(chain->bref.methods) ==
			 HAMMER2_CHECK_FREEMAP);

		/*
		 * The data is embedded, we have to acquire the
		 * buffer cache buffer and copy the data into it.
		 */
		psize = hammer2_devblksize(chain->bytes);
		pmask = (hammer2_off_t)psize - 1;
		pbase = bref->data_off & ~pmask;
		boff = bref->data_off & (HAMMER2_OFF_MASK & pmask);

		/*
		 * The getblk() optimization can only be used if the
		 * physical block size matches the request.
		 */
		error = bread(hmp->devvp, pbase, psize, &bp);
		KKASSERT(error == 0);

		bdata = (char *)bp->b_data + boff;

		/*
		 * Copy the data to the buffer, mark the buffer
		 * dirty, and convert the chain to unmodified.
		 */
		bcopy(chain->data, bdata, chain->bytes);
		bp->b_flags |= B_CLUSTEROK;
		bdwrite(bp);
		bp = NULL;

		switch(HAMMER2_DEC_CHECK(chain->bref.methods)) {
		case HAMMER2_CHECK_FREEMAP:
			chain->bref.check.freemap.icrc32 =
				hammer2_icrc32(chain->data, chain->bytes);
			break;
		case HAMMER2_CHECK_ISCSI32:
			chain->bref.check.iscsi32.value =
				hammer2_icrc32(chain->data, chain->bytes);
			break;
		default:
			panic("hammer2_flush_core: bad crc type");
			break;	/* NOT REACHED */
		}
		if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
			++hammer2_iod_meta_write;
		else
			++hammer2_iod_indr_write;
	}
}

/*
 * Flush helper scan1 (recursive)
 *
 * Flushes the children of the caller's chain (parent) and updates
 * the blockref, restricted by sync_tid.
 *
 * Ripouts during the loop should not cause any problems.
 * Because we are
 * flushing to a synchronization point, modification races will occur after
 * sync_tid and do not have to be flushed anyway.
 *
 * It is also ok if the parent is chain_duplicate()'d while unlocked because
 * the delete/duplication will install a delete_tid that is still larger than
 * our current sync_tid.
 */
static int
hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data)
{
	hammer2_flush_info_t *info = data;
	hammer2_trans_t *trans = info->trans;
	hammer2_chain_t *parent = info->parent;
	int diddeferral;

	/*
	 * We should only need to recurse if SUBMODIFIED is set, but as
	 * a safety also recurse if MODIFIED is also set.
	 *
	 * Return early if neither bit is set.  We must re-assert the
	 * SUBMODIFIED flag in the parent if any child covered by the
	 * parent (via delete_tid) is skipped.
	 */
	if ((child->flags & (HAMMER2_CHAIN_MODIFIED |
			     HAMMER2_CHAIN_SUBMODIFIED)) == 0) {
		return (0);
	}
	if (child->modify_tid > trans->sync_tid) {
		if (parent->delete_tid > trans->sync_tid) {
			atomic_set_int(&parent->flags,
				       HAMMER2_CHAIN_SUBMODIFIED);
		}
		return (0);
	}

	hammer2_chain_ref(child);
	spin_unlock(&parent->core->cst.spin);

	/*
	 * The caller has added a ref to the parent so we can temporarily
	 * unlock it in order to lock the child.  Re-check the flags before
	 * continuing.
	 */
	hammer2_chain_unlock(parent);
	hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);

	/* re-check: flags may have changed while the locks were dropped */
	if ((child->flags & (HAMMER2_CHAIN_MODIFIED |
			     HAMMER2_CHAIN_SUBMODIFIED)) == 0) {
		hammer2_chain_unlock(child);
		hammer2_chain_drop(child);
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
		spin_lock(&parent->core->cst.spin);
		return (0);
	}
	if (child->modify_tid > trans->sync_tid) {
		hammer2_chain_unlock(child);
		hammer2_chain_drop(child);
		hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
		spin_lock(&parent->core->cst.spin);
		if (parent->delete_tid > trans->sync_tid) {
			atomic_set_int(&parent->flags,
				       HAMMER2_CHAIN_SUBMODIFIED);
		}
		return (0);
	}

	/*
	 * The DESTROYED flag can only be initially set on an unreferenced
	 * deleted inode and will propagate downward via the mechanic below.
	 * Such inode chains have been deleted for good and should no longer
	 * be subject to delete/duplication.
	 *
	 * This optimization allows the inode reclaim (destroy unlinked file
	 * on vnode reclamation after last close) to be flagged by just
	 * setting HAMMER2_CHAIN_DESTROYED at the top level and then will
	 * cause the chains to be terminated and related buffers to be
	 * invalidated and not flushed out.
	 *
	 * We have to be careful not to propagate the DESTROYED flag if
	 * the destruction occurred after our flush sync_tid.
	 */
	if ((parent->flags & HAMMER2_CHAIN_DESTROYED) &&
	    (child->flags & HAMMER2_CHAIN_DELETED) &&
	    (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) {
		atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROYED |
					      HAMMER2_CHAIN_SUBMODIFIED);
	}

	/*
	 * Recurse and collect deferral data.
	 */
	diddeferral = info->diddeferral;
	++info->depth;
	hammer2_chain_flush_core(info, child);
#if FLUSH_DEBUG
	kprintf("flush_core_done parent=%p flags=%08x child=%p.%d %08x\n",
		parent, parent->flags, child, child->bref.type, child->flags);
#endif
	--info->depth;
	info->diddeferral += diddeferral;

	if (child->flags & HAMMER2_CHAIN_SUBMODIFIED)
		atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED);

	hammer2_chain_unlock(child);
	hammer2_chain_drop(child);

	/* reacquire the parent lock and spinlock for the caller's RB_SCAN */
	hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);

	spin_lock(&parent->core->cst.spin);

	return (0);
}

/*
 * Flush helper scan2 (non-recursive)
 *
 * This pass on a chain's children propagates any MOVED or DELETED
 * elements back up the chain towards the root after those elements have
 * been fully flushed.  Unlike scan1, this function is NOT recursive and
 * the parent remains locked across the entire scan.
 *
 * SCAN2 is called twice, once with pass set to 1 and once with it set to 2.
 * We have to do this so base[] elements can be deleted in pass 1 to make
 * room for adding new elements in pass 2.
 *
 * This function also rolls up storage statistics.
 *
 * NOTE! We must re-set SUBMODIFIED on the parent(s) as appropriate, and
 *	 due to the above conditions it is possible to do this and still
 *	 have some children flagged MOVED depending on the synchronization.
 *
 * NOTE! A deletion is a visibility issue, there can still be references to
 *	 deleted elements (for example, to an unlinked file which is still
 *	 open), and there can also be multiple chains pointing to the same
 *	 bref where some are deleted and some are not (for example due to
 *	 a rename).
So a chain marked for deletion is basically considered
 *	  to be live until it is explicitly destroyed or until its ref-count
 *	  reaches zero (also implying that MOVED and MODIFIED are clear).
 */
static int
hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data)
{
	hammer2_flush_info_t *info = data;
	hammer2_chain_t *parent = info->parent;
	hammer2_chain_core_t *above = child->above;
	hammer2_mount_t *hmp = child->hmp;
	hammer2_trans_t *trans = info->trans;
	hammer2_blockref_t *base;
	int count;
	int ok;

	/*
	 * Inodes with stale children that have been converted to DIRECTDATA
	 * mode (file extension or hardlink conversion typically) need to be
	 * skipped right now before we start messing with a non-existent
	 * block table.
	 */
#if 0
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) {
		goto finalize;
	}
#endif

	/*
	 * Ignore children created after our flush point, treating them as
	 * if they did not exist.  These children will not cause the parent
	 * to be updated.
	 *
	 * When we encounter such children and the parent chain has not been
	 * deleted, delete/duplicated, or delete/duplicated-for-move, then
	 * the parent may be used to funnel through several flush points.
	 * We must re-set the SUBMODIFIED flag in the parent to ensure that
	 * those flushes have visibility.  A simple test of delete_tid
	 * suffices to determine if the parent spans beyond our current flush.
	 */
	if (child->modify_tid > trans->sync_tid) {
		goto finalize;
	}

	/*
	 * Ignore children which have not changed.  The parent's block table
	 * is already correct.
	 *
	 * XXX The MOVED bit is only cleared when all multi-homed parents
	 *     have flushed, creating a situation where a re-flush can occur
	 *     via a parent which has already flushed.  The hammer2_base_*()
	 *     functions currently have a hack to deal with this case but
	 *     we need something better.
	 */
	if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) {
		goto finalize;
	}

	/*
	 * Make sure child is referenced before we unlock.
	 */
	hammer2_chain_ref(child);
	spin_unlock(&above->cst.spin);

	/*
	 * Parent reflushed after the child has passed them by should skip
	 * due to the modify_tid test. XXX
	 */
	hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER);
	KKASSERT(child->above == above);
	KKASSERT(parent->core == above);

	/*
	 * The parent's blockref to the child must be deleted or updated.
	 *
	 * This point is not reached on successful DESTROYED optimizations
	 * but can be reached on recursive deletions and restricted flushes.
	 *
	 * Because flushes are ordered we do not have to make a
	 * modify/duplicate of indirect blocks.  That is, the flush
	 * code does not have to kmalloc or duplicate anything.  We
	 * can adjust the indirect block table in-place and reuse the
	 * chain.  It IS possible that the chain has already been duplicated
	 * or may wind up being duplicated on-the-fly by modifying code
	 * on the frontend.  We simply use the original and ignore such
	 * chains.  However, it does mean we can't clear the MOVED bit.
	 *
	 * XXX recursive deletions not optimized.
	 */
	hammer2_chain_modify(trans, &parent,
			     HAMMER2_MODIFY_NO_MODIFY_TID |
			     HAMMER2_MODIFY_ASSERTNOCOPY);

	/*
	 * Locate the parent's blockref array (base) and its element count
	 * based on the parent's type.  base may be left NULL when the
	 * parent has no in-memory table to update (see DELETED assert).
	 */
	switch(parent->bref.type) {
	case HAMMER2_BREF_TYPE_INODE:
		/*
		 * XXX Should assert that OPFLAG_DIRECTDATA is 0 once we
		 * properly duplicate the inode headers and do proper flush
		 * range checks (all the children should be beyond the flush
		 * point).  For now just don't sync the non-applicable
		 * children.
		 *
		 * XXX Can also occur due to hardlink consolidation.  We
		 * set OPFLAG_DIRECTDATA to prevent the indirect and data
		 * blocks from syncing to the hardlink pointer.
		 */
#if 0
		KKASSERT((parent->data->ipdata.op_flags &
			  HAMMER2_OPFLAG_DIRECTDATA) == 0);
#endif
#if 0
		if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) {
			base = NULL;
		} else
#endif
		{
			base = &parent->data->ipdata.u.blockset.blockref[0];
			count = HAMMER2_SET_COUNT;
		}
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		if (parent->data) {
			base = &parent->data->npdata[0];
		} else {
			base = NULL;
			KKASSERT(child->flags & HAMMER2_CHAIN_DELETED);
		}
		count = parent->bytes / sizeof(hammer2_blockref_t);
		break;
	case HAMMER2_BREF_TYPE_VOLUME:
		base = &hmp->voldata.sroot_blockset.blockref[0];
		count = HAMMER2_SET_COUNT;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP:
		base = &parent->data->npdata[0];
		count = HAMMER2_SET_COUNT;
		break;
	default:
		base = NULL;
		count = 0;
		panic("hammer2_chain_flush_scan2: "
		      "unrecognized blockref type: %d",
		      parent->bref.type);
	}

	/*
	 * Don't bother updating a deleted parent's blockrefs (caller will
	 * optimize-out the disk write).  Note that this is not optional,
	 * a deleted parent's blockref array might not be synchronized at
	 * all so calling hammer2_base*() functions could result in a panic.
	 *
	 * Otherwise, we need to be COUNTEDBREFS synchronized for the
	 * hammer2_base_*() functions.
	 */
	if (parent->delete_tid <= trans->sync_tid)
		base = NULL;
	else if ((parent->flags & HAMMER2_CHAIN_COUNTEDBREFS) == 0)
		hammer2_chain_countbrefs(parent, base, count);

	/*
	 * Update the parent's blockref table and propagate mirror_tid.
	 *
	 * NOTE!  Children with modify_tid's beyond our flush point are
	 *	  considered to not exist for the purposes of updating the
	 *	  parent's blockref array.
	 *
	 * NOTE!  Updates to a parent's blockref table do not adjust the
	 *	  parent's bref.modify_tid, only its bref.mirror_tid.
	 */
	if (info->pass == 1 && child->delete_tid <= trans->sync_tid) {
		/*
		 * Deleting.  Only adjust the block array if it contains
		 * the child's entry (child's REPLACE flag is set).  Clear
		 * the child's REPLACE flag only once all possible parents
		 * have been updated (TAILQ_NEXT == NULL means this parent
		 * is the last on the ownerq).
		 */
		ok = 1;
		if (base && (child->flags & HAMMER2_CHAIN_REPLACE)) {
			hammer2_rollup_stats(parent, child, -1);
			spin_lock(&above->cst.spin);
			hammer2_base_delete(parent, base, count,
					    &info->cache_index, &child->bref);
			if (TAILQ_NEXT(parent, core_entry) == NULL) {
				atomic_clear_int(&child->flags,
						 HAMMER2_CHAIN_REPLACE);
			}
			spin_unlock(&above->cst.spin);
		}
		if (info->mirror_tid < child->delete_tid)
			info->mirror_tid = child->delete_tid;
	} else if (info->pass == 2 && child->delete_tid > trans->sync_tid) {
		/*
		 * Inserting.  Only set the child's REPLACE flag indicating
		 * that the parent's blockref array entry is valid once all
		 * possible parents have been updated.
		 */
		ok = 1;
		if (base) {
			/* how=0: entry replaced in-place, no size delta */
			if (child->flags & HAMMER2_CHAIN_REPLACE)
				hammer2_rollup_stats(parent, child, 0);
			else
				hammer2_rollup_stats(parent, child, 1);
			spin_lock(&above->cst.spin);
			hammer2_base_insert(parent, base, count,
					    &info->cache_index, &child->bref,
					    child->flags);
			if (TAILQ_NEXT(parent, core_entry) == NULL) {
				atomic_set_int(&child->flags,
					       HAMMER2_CHAIN_REPLACE);
			}
			spin_unlock(&above->cst.spin);
		}
		if (info->mirror_tid < child->modify_tid)
			info->mirror_tid = child->modify_tid;
	} else {
		/* Child not applicable to this pass; MOVED must stay set. */
		ok = 0;
	}

	if (info->mirror_tid < child->bref.mirror_tid) {
		info->mirror_tid = child->bref.mirror_tid;
	}
	if ((parent->bref.type == HAMMER2_BREF_TYPE_VOLUME ||
	     parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP) &&
	    hmp->voldata.mirror_tid < child->bref.mirror_tid) {
		hmp->voldata.mirror_tid = child->bref.mirror_tid;
	}

	/*
	 * Only clear MOVED once all possible parents have been flushed.
	 *
	 * When can we safely clear the MOVED flag?  Flushes down duplicate
	 * paths can occur out of order, for example if an inode is moved
	 * as part of a hardlink consolidation or if an inode is moved into
	 * an indirect block indexed before the inode.
	 *
	 * NOTE(review): the inner 'ok' deliberately(?) shadows the outer
	 * 'ok' tested above -- confirm; renaming one would make this
	 * easier to read.
	 */
	if (ok && (child->flags & HAMMER2_CHAIN_MOVED)) {
		hammer2_chain_t *scan;
		int ok = 1;

		spin_lock(&above->cst.spin);
		TAILQ_FOREACH(scan, &above->ownerq, core_entry) {
			/*
			 * XXX weird code also checked at the top of scan2,
			 * I would like to fix this by detaching the core
			 * on initial hardlink consolidation (1->2 nlinks).
			 */
#if 0
			if (scan->bref.type == HAMMER2_BREF_TYPE_INODE &&
			    (scan->data->ipdata.op_flags &
			     HAMMER2_OPFLAG_DIRECTDATA)) {
				continue;
			}
#endif
			if (scan->flags & HAMMER2_CHAIN_SUBMODIFIED) {
				ok = 0;
				break;
			}
		}
		spin_unlock(&above->cst.spin);
		if (ok) {
			atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED);
			hammer2_chain_drop(child);	/* drop ref held by MOVED flag */
		}
	}

	/*
	 * Unlock the child.  This can wind up dropping the child's
	 * last ref, removing it from the parent's RB tree, and deallocating
	 * the structure.  The RB_SCAN() our caller is doing handles the
	 * situation.
	 */
	hammer2_chain_unlock(child);
	hammer2_chain_drop(child);
	spin_lock(&above->cst.spin);

	/*
	 * The parent cleared SUBMODIFIED prior to the scan.  If the child
	 * still requires a flush (possibly due to being outside the current
	 * synchronization zone), we must re-set SUBMODIFIED on the way back
	 * up.
	 */
finalize:
	return (0);
}

/*
 * Roll the child's accumulated data/inode usage deltas up into the parent
 * and account for the child's own footprint in the parent's block table.
 *
 * how < 0	child's blockref is being deleted from the parent: subtract
 *		the child's size (and inode count for inodes).
 * how > 0	child's blockref is being inserted: add the child's size
 *		(and inode count for inodes).
 * how == 0	in-place replacement: only the queued per-chain deltas are
 *		transferred, no size adjustment.
 *
 * When the parent is an inode the pending deltas are folded directly into
 * the on-media inode counters and the parent's in-memory deltas are reset.
 *
 * NOTE(review): callers (scan2) invoke this with the parent chain already
 * modified/locked; no locking is done here -- confirm the caller contract
 * before reusing elsewhere.
 */
static
void
hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how)
{
#if 0
	hammer2_chain_t *grandp;
#endif

	/* Transfer the child's queued deltas up one level. */
	parent->data_count += child->data_count;
	parent->inode_count += child->inode_count;
	child->data_count = 0;
	child->inode_count = 0;
	if (how < 0) {
		parent->data_count -= child->bytes;
		if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
			parent->inode_count -= 1;
#if 0
			/* XXX child->data may be NULL atm */
			parent->data_count -= child->data->ipdata.data_count;
			parent->inode_count -= child->data->ipdata.inode_count;
#endif
		}
	} else if (how > 0) {
		parent->data_count += child->bytes;
		if (child->bref.type == HAMMER2_BREF_TYPE_INODE) {
			parent->inode_count += 1;
#if 0
			/* XXX child->data may be NULL atm */
			parent->data_count += child->data->ipdata.data_count;
			parent->inode_count += child->data->ipdata.inode_count;
#endif
		}
	}
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) {
		/*
		 * Fold the deltas into the inode's media counters and
		 * clear the in-memory accumulators.
		 */
		parent->data->ipdata.data_count += parent->data_count;
		parent->data->ipdata.inode_count += parent->inode_count;
#if 0
		for (grandp = parent->above->first_parent;
		     grandp;
		     grandp = grandp->next_parent) {
			grandp->data_count += parent->data_count;
			grandp->inode_count += parent->inode_count;
		}
#endif
		parent->data_count = 0;
		parent->inode_count = 0;
	}
}