132b800e6SMatthew Dillon /* 20dea3156SMatthew Dillon * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 332b800e6SMatthew Dillon * 432b800e6SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 532b800e6SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 632b800e6SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 732b800e6SMatthew Dillon * 832b800e6SMatthew Dillon * Redistribution and use in source and binary forms, with or without 932b800e6SMatthew Dillon * modification, are permitted provided that the following conditions 1032b800e6SMatthew Dillon * are met: 1132b800e6SMatthew Dillon * 1232b800e6SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 1332b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer. 1432b800e6SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 1532b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer in 1632b800e6SMatthew Dillon * the documentation and/or other materials provided with the 1732b800e6SMatthew Dillon * distribution. 1832b800e6SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 1932b800e6SMatthew Dillon * contributors may be used to endorse or promote products derived 2032b800e6SMatthew Dillon * from this software without specific, prior written permission. 2132b800e6SMatthew Dillon * 2232b800e6SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2332b800e6SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2432b800e6SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 2532b800e6SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 2632b800e6SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2732b800e6SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 2832b800e6SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2932b800e6SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 3032b800e6SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3132b800e6SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 3232b800e6SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3332b800e6SMatthew Dillon * SUCH DAMAGE. 3432b800e6SMatthew Dillon */ 3532b800e6SMatthew Dillon 3632b800e6SMatthew Dillon #include <sys/cdefs.h> 3732b800e6SMatthew Dillon #include <sys/param.h> 3832b800e6SMatthew Dillon #include <sys/systm.h> 3932b800e6SMatthew Dillon #include <sys/types.h> 4032b800e6SMatthew Dillon #include <sys/lock.h> 4132b800e6SMatthew Dillon #include <sys/uuid.h> 4232b800e6SMatthew Dillon 4332b800e6SMatthew Dillon #include "hammer2.h" 4432b800e6SMatthew Dillon 4532b800e6SMatthew Dillon /* 4632b800e6SMatthew Dillon * Recursively flush the specified chain. The chain is locked and 4732b800e6SMatthew Dillon * referenced by the caller and will remain so on return. The chain 4832b800e6SMatthew Dillon * will remain referenced throughout but can temporarily lose its 4932b800e6SMatthew Dillon * lock during the recursion to avoid unnecessarily stalling user 5032b800e6SMatthew Dillon * processes. 
5132b800e6SMatthew Dillon */ 5232b800e6SMatthew Dillon struct hammer2_flush_info { 530dea3156SMatthew Dillon hammer2_chain_t *parent; 540dea3156SMatthew Dillon hammer2_trans_t *trans; 5532b800e6SMatthew Dillon int depth; 560dea3156SMatthew Dillon int diddeferral; 571897c66eSMatthew Dillon int pass; 581897c66eSMatthew Dillon int cache_index; 59a4dc31e0SMatthew Dillon int domodify; 601897c66eSMatthew Dillon struct h2_flush_deferral_list flush_list; 610dea3156SMatthew Dillon hammer2_tid_t sync_tid; /* flush synchronization point */ 620dea3156SMatthew Dillon hammer2_tid_t mirror_tid; /* collect mirror TID updates */ 6332b800e6SMatthew Dillon }; 6432b800e6SMatthew Dillon 6532b800e6SMatthew Dillon typedef struct hammer2_flush_info hammer2_flush_info_t; 6632b800e6SMatthew Dillon 670dea3156SMatthew Dillon static void hammer2_chain_flush_core(hammer2_flush_info_t *info, 68a7720be7SMatthew Dillon hammer2_chain_t **chainp); 690dea3156SMatthew Dillon static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data); 700dea3156SMatthew Dillon static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data); 7191abd410SMatthew Dillon static void hammer2_rollup_stats(hammer2_chain_t *parent, 7291abd410SMatthew Dillon hammer2_chain_t *child, int how); 7332b800e6SMatthew Dillon 7493f3933aSMatthew Dillon #if 0 7593f3933aSMatthew Dillon static __inline 7693f3933aSMatthew Dillon void 7793f3933aSMatthew Dillon hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref, 7893f3933aSMatthew Dillon int how) 7993f3933aSMatthew Dillon { 8093f3933aSMatthew Dillon hammer2_key_t bytes; 8193f3933aSMatthew Dillon 8293f3933aSMatthew Dillon if (bref->type != 0) { 8393f3933aSMatthew Dillon bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX); 8493f3933aSMatthew Dillon if (bref->type == HAMMER2_BREF_TYPE_INODE) 8593f3933aSMatthew Dillon info->inode_count += how; 8693f3933aSMatthew Dillon if (how < 0) 8793f3933aSMatthew Dillon info->data_count -= bytes; 
8893f3933aSMatthew Dillon else 8993f3933aSMatthew Dillon info->data_count += bytes; 9093f3933aSMatthew Dillon } 9193f3933aSMatthew Dillon } 9293f3933aSMatthew Dillon #endif 9393f3933aSMatthew Dillon 9432b800e6SMatthew Dillon /* 950dea3156SMatthew Dillon * Transaction support functions for writing to the filesystem. 960dea3156SMatthew Dillon * 970dea3156SMatthew Dillon * Initializing a new transaction allocates a transaction ID. We 980dea3156SMatthew Dillon * don't bother marking the volume header MODIFIED. Instead, the volume 99a02dfba1SMatthew Dillon * will be synchronized at a later time as part of a larger flush sequence. 1000dea3156SMatthew Dillon * 101d001f460SMatthew Dillon * Non-flush transactions can typically run concurrently. However if 102d001f460SMatthew Dillon * there are non-flush transaction both before AND after a flush trans, 103d001f460SMatthew Dillon * the transactions after stall until the ones before finish. 104d001f460SMatthew Dillon * 105d001f460SMatthew Dillon * Non-flush transactions occuring after a flush pointer can run concurrently 106d001f460SMatthew Dillon * with that flush. They only have to wait for transactions prior to the 107d001f460SMatthew Dillon * flush trans to complete before they unstall. 108d001f460SMatthew Dillon * 109355d67fcSMatthew Dillon * WARNING! Transaction ids are only allocated when the transaction becomes 110355d67fcSMatthew Dillon * active, which allows other transactions to insert ahead of us 111355d67fcSMatthew Dillon * if we are forced to block (only bioq transactions do that). 112355d67fcSMatthew Dillon * 1130dea3156SMatthew Dillon * WARNING! Modifications to the root volume cannot dup the root volume 1140dea3156SMatthew Dillon * header to handle synchronization points, so alloc_tid can 1150dea3156SMatthew Dillon * wind up (harmlessly) more advanced on flush. 
1160dea3156SMatthew Dillon */ 1170dea3156SMatthew Dillon void 118a5913bdfSMatthew Dillon hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags) 1190dea3156SMatthew Dillon { 120a5913bdfSMatthew Dillon hammer2_mount_t *hmp; 121a4dc31e0SMatthew Dillon hammer2_trans_t *head; 122d001f460SMatthew Dillon 1230dea3156SMatthew Dillon bzero(trans, sizeof(*trans)); 124a5913bdfSMatthew Dillon trans->pmp = pmp; 1257bed8d7eSMatthew Dillon hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 126d001f460SMatthew Dillon 1270dea3156SMatthew Dillon hammer2_voldata_lock(hmp); 128d001f460SMatthew Dillon trans->flags = flags; 129d001f460SMatthew Dillon trans->td = curthread; 1301897c66eSMatthew Dillon /*trans->delete_gen = 0;*/ /* multiple deletions within trans */ 131d001f460SMatthew Dillon 132d001f460SMatthew Dillon if (flags & HAMMER2_TRANS_ISFLUSH) { 133d001f460SMatthew Dillon /* 134355d67fcSMatthew Dillon * If multiple flushes are trying to run we have to 135a4dc31e0SMatthew Dillon * wait until it is our turn. All flushes are serialized. 136355d67fcSMatthew Dillon * 137a4dc31e0SMatthew Dillon * We queue ourselves and then wait to become the head 138a4dc31e0SMatthew Dillon * of the queue, allowing all prior flushes to complete. 
139355d67fcSMatthew Dillon */ 140355d67fcSMatthew Dillon ++hmp->flushcnt; 141355d67fcSMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid++; 142a4dc31e0SMatthew Dillon trans->real_tid = trans->sync_tid; 143355d67fcSMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 144a4dc31e0SMatthew Dillon if (TAILQ_FIRST(&hmp->transq) != trans) { 145d001f460SMatthew Dillon trans->blocked = 1; 146d001f460SMatthew Dillon while (trans->blocked) { 147a4dc31e0SMatthew Dillon lksleep(&trans->sync_tid, &hmp->voldatalk, 148a4dc31e0SMatthew Dillon 0, "h2multf", hz); 149d001f460SMatthew Dillon } 150d001f460SMatthew Dillon } 151a4dc31e0SMatthew Dillon } else if (hmp->flushcnt == 0) { 152a7720be7SMatthew Dillon /* 153a4dc31e0SMatthew Dillon * No flushes are pending, we can go. 154a7720be7SMatthew Dillon */ 155a4dc31e0SMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 156a4dc31e0SMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid; 157a4dc31e0SMatthew Dillon trans->real_tid = trans->sync_tid; 158a7720be7SMatthew Dillon 159a4dc31e0SMatthew Dillon /* XXX improve/optimize inode allocation */ 160a4dc31e0SMatthew Dillon } else { 161a4dc31e0SMatthew Dillon /* 162a4dc31e0SMatthew Dillon * One or more flushes are pending. We insert after 163a4dc31e0SMatthew Dillon * the current flush and may block. We have priority 164a4dc31e0SMatthew Dillon * over any flushes that are not the current flush. 165a4dc31e0SMatthew Dillon * 166a4dc31e0SMatthew Dillon * TRANS_BUFCACHE transactions cannot block. 
167a4dc31e0SMatthew Dillon */ 168a4dc31e0SMatthew Dillon TAILQ_FOREACH(head, &hmp->transq, entry) { 169a4dc31e0SMatthew Dillon if (head->flags & HAMMER2_TRANS_ISFLUSH) 170a4dc31e0SMatthew Dillon break; 171a7720be7SMatthew Dillon } 172a4dc31e0SMatthew Dillon KKASSERT(head); 173a4dc31e0SMatthew Dillon TAILQ_INSERT_AFTER(&hmp->transq, head, trans, entry); 174a4dc31e0SMatthew Dillon trans->sync_tid = head->real_tid + 1; 175a4dc31e0SMatthew Dillon trans->real_tid = trans->sync_tid; 176a4dc31e0SMatthew Dillon 177a4dc31e0SMatthew Dillon if ((trans->flags & HAMMER2_TRANS_BUFCACHE) == 0) { 178a4dc31e0SMatthew Dillon if (TAILQ_FIRST(&hmp->transq) != head) { 179a4dc31e0SMatthew Dillon trans->blocked = 1; 180a4dc31e0SMatthew Dillon while (trans->blocked) { 181a4dc31e0SMatthew Dillon lksleep(&trans->sync_tid, 182a4dc31e0SMatthew Dillon &hmp->voldatalk, 0, 183a4dc31e0SMatthew Dillon "h2multf", hz); 184a4dc31e0SMatthew Dillon } 185a4dc31e0SMatthew Dillon } 186a4dc31e0SMatthew Dillon } 187a4dc31e0SMatthew Dillon } 188a4dc31e0SMatthew Dillon if (flags & HAMMER2_TRANS_NEWINODE) 189a4dc31e0SMatthew Dillon trans->inode_tid = hmp->voldata.inode_tid++; 190a7720be7SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 191a7720be7SMatthew Dillon } 192a7720be7SMatthew Dillon 1930dea3156SMatthew Dillon void 1940dea3156SMatthew Dillon hammer2_trans_done(hammer2_trans_t *trans) 1950dea3156SMatthew Dillon { 196a5913bdfSMatthew Dillon hammer2_mount_t *hmp; 197a4dc31e0SMatthew Dillon hammer2_trans_t *head; 198d001f460SMatthew Dillon hammer2_trans_t *scan; 199a02dfba1SMatthew Dillon 2007bed8d7eSMatthew Dillon hmp = trans->pmp->cluster.chains[0]->hmp; 201a5913bdfSMatthew Dillon 202a4dc31e0SMatthew Dillon /* 203a4dc31e0SMatthew Dillon * Remove and adjust flushcnt 204a4dc31e0SMatthew Dillon */ 205a02dfba1SMatthew Dillon hammer2_voldata_lock(hmp); 206d001f460SMatthew Dillon TAILQ_REMOVE(&hmp->transq, trans, entry); 207a4dc31e0SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) 208d001f460SMatthew 
Dillon --hmp->flushcnt; 209a4dc31e0SMatthew Dillon 210355d67fcSMatthew Dillon /* 211a4dc31e0SMatthew Dillon * Unblock the head of the queue and any additional transactions 212a4dc31e0SMatthew Dillon * up to the next flush. 213355d67fcSMatthew Dillon */ 214a4dc31e0SMatthew Dillon head = TAILQ_FIRST(&hmp->transq); 215a4dc31e0SMatthew Dillon if (head && head->blocked) { 216a4dc31e0SMatthew Dillon head->blocked = 0; 217a4dc31e0SMatthew Dillon wakeup(&head->sync_tid); 218a4dc31e0SMatthew Dillon 219a4dc31e0SMatthew Dillon scan = TAILQ_NEXT(head, entry); 220a4dc31e0SMatthew Dillon while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) { 221a4dc31e0SMatthew Dillon scan->blocked = 0; 222d001f460SMatthew Dillon wakeup(&scan->sync_tid); 223a4dc31e0SMatthew Dillon scan = TAILQ_NEXT(scan, entry); 224a02dfba1SMatthew Dillon } 225a02dfba1SMatthew Dillon } 226a02dfba1SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 227a02dfba1SMatthew Dillon } 228a02dfba1SMatthew Dillon 2290dea3156SMatthew Dillon /* 2300dea3156SMatthew Dillon * Flush the chain and all modified sub-chains through the specified 2310dea3156SMatthew Dillon * synchronization point (sync_tid), propagating parent chain modifications 2320dea3156SMatthew Dillon * and mirror_tid updates back up as needed. Since we are recursing downward 2330dea3156SMatthew Dillon * we do not have to deal with the complexities of multi-homed chains (chains 2340dea3156SMatthew Dillon * with multiple parents). 2350dea3156SMatthew Dillon * 2360dea3156SMatthew Dillon * Caller must have interlocked against any non-flush-related modifying 2370dea3156SMatthew Dillon * operations in progress whos modify_tid values are less than or equal 2380dea3156SMatthew Dillon * to the passed sync_tid. 2390dea3156SMatthew Dillon * 2400dea3156SMatthew Dillon * Caller must have already vetted synchronization points to ensure they 2410dea3156SMatthew Dillon * are properly flushed. 
Only snapshots and cluster flushes can create 2420dea3156SMatthew Dillon * these sorts of synchronization points. 2430dea3156SMatthew Dillon * 24432b800e6SMatthew Dillon * This routine can be called from several places but the most important 24532b800e6SMatthew Dillon * is from the hammer2_vop_reclaim() function. We want to try to completely 24632b800e6SMatthew Dillon * clean out the inode structure to prevent disconnected inodes from 2470dea3156SMatthew Dillon * building up and blowing out the kmalloc pool. However, it is not actually 2480dea3156SMatthew Dillon * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery 2490dea3156SMatthew Dillon * capability. 25032b800e6SMatthew Dillon * 2510dea3156SMatthew Dillon * chain is locked on call and will remain locked on return. If a flush 2520dea3156SMatthew Dillon * occured, the chain's MOVED bit will be set indicating that its parent 253*053e752cSMatthew Dillon * (which is not part of the flush) should be updated. The chain may be 254*053e752cSMatthew Dillon * replaced by the call. 25532b800e6SMatthew Dillon */ 25632b800e6SMatthew Dillon void 257a7720be7SMatthew Dillon hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) 25832b800e6SMatthew Dillon { 259a7720be7SMatthew Dillon hammer2_chain_t *chain = *chainp; 26032b800e6SMatthew Dillon hammer2_chain_t *scan; 261731b2a84SMatthew Dillon hammer2_chain_core_t *core; 26232b800e6SMatthew Dillon hammer2_flush_info_t info; 26332b800e6SMatthew Dillon 26432b800e6SMatthew Dillon /* 26532b800e6SMatthew Dillon * Execute the recursive flush and handle deferrals. 26632b800e6SMatthew Dillon * 26732b800e6SMatthew Dillon * Chains can be ridiculously long (thousands deep), so to 26832b800e6SMatthew Dillon * avoid blowing out the kernel stack the recursive flush has a 26932b800e6SMatthew Dillon * depth limit. Elements at the limit are placed on a list 27032b800e6SMatthew Dillon * for re-execution after the stack has been popped. 
27132b800e6SMatthew Dillon */ 27232b800e6SMatthew Dillon bzero(&info, sizeof(info)); 27332b800e6SMatthew Dillon TAILQ_INIT(&info.flush_list); 2740dea3156SMatthew Dillon info.trans = trans; 2750dea3156SMatthew Dillon info.sync_tid = trans->sync_tid; 2760dea3156SMatthew Dillon info.mirror_tid = 0; 2771897c66eSMatthew Dillon info.cache_index = -1; 27832b800e6SMatthew Dillon 279731b2a84SMatthew Dillon core = chain->core; 280a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 281a4dc31e0SMatthew Dillon kprintf("CHAIN FLUSH trans %p.%016jx chain %p.%d mod %016jx upd %016jx\n", trans, trans->sync_tid, chain, chain->bref.type, chain->modify_tid, core->update_tid); 282a4dc31e0SMatthew Dillon #endif 283731b2a84SMatthew Dillon 284a7720be7SMatthew Dillon /* 285a7720be7SMatthew Dillon * Extra ref needed because flush_core expects it when replacing 286a7720be7SMatthew Dillon * chain. 287a7720be7SMatthew Dillon */ 288a7720be7SMatthew Dillon hammer2_chain_ref(chain); 289a7720be7SMatthew Dillon 2900dea3156SMatthew Dillon for (;;) { 29132b800e6SMatthew Dillon /* 2920dea3156SMatthew Dillon * Unwind deep recursions which had been deferred. This 2930dea3156SMatthew Dillon * can leave MOVED set for these chains, which will be 2940dea3156SMatthew Dillon * handled when we [re]flush chain after the unwind. 29532b800e6SMatthew Dillon */ 29632b800e6SMatthew Dillon while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) { 29732b800e6SMatthew Dillon KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); 29832b800e6SMatthew Dillon TAILQ_REMOVE(&info.flush_list, scan, flush_node); 29932b800e6SMatthew Dillon atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED); 30032b800e6SMatthew Dillon 30132b800e6SMatthew Dillon /* 30232b800e6SMatthew Dillon * Now that we've popped back up we can do a secondary 30332b800e6SMatthew Dillon * recursion on the deferred elements. 304*053e752cSMatthew Dillon * 305*053e752cSMatthew Dillon * NOTE: hammer2_chain_flush() may replace scan. 
30632b800e6SMatthew Dillon */ 30732b800e6SMatthew Dillon if (hammer2_debug & 0x0040) 308*053e752cSMatthew Dillon kprintf("deferred flush %p\n", scan); 3090dea3156SMatthew Dillon hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE); 310*053e752cSMatthew Dillon hammer2_chain_drop(scan); /* ref from deferral */ 311a7720be7SMatthew Dillon hammer2_chain_flush(trans, &scan); 3120dea3156SMatthew Dillon hammer2_chain_unlock(scan); 31332b800e6SMatthew Dillon } 31432b800e6SMatthew Dillon 31532b800e6SMatthew Dillon /* 3168853dfb5SMatthew Dillon * Flush pass1 on root. 31732b800e6SMatthew Dillon */ 3180dea3156SMatthew Dillon info.diddeferral = 0; 319a7720be7SMatthew Dillon hammer2_chain_flush_core(&info, &chain); 3209797e933SMatthew Dillon #if FLUSH_DEBUG 3219797e933SMatthew Dillon kprintf("flush_core_done parent=<base> chain=%p.%d %08x\n", 3229797e933SMatthew Dillon chain, chain->bref.type, chain->flags); 3239797e933SMatthew Dillon #endif 32432b800e6SMatthew Dillon 32532b800e6SMatthew Dillon /* 3260dea3156SMatthew Dillon * Only loop if deep recursions have been deferred. 32732b800e6SMatthew Dillon */ 3280dea3156SMatthew Dillon if (TAILQ_EMPTY(&info.flush_list)) 32932b800e6SMatthew Dillon break; 33032b800e6SMatthew Dillon } 331a7720be7SMatthew Dillon hammer2_chain_drop(chain); 332a7720be7SMatthew Dillon *chainp = chain; 33332b800e6SMatthew Dillon } 33432b800e6SMatthew Dillon 335476d2aadSMatthew Dillon /* 336ea155208SMatthew Dillon * This is the core of the chain flushing code. The chain is locked by the 337a7720be7SMatthew Dillon * caller and must also have an extra ref on it by the caller, and remains 338a7720be7SMatthew Dillon * locked and will have an extra ref on return. 339a7720be7SMatthew Dillon * 340a7720be7SMatthew Dillon * This function is keyed off of the update_tid bit but must make 341a7720be7SMatthew Dillon * fine-grained choices based on the synchronization point we are flushing to. 
3420dea3156SMatthew Dillon * 3430dea3156SMatthew Dillon * If the flush accomplished any work chain will be flagged MOVED 3440dea3156SMatthew Dillon * indicating a copy-on-write propagation back up is required. 3450dea3156SMatthew Dillon * Deep sub-nodes may also have been entered onto the deferral list. 3460dea3156SMatthew Dillon * MOVED is never set on the volume root. 3470dea3156SMatthew Dillon * 3480dea3156SMatthew Dillon * NOTE: modify_tid is different from MODIFIED. modify_tid is updated 3490dea3156SMatthew Dillon * only when a chain is specifically modified, and not updated 3500dea3156SMatthew Dillon * for copy-on-write propagations. MODIFIED is set on any modification 3510dea3156SMatthew Dillon * including copy-on-write propagations. 352476d2aadSMatthew Dillon */ 35332b800e6SMatthew Dillon static void 354a7720be7SMatthew Dillon hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp) 35532b800e6SMatthew Dillon { 356a7720be7SMatthew Dillon hammer2_chain_t *chain = *chainp; 3570dea3156SMatthew Dillon hammer2_mount_t *hmp; 35832b800e6SMatthew Dillon hammer2_blockref_t *bref; 35932b800e6SMatthew Dillon hammer2_off_t pbase; 360a98aa0b0SMatthew Dillon hammer2_off_t pmask; 3619b6b3df4SMatthew Dillon #if 0 362a864c5d9SMatthew Dillon hammer2_trans_t *trans = info->trans; 3639b6b3df4SMatthew Dillon #endif 364731b2a84SMatthew Dillon hammer2_chain_core_t *core; 365a98aa0b0SMatthew Dillon size_t psize; 36632b800e6SMatthew Dillon size_t boff; 36732b800e6SMatthew Dillon char *bdata; 36832b800e6SMatthew Dillon struct buf *bp; 36932b800e6SMatthew Dillon int error; 37032b800e6SMatthew Dillon int wasmodified; 3710dea3156SMatthew Dillon int diddeferral = 0; 37232b800e6SMatthew Dillon 373a5913bdfSMatthew Dillon hmp = chain->hmp; 37432b800e6SMatthew Dillon 3759797e933SMatthew Dillon #if FLUSH_DEBUG 3769797e933SMatthew Dillon if (info->parent) 3779797e933SMatthew Dillon kprintf("flush_core %p->%p.%d %08x (%s)\n", 3789797e933SMatthew Dillon info->parent, 
chain, chain->bref.type, 3799797e933SMatthew Dillon chain->flags, 3809797e933SMatthew Dillon ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? 3819797e933SMatthew Dillon chain->data->ipdata.filename : "?")); 3829797e933SMatthew Dillon else 3839797e933SMatthew Dillon kprintf("flush_core NULL->%p.%d %08x (%s)\n", 3849797e933SMatthew Dillon chain, chain->bref.type, 3859797e933SMatthew Dillon chain->flags, 3869797e933SMatthew Dillon ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? 3879797e933SMatthew Dillon chain->data->ipdata.filename : "?")); 388a4dc31e0SMatthew Dillon kprintf("PUSH %p.%d %08x mod=%016jx del=%016jx mirror=%016jx\n", chain, chain->bref.type, chain->flags, chain->modify_tid, chain->delete_tid, chain->bref.mirror_tid); 3899797e933SMatthew Dillon #endif 390a4dc31e0SMatthew Dillon 39132b800e6SMatthew Dillon /* 392731b2a84SMatthew Dillon * Ignore chains modified beyond the current flush point. These 393a4dc31e0SMatthew Dillon * will be treated as if they did not exist. Subchains with lower 394a4dc31e0SMatthew Dillon * modify_tid's will still be accessible via other parents. 395a4dc31e0SMatthew Dillon * 396a4dc31e0SMatthew Dillon * (vchain and fchain are exceptions since they cannot be duplicated) 397ea155208SMatthew Dillon */ 398a4dc31e0SMatthew Dillon if (chain->modify_tid > info->sync_tid && 399a4dc31e0SMatthew Dillon chain != &hmp->fchain && chain != &hmp->vchain) { 400a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 5; 401ea155208SMatthew Dillon return; 402a4dc31e0SMatthew Dillon } 403731b2a84SMatthew Dillon 404731b2a84SMatthew Dillon core = chain->core; 405ea155208SMatthew Dillon 406ea155208SMatthew Dillon /* 407a7720be7SMatthew Dillon * If update_tid triggers we recurse the flush and adjust the 40832b800e6SMatthew Dillon * blockrefs accordingly. 
40932b800e6SMatthew Dillon * 410a7720be7SMatthew Dillon * NOTE: Looping on update_tid can prevent a flush from ever 41132b800e6SMatthew Dillon * finishing in the face of filesystem activity. 412a7720be7SMatthew Dillon * 413a7720be7SMatthew Dillon * NOTE: We must recurse whether chain is flagged DELETED or not. 414a7720be7SMatthew Dillon * However, if it is flagged DELETED we limit sync_tid to 415a7720be7SMatthew Dillon * delete_tid to ensure that the chain's bref.mirror_tid is 416a7720be7SMatthew Dillon * not fully updated and causes it to miss the non-DELETED 417a7720be7SMatthew Dillon * path. 41832b800e6SMatthew Dillon */ 419a4dc31e0SMatthew Dillon if (chain->bref.mirror_tid < core->update_tid && 420a4dc31e0SMatthew Dillon chain->bref.mirror_tid < info->sync_tid) { 4210dea3156SMatthew Dillon hammer2_chain_t *saved_parent; 422ea155208SMatthew Dillon hammer2_tid_t saved_mirror; 4231897c66eSMatthew Dillon hammer2_chain_layer_t *layer; 424a4dc31e0SMatthew Dillon int saved_domodify; 425a4dc31e0SMatthew Dillon int save_gen; 42632b800e6SMatthew Dillon 42732b800e6SMatthew Dillon /* 428a7720be7SMatthew Dillon * Races will bump update_tid above trans->sync_tid causing 429a7720be7SMatthew Dillon * us to catch the issue in a later flush. We do not update 430a7720be7SMatthew Dillon * update_tid if a deferral (or error XXX) occurs. 43132b800e6SMatthew Dillon * 43232b800e6SMatthew Dillon * We don't want to set our chain to MODIFIED gratuitously. 43332b800e6SMatthew Dillon * 43432b800e6SMatthew Dillon * We need an extra ref on chain because we are going to 43532b800e6SMatthew Dillon * release its lock temporarily in our child loop. 43632b800e6SMatthew Dillon */ 43732b800e6SMatthew Dillon 43832b800e6SMatthew Dillon /* 4390dea3156SMatthew Dillon * Run two passes. The first pass handles MODIFIED and 440a7720be7SMatthew Dillon * update_tid recursions while the second pass handles 441a7720be7SMatthew Dillon * MOVED chains on the way back up. 
44232b800e6SMatthew Dillon * 4430dea3156SMatthew Dillon * If the stack gets too deep we defer scan1, but must 4440dea3156SMatthew Dillon * be sure to still run scan2 if on the next loop the 4450dea3156SMatthew Dillon * deferred chain has been flushed and now needs MOVED 4460dea3156SMatthew Dillon * handling on the way back up. 44732b800e6SMatthew Dillon * 4480dea3156SMatthew Dillon * Scan1 is recursive. 44932b800e6SMatthew Dillon * 4500dea3156SMatthew Dillon * NOTE: The act of handling a modified/submodified chain can 4510dea3156SMatthew Dillon * cause the MOVED Flag to be set. It can also be set 4520dea3156SMatthew Dillon * via hammer2_chain_delete() and in other situations. 4530dea3156SMatthew Dillon * 4540dea3156SMatthew Dillon * NOTE: RB_SCAN() must be used instead of RB_FOREACH() 4550dea3156SMatthew Dillon * because children can be physically removed during 4560dea3156SMatthew Dillon * the scan. 457a4dc31e0SMatthew Dillon * 458a4dc31e0SMatthew Dillon * NOTE: We would normally not care about insertions except 459a4dc31e0SMatthew Dillon * that some insertions might occur from the flush 460a4dc31e0SMatthew Dillon * itself, so loop on generation number changes. 
46132b800e6SMatthew Dillon */ 4620dea3156SMatthew Dillon saved_parent = info->parent; 463ea155208SMatthew Dillon saved_mirror = info->mirror_tid; 464a4dc31e0SMatthew Dillon saved_domodify = info->domodify; 4650dea3156SMatthew Dillon info->parent = chain; 466ea155208SMatthew Dillon info->mirror_tid = chain->bref.mirror_tid; 467a4dc31e0SMatthew Dillon info->domodify = 0; 468a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 6; 46932b800e6SMatthew Dillon 4700dea3156SMatthew Dillon if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 4710dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) { 4720dea3156SMatthew Dillon hammer2_chain_ref(chain); 4730dea3156SMatthew Dillon TAILQ_INSERT_TAIL(&info->flush_list, 4740dea3156SMatthew Dillon chain, flush_node); 4750dea3156SMatthew Dillon atomic_set_int(&chain->flags, 4760dea3156SMatthew Dillon HAMMER2_CHAIN_DEFERRED); 4770dea3156SMatthew Dillon } 4780dea3156SMatthew Dillon diddeferral = 1; 47932b800e6SMatthew Dillon } else { 4800dea3156SMatthew Dillon info->diddeferral = 0; 481731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 48251a0d27cSMatthew Dillon KKASSERT(core->good == 0x1234 && core->sharecnt > 0); 483a4dc31e0SMatthew Dillon do { 484a4dc31e0SMatthew Dillon save_gen = core->generation; 4851897c66eSMatthew Dillon TAILQ_FOREACH_REVERSE(layer, &core->layerq, 4861897c66eSMatthew Dillon h2_layer_list, entry) { 4871897c66eSMatthew Dillon ++layer->refs; 48851a0d27cSMatthew Dillon KKASSERT(layer->good == 0xABCD); 489a4dc31e0SMatthew Dillon RB_SCAN(hammer2_chain_tree, 490a4dc31e0SMatthew Dillon &layer->rbtree, 491a4dc31e0SMatthew Dillon NULL, hammer2_chain_flush_scan1, 492a4dc31e0SMatthew Dillon info); 4931897c66eSMatthew Dillon --layer->refs; 4940dea3156SMatthew Dillon diddeferral += info->diddeferral; 49532b800e6SMatthew Dillon } 496a4dc31e0SMatthew Dillon } while (core->generation != save_gen); 4971897c66eSMatthew Dillon spin_unlock(&core->cst.spin); 4981897c66eSMatthew Dillon } 
49932b800e6SMatthew Dillon 500a4dc31e0SMatthew Dillon /* 501a4dc31e0SMatthew Dillon * Blockrefs are only updated on live chains. 502a4dc31e0SMatthew Dillon * 503a4dc31e0SMatthew Dillon * We are possibly causing a delete-duplicate from inside the 504a4dc31e0SMatthew Dillon * flush itself. The parent might be live or might have been 505a4dc31e0SMatthew Dillon * deleted concurrently in a post-flush transaction. If 506a4dc31e0SMatthew Dillon * the parent was deleted our modified chain will also be 507a4dc31e0SMatthew Dillon * marked deleted, but since it inherits the parent's 508a4dc31e0SMatthew Dillon * delete_tid it will still appear to be 'live' for the 509a4dc31e0SMatthew Dillon * purposes of the flush. 510a4dc31e0SMatthew Dillon * 511a4dc31e0SMatthew Dillon * There may also be a side-effect due to the freemap 512a4dc31e0SMatthew Dillon * allocation. See freemap_alloc() 513a4dc31e0SMatthew Dillon */ 514a4dc31e0SMatthew Dillon if (info->domodify && chain->delete_tid > info->sync_tid) { 515a4dc31e0SMatthew Dillon hammer2_chain_modify(info->trans, &info->parent, 516a4dc31e0SMatthew Dillon HAMMER2_MODIFY_NO_MODIFY_TID); 517a4dc31e0SMatthew Dillon if (info->parent != chain) { 518a4dc31e0SMatthew Dillon hammer2_chain_drop(chain); 519a4dc31e0SMatthew Dillon hammer2_chain_ref(info->parent); 520a4dc31e0SMatthew Dillon } 521a4dc31e0SMatthew Dillon chain = info->parent; 522a4dc31e0SMatthew Dillon } 523a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 7; 524a7720be7SMatthew Dillon 52532b800e6SMatthew Dillon /* 5260dea3156SMatthew Dillon * Handle successfully flushed children who are in the MOVED 5270dea3156SMatthew Dillon * state on the way back up the recursion. This can have 5280dea3156SMatthew Dillon * the side-effect of clearing MOVED. 52932b800e6SMatthew Dillon * 530a7720be7SMatthew Dillon * Scan2 may replace info->parent. If it does it will also 531a7720be7SMatthew Dillon * replace the extra ref we made. 
532a7720be7SMatthew Dillon * 5330dea3156SMatthew Dillon * Scan2 is non-recursive. 53432b800e6SMatthew Dillon */ 535cd189b1eSMatthew Dillon if (diddeferral) { 536731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 5371897c66eSMatthew Dillon } else { 5381897c66eSMatthew Dillon spin_lock(&core->cst.spin); 53951a0d27cSMatthew Dillon KKASSERT(core->good == 0x1234 && core->sharecnt > 0); 5401897c66eSMatthew Dillon TAILQ_FOREACH_REVERSE(layer, &core->layerq, 5411897c66eSMatthew Dillon h2_layer_list, entry) { 5421897c66eSMatthew Dillon info->pass = 1; 5431897c66eSMatthew Dillon ++layer->refs; 54451a0d27cSMatthew Dillon KKASSERT(layer->good == 0xABCD); 5451897c66eSMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 5460dea3156SMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 5471897c66eSMatthew Dillon info->pass = 2; 5481897c66eSMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 5491897c66eSMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 5501897c66eSMatthew Dillon --layer->refs; 551a7720be7SMatthew Dillon KKASSERT(info->parent->core == core); 55232b800e6SMatthew Dillon } 5530dea3156SMatthew Dillon 5540dea3156SMatthew Dillon /* 555a7720be7SMatthew Dillon * Mirror_tid propagates all changes. It is also used 556a7720be7SMatthew Dillon * in scan2 to determine when a chain must be applied 557a7720be7SMatthew Dillon * to the related block table. 
558ea155208SMatthew Dillon */ 559a7720be7SMatthew Dillon KKASSERT(info->parent->bref.mirror_tid <= 560a7720be7SMatthew Dillon info->mirror_tid); 561a4dc31e0SMatthew Dillon chain->bref.mirror_tid = info->mirror_tid; 562a7720be7SMatthew Dillon } 563a7720be7SMatthew Dillon 564a7720be7SMatthew Dillon /* 565a4dc31e0SMatthew Dillon * info->parent must not have been replaced again 566a7720be7SMatthew Dillon */ 567a4dc31e0SMatthew Dillon KKASSERT(info->parent == chain); 568a4dc31e0SMatthew Dillon 569a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 8; 570a7720be7SMatthew Dillon *chainp = chain; 571a7720be7SMatthew Dillon 572a7720be7SMatthew Dillon hammer2_chain_layer_check_locked(chain->hmp, core); 573a7720be7SMatthew Dillon spin_unlock(&core->cst.spin); 574a7720be7SMatthew Dillon 575a7720be7SMatthew Dillon info->mirror_tid = saved_mirror; 576a7720be7SMatthew Dillon info->parent = saved_parent; 577a4dc31e0SMatthew Dillon info->domodify = saved_domodify; 578a7720be7SMatthew Dillon KKASSERT(chain->refs > 1); 579a7720be7SMatthew Dillon } 580a7720be7SMatthew Dillon 581a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 582a7720be7SMatthew Dillon kprintf("POP %p.%d\n", chain, chain->bref.type); 583a7720be7SMatthew Dillon #endif 584ea155208SMatthew Dillon 585ea155208SMatthew Dillon /* 5860dea3156SMatthew Dillon * Rollup diddeferral for caller. Note direct assignment, not +=. 5870dea3156SMatthew Dillon */ 5880dea3156SMatthew Dillon info->diddeferral = diddeferral; 5890dea3156SMatthew Dillon 5900dea3156SMatthew Dillon /* 5910dea3156SMatthew Dillon * Do not flush chain if there were any deferrals. It will be 5920dea3156SMatthew Dillon * retried later after the deferrals are independently handled. 
5930dea3156SMatthew Dillon */ 5940dea3156SMatthew Dillon if (diddeferral) { 595a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 99; 5960dea3156SMatthew Dillon if (hammer2_debug & 0x0008) { 5970dea3156SMatthew Dillon kprintf("%*.*s} %p/%d %04x (deferred)", 5980dea3156SMatthew Dillon info->depth, info->depth, "", 5990dea3156SMatthew Dillon chain, chain->refs, chain->flags); 60032b800e6SMatthew Dillon } 60132b800e6SMatthew Dillon return; 60232b800e6SMatthew Dillon } 60332b800e6SMatthew Dillon 60432b800e6SMatthew Dillon /* 605731b2a84SMatthew Dillon * If we encounter a deleted chain within our flush we can clear 606731b2a84SMatthew Dillon * the MODIFIED bit and avoid flushing it whether it has been 60793f3933aSMatthew Dillon * destroyed or not. We must make sure that the chain is flagged 60893f3933aSMatthew Dillon * MOVED in this situation so the parent picks up the deletion. 60991abd410SMatthew Dillon * 610a7720be7SMatthew Dillon * Since this chain will now never be written to disk we need to 611a7720be7SMatthew Dillon * adjust bref.mirror_tid such that it does not prevent sub-chains 612a7720be7SMatthew Dillon * from clearing their MOVED bits. 613a7720be7SMatthew Dillon * 614a7720be7SMatthew Dillon * NOTE: scan2 has already executed above so statistics have 61591abd410SMatthew Dillon * already been rolled up. 616a7720be7SMatthew Dillon * 617a7720be7SMatthew Dillon * NOTE: Deletions do not prevent flush recursion as a deleted 618a7720be7SMatthew Dillon * inode (removed file) which is still open may still require 619a7720be7SMatthew Dillon * on-media storage to be able to clean related pages out from 620a7720be7SMatthew Dillon * the system caches. 621a7720be7SMatthew Dillon * 622a7720be7SMatthew Dillon * NOTE: Even though this chain will not issue write I/O, we must 623a7720be7SMatthew Dillon * still update chain->bref.mirror_tid for flush management 624a7720be7SMatthew Dillon * purposes. 
62532b800e6SMatthew Dillon */ 626731b2a84SMatthew Dillon if (chain->delete_tid <= info->sync_tid) { 627a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 9; 628731b2a84SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 6291a7cfe5aSMatthew Dillon if (chain->bp) { 6301a7cfe5aSMatthew Dillon if (chain->bytes == chain->bp->b_bufsize) 631731b2a84SMatthew Dillon chain->bp->b_flags |= B_INVAL|B_RELBUF; 6321a7cfe5aSMatthew Dillon } 63393f3933aSMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { 63493f3933aSMatthew Dillon hammer2_chain_ref(chain); 63593f3933aSMatthew Dillon atomic_set_int(&chain->flags, 63693f3933aSMatthew Dillon HAMMER2_CHAIN_MOVED); 63793f3933aSMatthew Dillon } 638731b2a84SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 639a7720be7SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 640a7720be7SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 641731b2a84SMatthew Dillon hammer2_chain_drop(chain); 642731b2a84SMatthew Dillon } 643a7720be7SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 644a7720be7SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 645731b2a84SMatthew Dillon return; 646731b2a84SMatthew Dillon } 647731b2a84SMatthew Dillon #if 0 6480dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DESTROYED) && 649a864c5d9SMatthew Dillon (chain->flags & HAMMER2_CHAIN_DELETED) && 650a864c5d9SMatthew Dillon (trans->flags & HAMMER2_TRANS_RESTRICTED) == 0) { 651a864c5d9SMatthew Dillon /* 652a864c5d9SMatthew Dillon * Throw-away the MODIFIED flag 653a864c5d9SMatthew Dillon */ 6540dea3156SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 6551a7cfe5aSMatthew Dillon if (chain->bp) { 6561a7cfe5aSMatthew Dillon if (chain->bytes == chain->bp->b_bufsize) 6570dea3156SMatthew Dillon chain->bp->b_flags |= B_INVAL|B_RELBUF; 6581a7cfe5aSMatthew Dillon } 6590dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 6600dea3156SMatthew 
Dillon hammer2_chain_drop(chain); 6610dea3156SMatthew Dillon } 6620dea3156SMatthew Dillon return; 6630dea3156SMatthew Dillon } 664731b2a84SMatthew Dillon #endif 6650dea3156SMatthew Dillon 6660dea3156SMatthew Dillon /* 667ea155208SMatthew Dillon * A degenerate flush might not have flushed anything and thus not 668ea155208SMatthew Dillon * processed modified blocks on the way back up. Detect the case. 6690dea3156SMatthew Dillon * 6700dea3156SMatthew Dillon * Note that MOVED can be set without MODIFIED being set due to 6710dea3156SMatthew Dillon * a deletion, in which case it is handled by Scan2 later on. 6720dea3156SMatthew Dillon * 6730dea3156SMatthew Dillon * Both bits can be set along with DELETED due to a deletion if 6740dea3156SMatthew Dillon * modified data within the synchronization zone and the chain 6750dea3156SMatthew Dillon * was then deleted beyond the zone, in which case we still have 676ea155208SMatthew Dillon * to flush for synchronization point consistency. Otherwise though 677ea155208SMatthew Dillon * DELETED and MODIFIED are treated as separate flags. 6780dea3156SMatthew Dillon */ 679a7720be7SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 680a7720be7SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 681a7720be7SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 682a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 10; 6830dea3156SMatthew Dillon return; 684a7720be7SMatthew Dillon } 685a4dc31e0SMatthew Dillon chain->debug_reason = (chain->debug_reason & ~255) | 11; 6860dea3156SMatthew Dillon 6870dea3156SMatthew Dillon /* 6880dea3156SMatthew Dillon * Issue flush. 6890dea3156SMatthew Dillon * 6900dea3156SMatthew Dillon * A DESTROYED node that reaches this point must be flushed for 6910dea3156SMatthew Dillon * synchronization point consistency. 
6920dea3156SMatthew Dillon */ 6930dea3156SMatthew Dillon 6940dea3156SMatthew Dillon /* 6950dea3156SMatthew Dillon * Update mirror_tid, clear MODIFIED, and set MOVED. 6960dea3156SMatthew Dillon * 6970dea3156SMatthew Dillon * The caller will update the parent's reference to this chain 6980dea3156SMatthew Dillon * by testing MOVED as long as the modification was in-bounds. 6990dea3156SMatthew Dillon * 7000dea3156SMatthew Dillon * MOVED is never set on the volume root as there is no parent 7010dea3156SMatthew Dillon * to adjust. 7020dea3156SMatthew Dillon */ 703a7720be7SMatthew Dillon if (hammer2_debug & 0x1000) { 704a7720be7SMatthew Dillon kprintf("Flush %p.%d %016jx/%d sync_tid %016jx\n", 705a7720be7SMatthew Dillon chain, chain->bref.type, 706a7720be7SMatthew Dillon chain->bref.key, chain->bref.keybits, 707a7720be7SMatthew Dillon info->sync_tid); 708a7720be7SMatthew Dillon } 709a7720be7SMatthew Dillon if (hammer2_debug & 0x2000) { 710a7720be7SMatthew Dillon Debugger("Flush hell"); 711a7720be7SMatthew Dillon } 7120dea3156SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 7130dea3156SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 7140dea3156SMatthew Dillon wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0; 7150dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 7160dea3156SMatthew Dillon if (chain == &hmp->vchain) 7170dea3156SMatthew Dillon kprintf("(FLUSHED VOLUME HEADER)\n"); 7181a7cfe5aSMatthew Dillon if (chain == &hmp->fchain) 7191a7cfe5aSMatthew Dillon kprintf("(FLUSHED FREEMAP HEADER)\n"); 7200dea3156SMatthew Dillon 7210dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) || 7221a7cfe5aSMatthew Dillon chain == &hmp->vchain || 7231a7cfe5aSMatthew Dillon chain == &hmp->fchain) { 72432b800e6SMatthew Dillon /* 72532b800e6SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared. 726d7bfb2cbSMatthew Dillon * Net is -0 or -1 ref depending. 
72732b800e6SMatthew Dillon */ 72832b800e6SMatthew Dillon if (wasmodified) 7290dea3156SMatthew Dillon hammer2_chain_drop(chain); 73032b800e6SMatthew Dillon } else { 73132b800e6SMatthew Dillon /* 732d7bfb2cbSMatthew Dillon * Drop the ref from the MODIFIED bit we cleared and 733d7bfb2cbSMatthew Dillon * set a ref for the MOVED bit we are setting. Net 734d7bfb2cbSMatthew Dillon * is +0 or +1 ref depending. 73532b800e6SMatthew Dillon */ 73632b800e6SMatthew Dillon if (wasmodified == 0) 7370dea3156SMatthew Dillon hammer2_chain_ref(chain); 73832b800e6SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); 73932b800e6SMatthew Dillon } 74032b800e6SMatthew Dillon 74132b800e6SMatthew Dillon /* 74232b800e6SMatthew Dillon * If this is part of a recursive flush we can go ahead and write 743ea155208SMatthew Dillon * out the buffer cache buffer and pass a new bref back up the chain 744ea155208SMatthew Dillon * via the MOVED bit. 74532b800e6SMatthew Dillon * 746ea155208SMatthew Dillon * Volume headers are NOT flushed here as they require special 747ea155208SMatthew Dillon * processing. 74832b800e6SMatthew Dillon */ 74932b800e6SMatthew Dillon switch(chain->bref.type) { 7501a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 7511a7cfe5aSMatthew Dillon hammer2_modify_volume(hmp); 752a4dc31e0SMatthew Dillon hmp->voldata.freemap_tid = chain->bref.mirror_tid; 7531a7cfe5aSMatthew Dillon break; 75432b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 75532b800e6SMatthew Dillon /* 7561a7cfe5aSMatthew Dillon * We should flush the free block table before we calculate 7571a7cfe5aSMatthew Dillon * CRCs and copy voldata -> volsync. 75893f3933aSMatthew Dillon * 75993f3933aSMatthew Dillon * To prevent SMP races, fchain must remain locked until 76093f3933aSMatthew Dillon * voldata is copied to volsync. 
7611a7cfe5aSMatthew Dillon */ 7621a7cfe5aSMatthew Dillon hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 763a7720be7SMatthew Dillon if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) || 764a4dc31e0SMatthew Dillon hmp->voldata.freemap_tid < hmp->fchain.core->update_tid) { 7651a7cfe5aSMatthew Dillon /* this will modify vchain as a side effect */ 766a7720be7SMatthew Dillon hammer2_chain_t *tmp = &hmp->fchain; 767a7720be7SMatthew Dillon hammer2_chain_flush(info->trans, &tmp); 768a7720be7SMatthew Dillon KKASSERT(tmp == &hmp->fchain); 7691a7cfe5aSMatthew Dillon } 770a4dc31e0SMatthew Dillon hmp->voldata.mirror_tid = chain->bref.mirror_tid; 7711a7cfe5aSMatthew Dillon 7721a7cfe5aSMatthew Dillon /* 77332b800e6SMatthew Dillon * The volume header is flushed manually by the syncer, not 774ea155208SMatthew Dillon * here. All we do is adjust the crc's. 77532b800e6SMatthew Dillon */ 77632b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 77732b800e6SMatthew Dillon KKASSERT(chain->bp == NULL); 77832b800e6SMatthew Dillon kprintf("volume header mirror_tid %jd\n", 77932b800e6SMatthew Dillon hmp->voldata.mirror_tid); 78032b800e6SMatthew Dillon 78132b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 78232b800e6SMatthew Dillon hammer2_icrc32( 78332b800e6SMatthew Dillon (char *)&hmp->voldata + 78432b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_OFF, 78532b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_SIZE); 78632b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 78732b800e6SMatthew Dillon hammer2_icrc32( 78832b800e6SMatthew Dillon (char *)&hmp->voldata + 78932b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_OFF, 79032b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_SIZE); 79132b800e6SMatthew Dillon hmp->voldata.icrc_volheader = 79232b800e6SMatthew Dillon hammer2_icrc32( 79332b800e6SMatthew Dillon (char *)&hmp->voldata + 79432b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_OFF, 79532b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_SIZE); 79632b800e6SMatthew Dillon 
hmp->volsync = hmp->voldata; 7970dea3156SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 79893f3933aSMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 79932b800e6SMatthew Dillon break; 80032b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_DATA: 80132b800e6SMatthew Dillon /* 80232b800e6SMatthew Dillon * Data elements have already been flushed via the logical 80332b800e6SMatthew Dillon * file buffer cache. Their hash was set in the bref by 80432b800e6SMatthew Dillon * the vop_write code. 80532b800e6SMatthew Dillon * 806ea155208SMatthew Dillon * Make sure any device buffer(s) have been flushed out here. 807ea155208SMatthew Dillon * (there aren't usually any to flush). 80832b800e6SMatthew Dillon */ 809a98aa0b0SMatthew Dillon psize = hammer2_devblksize(chain->bytes); 810a98aa0b0SMatthew Dillon pmask = (hammer2_off_t)psize - 1; 811a98aa0b0SMatthew Dillon pbase = chain->bref.data_off & ~pmask; 812a98aa0b0SMatthew Dillon boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask); 81332b800e6SMatthew Dillon 814a98aa0b0SMatthew Dillon bp = getblk(hmp->devvp, pbase, psize, GETBLK_NOWAIT, 0); 81532b800e6SMatthew Dillon if (bp) { 81632b800e6SMatthew Dillon if ((bp->b_flags & (B_CACHE | B_DIRTY)) == 81732b800e6SMatthew Dillon (B_CACHE | B_DIRTY)) { 81832b800e6SMatthew Dillon cluster_awrite(bp); 81932b800e6SMatthew Dillon } else { 82032b800e6SMatthew Dillon bp->b_flags |= B_RELBUF; 82132b800e6SMatthew Dillon brelse(bp); 82232b800e6SMatthew Dillon } 82332b800e6SMatthew Dillon } 82432b800e6SMatthew Dillon break; 825512beabdSMatthew Dillon #if 0 82632b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 82732b800e6SMatthew Dillon /* 82832b800e6SMatthew Dillon * Indirect blocks may be in an INITIAL state. Use the 82932b800e6SMatthew Dillon * chain_lock() call to ensure that the buffer has been 83032b800e6SMatthew Dillon * instantiated (even though it is already locked the buffer 83132b800e6SMatthew Dillon * might not have been instantiated). 
83232b800e6SMatthew Dillon * 83332b800e6SMatthew Dillon * Only write the buffer out if it is dirty, it is possible 83432b800e6SMatthew Dillon * the operating system had already written out the buffer. 83532b800e6SMatthew Dillon */ 8360dea3156SMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 83732b800e6SMatthew Dillon KKASSERT(chain->bp != NULL); 83832b800e6SMatthew Dillon 83932b800e6SMatthew Dillon bp = chain->bp; 84032b800e6SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) || 84132b800e6SMatthew Dillon (bp->b_flags & B_DIRTY)) { 84232b800e6SMatthew Dillon bdwrite(chain->bp); 84332b800e6SMatthew Dillon } else { 84432b800e6SMatthew Dillon brelse(chain->bp); 84532b800e6SMatthew Dillon } 84632b800e6SMatthew Dillon chain->bp = NULL; 84732b800e6SMatthew Dillon chain->data = NULL; 8480dea3156SMatthew Dillon hammer2_chain_unlock(chain); 84932b800e6SMatthew Dillon break; 850512beabdSMatthew Dillon #endif 851512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 8521a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 8531a7cfe5aSMatthew Dillon /* 8541a7cfe5aSMatthew Dillon * Device-backed. Buffer will be flushed by the sync 8551a7cfe5aSMatthew Dillon * code XXX. 8561a7cfe5aSMatthew Dillon */ 857512beabdSMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 8581a7cfe5aSMatthew Dillon break; 859512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 86032b800e6SMatthew Dillon default: 86132b800e6SMatthew Dillon /* 86232b800e6SMatthew Dillon * Embedded elements have to be flushed out. 8631a7cfe5aSMatthew Dillon * (Basically just BREF_TYPE_INODE). 
86432b800e6SMatthew Dillon */ 865512beabdSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 86632b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 86732b800e6SMatthew Dillon KKASSERT(chain->bp == NULL); 86832b800e6SMatthew Dillon bref = &chain->bref; 86932b800e6SMatthew Dillon 87032b800e6SMatthew Dillon KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0); 8719061bde5SMatthew Dillon KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) == 872512beabdSMatthew Dillon HAMMER2_CHECK_ISCSI32 || 873512beabdSMatthew Dillon HAMMER2_DEC_CHECK(chain->bref.methods) == 874512beabdSMatthew Dillon HAMMER2_CHECK_FREEMAP); 87532b800e6SMatthew Dillon 87632b800e6SMatthew Dillon /* 87732b800e6SMatthew Dillon * The data is embedded, we have to acquire the 87832b800e6SMatthew Dillon * buffer cache buffer and copy the data into it. 87932b800e6SMatthew Dillon */ 880a98aa0b0SMatthew Dillon psize = hammer2_devblksize(chain->bytes); 881a98aa0b0SMatthew Dillon pmask = (hammer2_off_t)psize - 1; 882a98aa0b0SMatthew Dillon pbase = bref->data_off & ~pmask; 883a98aa0b0SMatthew Dillon boff = bref->data_off & (HAMMER2_OFF_MASK & pmask); 88432b800e6SMatthew Dillon 88532b800e6SMatthew Dillon /* 88632b800e6SMatthew Dillon * The getblk() optimization can only be used if the 88732b800e6SMatthew Dillon * physical block size matches the request. 88832b800e6SMatthew Dillon */ 889a98aa0b0SMatthew Dillon error = bread(hmp->devvp, pbase, psize, &bp); 89032b800e6SMatthew Dillon KKASSERT(error == 0); 891a98aa0b0SMatthew Dillon 89232b800e6SMatthew Dillon bdata = (char *)bp->b_data + boff; 89332b800e6SMatthew Dillon 89432b800e6SMatthew Dillon /* 89532b800e6SMatthew Dillon * Copy the data to the buffer, mark the buffer 89632b800e6SMatthew Dillon * dirty, and convert the chain to unmodified. 
89732b800e6SMatthew Dillon */ 89832b800e6SMatthew Dillon bcopy(chain->data, bdata, chain->bytes); 89932b800e6SMatthew Dillon bp->b_flags |= B_CLUSTEROK; 90032b800e6SMatthew Dillon bdwrite(bp); 90132b800e6SMatthew Dillon bp = NULL; 902a98aa0b0SMatthew Dillon 903512beabdSMatthew Dillon switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 904512beabdSMatthew Dillon case HAMMER2_CHECK_FREEMAP: 905512beabdSMatthew Dillon chain->bref.check.freemap.icrc32 = 906512beabdSMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 907512beabdSMatthew Dillon break; 908512beabdSMatthew Dillon case HAMMER2_CHECK_ISCSI32: 90932b800e6SMatthew Dillon chain->bref.check.iscsi32.value = 91032b800e6SMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 911512beabdSMatthew Dillon break; 912512beabdSMatthew Dillon default: 913512beabdSMatthew Dillon panic("hammer2_flush_core: bad crc type"); 914512beabdSMatthew Dillon break; /* NOT REACHED */ 915512beabdSMatthew Dillon } 91632b800e6SMatthew Dillon if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) 91732b800e6SMatthew Dillon ++hammer2_iod_meta_write; 91832b800e6SMatthew Dillon else 91932b800e6SMatthew Dillon ++hammer2_iod_indr_write; 92032b800e6SMatthew Dillon } 92132b800e6SMatthew Dillon } 92232b800e6SMatthew Dillon 92332b800e6SMatthew Dillon /* 9240dea3156SMatthew Dillon * Flush helper scan1 (recursive) 9250dea3156SMatthew Dillon * 9260dea3156SMatthew Dillon * Flushes the children of the caller's chain (parent) and updates 927ea155208SMatthew Dillon * the blockref, restricted by sync_tid. 9280dea3156SMatthew Dillon * 9290dea3156SMatthew Dillon * Ripouts during the loop should not cause any problems. Because we are 9300dea3156SMatthew Dillon * flushing to a synchronization point, modification races will occur after 9310dea3156SMatthew Dillon * sync_tid and do not have to be flushed anyway. 
932ea155208SMatthew Dillon * 933ea155208SMatthew Dillon * It is also ok if the parent is chain_duplicate()'d while unlocked because 934ea155208SMatthew Dillon * the delete/duplication will install a delete_tid that is still larger than 935ea155208SMatthew Dillon * our current sync_tid. 93632b800e6SMatthew Dillon */ 9370dea3156SMatthew Dillon static int 9380dea3156SMatthew Dillon hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data) 93932b800e6SMatthew Dillon { 9400dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 941cd189b1eSMatthew Dillon hammer2_trans_t *trans = info->trans; 9420dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 9430dea3156SMatthew Dillon int diddeferral; 94432b800e6SMatthew Dillon 9450dea3156SMatthew Dillon /* 946a4dc31e0SMatthew Dillon * Child is beyond the flush synchronization zone, don't persue. 947a4dc31e0SMatthew Dillon * Remember that modifications generally delete-duplicate so if the 948a4dc31e0SMatthew Dillon * sub-tree is dirty another child will get us there. But not this 949a4dc31e0SMatthew Dillon * one. 950a4dc31e0SMatthew Dillon * 951a4dc31e0SMatthew Dillon * Or MODIFIED is not set and child is already fully synchronized 952a4dc31e0SMatthew Dillon * with its sub-tree. Don't persue. 9530dea3156SMatthew Dillon */ 954a4dc31e0SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 955a4dc31e0SMatthew Dillon KKASSERT(child->delete_tid >= child->modify_tid); 956a4dc31e0SMatthew Dillon child->debug_reason = (child->debug_reason & ~255) | 1; 9570dea3156SMatthew Dillon return (0); 95832b800e6SMatthew Dillon } 959cd189b1eSMatthew Dillon 960a4dc31e0SMatthew Dillon /* 961a4dc31e0SMatthew Dillon * We must ref the child before unlocking the spinlock. 962a4dc31e0SMatthew Dillon * 963a4dc31e0SMatthew Dillon * The caller has added a ref to the parent so we can temporarily 964a4dc31e0SMatthew Dillon * unlock it in order to lock the child. 
965a4dc31e0SMatthew Dillon */ 966ea155208SMatthew Dillon hammer2_chain_ref(child); 9670dea3156SMatthew Dillon spin_unlock(&parent->core->cst.spin); 9680dea3156SMatthew Dillon 9690dea3156SMatthew Dillon hammer2_chain_unlock(parent); 9700dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE); 9710dea3156SMatthew Dillon 972a7720be7SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 && 973a4dc31e0SMatthew Dillon (child->bref.mirror_tid >= child->core->update_tid || 974a4dc31e0SMatthew Dillon child->bref.mirror_tid >= info->sync_tid)) { 975a4dc31e0SMatthew Dillon child->debug_reason = (child->debug_reason & ~255) | 2; 976a4dc31e0SMatthew Dillon goto skip; 977a4dc31e0SMatthew Dillon } 978a4dc31e0SMatthew Dillon 979a4dc31e0SMatthew Dillon /* 980a4dc31e0SMatthew Dillon * Re-check the flags before continuing. 981a4dc31e0SMatthew Dillon */ 982a4dc31e0SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 983a4dc31e0SMatthew Dillon child->debug_reason = (child->debug_reason & ~255) | 3; 9840dea3156SMatthew Dillon hammer2_chain_unlock(child); 985ea155208SMatthew Dillon hammer2_chain_drop(child); 986ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 9870dea3156SMatthew Dillon spin_lock(&parent->core->cst.spin); 9880dea3156SMatthew Dillon return (0); 9890dea3156SMatthew Dillon } 990a4dc31e0SMatthew Dillon 991a4dc31e0SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 && 992a4dc31e0SMatthew Dillon (child->bref.mirror_tid >= child->core->update_tid || 993a4dc31e0SMatthew Dillon child->bref.mirror_tid >= info->sync_tid)) { 994a4dc31e0SMatthew Dillon child->debug_reason = (child->debug_reason & ~255) | 4; 995a4dc31e0SMatthew Dillon goto skip; 996cd189b1eSMatthew Dillon } 9970dea3156SMatthew Dillon 9980dea3156SMatthew Dillon /* 999ea155208SMatthew Dillon * The DESTROYED flag can only be initially set on an unreferenced 1000ea155208SMatthew Dillon * deleted inode and will propagate downward via the mechanic below. 
1001ea155208SMatthew Dillon * Such inode chains have been deleted for good and should no longer 1002ea155208SMatthew Dillon * be subject to delete/duplication. 10030dea3156SMatthew Dillon * 10040dea3156SMatthew Dillon * This optimization allows the inode reclaim (destroy unlinked file 10050dea3156SMatthew Dillon * on vnode reclamation after last close) to be flagged by just 1006ea155208SMatthew Dillon * setting HAMMER2_CHAIN_DESTROYED at the top level and then will 1007ea155208SMatthew Dillon * cause the chains to be terminated and related buffers to be 1008ea155208SMatthew Dillon * invalidated and not flushed out. 1009ea155208SMatthew Dillon * 1010ea155208SMatthew Dillon * We have to be careful not to propagate the DESTROYED flag if 1011ea155208SMatthew Dillon * the destruction occurred after our flush sync_tid. 10120dea3156SMatthew Dillon */ 10130dea3156SMatthew Dillon if ((parent->flags & HAMMER2_CHAIN_DESTROYED) && 10149797e933SMatthew Dillon (child->flags & HAMMER2_CHAIN_DELETED) && 10150dea3156SMatthew Dillon (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) { 1016a7720be7SMatthew Dillon atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROYED); 1017a7720be7SMatthew Dillon /* 1018a7720be7SMatthew Dillon * Force downward recursion by bringing update_tid up to 1019a7720be7SMatthew Dillon * at least sync_tid. Parent's mirror_tid has not yet 1020a7720be7SMatthew Dillon * been updated. 1021a7720be7SMatthew Dillon * 1022a4dc31e0SMatthew Dillon * Vnode reclamation may have forced update_tid to MAX_TID 1023a4dc31e0SMatthew Dillon * (we do this because there was no transaction at the time). 1024a7720be7SMatthew Dillon * In this situation bring it down to something reasonable 1025a7720be7SMatthew Dillon * so the elements being destroyed can be retired. 
1026a7720be7SMatthew Dillon */ 1027a7720be7SMatthew Dillon spin_lock(&child->core->cst.spin); 1028a7720be7SMatthew Dillon if (child->core->update_tid < trans->sync_tid || 1029a7720be7SMatthew Dillon child->core->update_tid == HAMMER2_MAX_TID) { 1030a7720be7SMatthew Dillon child->core->update_tid = trans->sync_tid; 1031a7720be7SMatthew Dillon } 1032a7720be7SMatthew Dillon spin_unlock(&child->core->cst.spin); 10330dea3156SMatthew Dillon } 10340dea3156SMatthew Dillon 10350dea3156SMatthew Dillon /* 10360dea3156SMatthew Dillon * Recurse and collect deferral data. 10370dea3156SMatthew Dillon */ 10380dea3156SMatthew Dillon diddeferral = info->diddeferral; 10390dea3156SMatthew Dillon ++info->depth; 1040a7720be7SMatthew Dillon hammer2_chain_flush_core(info, &child); 1041a4dc31e0SMatthew Dillon 1042a7720be7SMatthew Dillon /* 1043a7720be7SMatthew Dillon * NOTE: If child failed to fully synchronize, child's bref.mirror_tid 1044a7720be7SMatthew Dillon * will not have been updated. Bumping diddeferral prevents 1045a7720be7SMatthew Dillon * the parent chain from updating bref.mirror_tid on the way 1046a7720be7SMatthew Dillon * back up in order to force a retry later. 1047a7720be7SMatthew Dillon */ 1048a4dc31e0SMatthew Dillon if (child->bref.mirror_tid < child->core->update_tid && 1049a4dc31e0SMatthew Dillon child->bref.mirror_tid < info->sync_tid) { 1050a7720be7SMatthew Dillon ++diddeferral; 1051a4dc31e0SMatthew Dillon } 1052a7720be7SMatthew Dillon 10530dea3156SMatthew Dillon --info->depth; 10540dea3156SMatthew Dillon info->diddeferral += diddeferral; 10550dea3156SMatthew Dillon 1056a4dc31e0SMatthew Dillon skip: 1057a4dc31e0SMatthew Dillon /* 1058a4dc31e0SMatthew Dillon * Check the conditions that could cause SCAN2 to modify the parent. 1059a4dc31e0SMatthew Dillon * Modify the parent here instead of in SCAN2, which would cause 1060a4dc31e0SMatthew Dillon * rollup chicken-and-egg races. 
1061a4dc31e0SMatthew Dillon */ 1062a4dc31e0SMatthew Dillon if (child->delete_tid <= trans->sync_tid && 1063a4dc31e0SMatthew Dillon child->delete_tid > parent->bref.mirror_tid && 1064a4dc31e0SMatthew Dillon child->modify_tid <= parent->bref.mirror_tid) { 1065a4dc31e0SMatthew Dillon info->domodify = 1; 1066a4dc31e0SMatthew Dillon } else if (child->delete_tid > trans->sync_tid && 1067a4dc31e0SMatthew Dillon child->modify_tid > parent->bref.mirror_tid) { 1068a4dc31e0SMatthew Dillon info->domodify = 1; 1069a4dc31e0SMatthew Dillon } 1070a4dc31e0SMatthew Dillon 1071a4dc31e0SMatthew Dillon /* 1072a4dc31e0SMatthew Dillon * Relock to continue the loop 1073a4dc31e0SMatthew Dillon */ 1074a4dc31e0SMatthew Dillon hammer2_chain_unlock(child); 1075ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 1076a4dc31e0SMatthew Dillon hammer2_chain_drop(child); 1077a4dc31e0SMatthew Dillon KKASSERT(info->parent == parent); 10780dea3156SMatthew Dillon 10790dea3156SMatthew Dillon spin_lock(&parent->core->cst.spin); 10800dea3156SMatthew Dillon return (0); 10810dea3156SMatthew Dillon } 10820dea3156SMatthew Dillon 10830dea3156SMatthew Dillon /* 10840dea3156SMatthew Dillon * Flush helper scan2 (non-recursive) 10850dea3156SMatthew Dillon * 10860dea3156SMatthew Dillon * This pass on a chain's children propagates any MOVED or DELETED 1087ea155208SMatthew Dillon * elements back up the chain towards the root after those elements have 1088ea155208SMatthew Dillon * been fully flushed. Unlike scan1, this function is NOT recursive and 1089ea155208SMatthew Dillon * the parent remains locked across the entire scan. 10900dea3156SMatthew Dillon * 10911897c66eSMatthew Dillon * SCAN2 is called twice, once with pass set to 1 and once with it set to 2. 10921897c66eSMatthew Dillon * We have to do this so base[] elements can be deleted in pass 1 to make 10931897c66eSMatthew Dillon * room for adding new elements in pass 2. 
10941897c66eSMatthew Dillon * 109591abd410SMatthew Dillon * This function also rolls up storage statistics. 109691abd410SMatthew Dillon * 10971897c66eSMatthew Dillon * NOTE! A deletion is a visibility issue, there can still be references to 1098ea155208SMatthew Dillon * deleted elements (for example, to an unlinked file which is still 1099ea155208SMatthew Dillon * open), and there can also be multiple chains pointing to the same 1100ea155208SMatthew Dillon * bref where some are deleted and some are not (for example due to 1101ea155208SMatthew Dillon * a rename). So a chain marked for deletion is basically considered 1102a864c5d9SMatthew Dillon * to be live until it is explicitly destroyed or until its ref-count 1103a864c5d9SMatthew Dillon * reaches zero (also implying that MOVED and MODIFIED are clear). 1104a4dc31e0SMatthew Dillon * 1105a4dc31e0SMatthew Dillon * NOTE! Info->parent will be locked but will only be instantiated/modified 1106a4dc31e0SMatthew Dillon * if it is either MODIFIED or if scan1 determined that block table 1107a4dc31e0SMatthew Dillon * updates will occur.
11080dea3156SMatthew Dillon */ 11090dea3156SMatthew Dillon static int 11100dea3156SMatthew Dillon hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) 11110dea3156SMatthew Dillon { 11120dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 11130dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 1114731b2a84SMatthew Dillon hammer2_chain_core_t *above = child->above; 1115a5913bdfSMatthew Dillon hammer2_mount_t *hmp = child->hmp; 1116a864c5d9SMatthew Dillon hammer2_trans_t *trans = info->trans; 11170dea3156SMatthew Dillon hammer2_blockref_t *base; 11180dea3156SMatthew Dillon int count; 11191897c66eSMatthew Dillon int ok; 1120ea155208SMatthew Dillon 1121a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1122a4dc31e0SMatthew Dillon kprintf("SCAN2 %p.%d %08x mod=%016jx del=%016jx trans=%016jx\n", child, child->bref.type, child->flags, child->modify_tid, child->delete_tid, info->trans->sync_tid); 1123a4dc31e0SMatthew Dillon #endif 1124ea155208SMatthew Dillon /* 11259797e933SMatthew Dillon * Inodes with stale children that have been converted to DIRECTDATA 11269797e933SMatthew Dillon * mode (file extension or hardlink conversion typically) need to 11279797e933SMatthew Dillon * skipped right now before we start messing with a non-existant 11289797e933SMatthew Dillon * block table. 11299797e933SMatthew Dillon */ 113009dd2dfeSMatthew Dillon #if 0 11319797e933SMatthew Dillon if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && 11329797e933SMatthew Dillon (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) { 11339797e933SMatthew Dillon goto finalize; 11349797e933SMatthew Dillon } 113509dd2dfeSMatthew Dillon #endif 11369797e933SMatthew Dillon 11379797e933SMatthew Dillon /* 1138731b2a84SMatthew Dillon * Ignore children created after our flush point, treating them as 1139cd189b1eSMatthew Dillon * if they did not exist). These children will not cause the parent 1140cd189b1eSMatthew Dillon * to be updated. 
1141731b2a84SMatthew Dillon * 1142a7720be7SMatthew Dillon * Children deleted after our flush point are treated as having been 1143a7720be7SMatthew Dillon * created for the purposes of the flush. The parent's update_tid 1144a7720be7SMatthew Dillon * will already be higher than our trans->sync_tid so the flush path 1145a7720be7SMatthew Dillon * is left intact. 1146a7720be7SMatthew Dillon * 1147cd189b1eSMatthew Dillon * When we encounter such children and the parent chain has not been 1148cd189b1eSMatthew Dillon * deleted, delete/duplicated, or delete/duplicated-for-move, then 1149cd189b1eSMatthew Dillon * the parent may be used to funnel through several flush points. 1150a7720be7SMatthew Dillon * These chains will still be visible to later flushes due to having 1151a7720be7SMatthew Dillon * a higher update_tid than we can set in the current flush. 1152731b2a84SMatthew Dillon */ 1153731b2a84SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 1154a4dc31e0SMatthew Dillon KKASSERT(child->delete_tid >= child->modify_tid); 1155731b2a84SMatthew Dillon goto finalize; 1156731b2a84SMatthew Dillon } 1157731b2a84SMatthew Dillon 1158731b2a84SMatthew Dillon /* 1159731b2a84SMatthew Dillon * Ignore children which have not changed. The parent's block table 1160731b2a84SMatthew Dillon * is already correct. 11611897c66eSMatthew Dillon * 11621897c66eSMatthew Dillon * XXX The MOVED bit is only cleared when all multi-homed parents 11631897c66eSMatthew Dillon * have flushed, creating a situation where a re-flush can occur 11641897c66eSMatthew Dillon * via a parent which has already flushed. The hammer2_base_*() 11651897c66eSMatthew Dillon * functions currently have a hack to deal with this case but 11661897c66eSMatthew Dillon * we need something better. 
1167ea155208SMatthew Dillon */ 1168ea155208SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) { 1169a4dc31e0SMatthew Dillon KKASSERT((child->flags & HAMMER2_CHAIN_MODIFIED) == 0); 11700dea3156SMatthew Dillon goto finalize; 11710dea3156SMatthew Dillon } 1172ea155208SMatthew Dillon 11731897c66eSMatthew Dillon /* 11741897c66eSMatthew Dillon * Make sure child is referenced before we unlock. 11751897c66eSMatthew Dillon */ 1176ea155208SMatthew Dillon hammer2_chain_ref(child); 1177731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 11780dea3156SMatthew Dillon 11790dea3156SMatthew Dillon /* 11801897c66eSMatthew Dillon * Parent reflushed after the child has passed them by should skip 11811897c66eSMatthew Dillon * due to the modify_tid test. XXX 11820dea3156SMatthew Dillon */ 11830dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER); 11841897c66eSMatthew Dillon KKASSERT(child->above == above); 11851897c66eSMatthew Dillon KKASSERT(parent->core == above); 11860dea3156SMatthew Dillon 11870dea3156SMatthew Dillon /* 11880dea3156SMatthew Dillon * The parent's blockref to the child must be deleted or updated. 11890dea3156SMatthew Dillon * 11900dea3156SMatthew Dillon * This point is not reached on successful DESTROYED optimizations 1191a864c5d9SMatthew Dillon * but can be reached on recursive deletions and restricted flushes. 1192ea155208SMatthew Dillon * 1193a7720be7SMatthew Dillon * The chain_modify here may delete-duplicate the block. This can 1194a7720be7SMatthew Dillon * cause a multitude of issues if the block was already modified 1195a7720be7SMatthew Dillon * by a later (post-flush) transaction. 
Primarily blockrefs in 1196a7720be7SMatthew Dillon * the later block can be out-of-date, so if the situation occurs 1197a7720be7SMatthew Dillon * we can't throw away the MOVED bit on the current blocks until 1198a7720be7SMatthew Dillon * the later blocks are flushed (so as to be able to regenerate all 1199a7720be7SMatthew Dillon * the changes that were made). 1200a7720be7SMatthew Dillon * 12014a59bd3eSMatthew Dillon * Because flushes are ordered we do not have to make a 12024a59bd3eSMatthew Dillon * modify/duplicate of indirect blocks. That is, the flush 12034a59bd3eSMatthew Dillon * code does not have to kmalloc or duplicate anything. We 12044a59bd3eSMatthew Dillon * can adjust the indirect block table in-place and reuse the 12054a59bd3eSMatthew Dillon * chain. It IS possible that the chain has already been duplicated 12064a59bd3eSMatthew Dillon * or may wind up being duplicated on-the-fly by modifying code 12074a59bd3eSMatthew Dillon * on the frontend. We simply use the original and ignore such 12084a59bd3eSMatthew Dillon * chains. However, it does mean we can't clear the MOVED bit. 12094a59bd3eSMatthew Dillon * 1210ea155208SMatthew Dillon * XXX recursive deletions not optimized. 12110dea3156SMatthew Dillon */ 12120dea3156SMatthew Dillon 12130dea3156SMatthew Dillon switch(parent->bref.type) { 12140dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 1215ea155208SMatthew Dillon /* 1216ea155208SMatthew Dillon * XXX Should assert that OPFLAG_DIRECTDATA is 0 once we 1217ea155208SMatthew Dillon * properly duplicate the inode headers and do proper flush 1218ea155208SMatthew Dillon * range checks (all the children should be beyond the flush 1219ea155208SMatthew Dillon * point). For now just don't sync the non-applicable 1220ea155208SMatthew Dillon * children. 1221ea155208SMatthew Dillon * 1222ea155208SMatthew Dillon * XXX Can also occur due to hardlink consolidation. 
We 1223ea155208SMatthew Dillon * set OPFLAG_DIRECTDATA to prevent the indirect and data 1224ea155208SMatthew Dillon * blocks from syncing ot the hardlink pointer. 1225ea155208SMatthew Dillon */ 1226a4dc31e0SMatthew Dillon if (parent->data) 12270dea3156SMatthew Dillon base = &parent->data->ipdata.u.blockset.blockref[0]; 1228a4dc31e0SMatthew Dillon else 1229a4dc31e0SMatthew Dillon base = NULL; 12300dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 12310dea3156SMatthew Dillon break; 12320dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 12331a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1234a4dc31e0SMatthew Dillon if (parent->data) 123593f3933aSMatthew Dillon base = &parent->data->npdata[0]; 1236a4dc31e0SMatthew Dillon else 12370dea3156SMatthew Dillon base = NULL; 12380dea3156SMatthew Dillon count = parent->bytes / sizeof(hammer2_blockref_t); 12390dea3156SMatthew Dillon break; 12400dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 12410dea3156SMatthew Dillon base = &hmp->voldata.sroot_blockset.blockref[0]; 12420dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 12430dea3156SMatthew Dillon break; 12441a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 124593f3933aSMatthew Dillon base = &parent->data->npdata[0]; 12461a7cfe5aSMatthew Dillon count = HAMMER2_SET_COUNT; 12471a7cfe5aSMatthew Dillon break; 12480dea3156SMatthew Dillon default: 12490dea3156SMatthew Dillon base = NULL; 12500dea3156SMatthew Dillon count = 0; 12511897c66eSMatthew Dillon panic("hammer2_chain_flush_scan2: " 12520dea3156SMatthew Dillon "unrecognized blockref type: %d", 12530dea3156SMatthew Dillon parent->bref.type); 12540dea3156SMatthew Dillon } 12550dea3156SMatthew Dillon 12560dea3156SMatthew Dillon /* 12571897c66eSMatthew Dillon * Don't bother updating a deleted parent's blockrefs (caller will 12581897c66eSMatthew Dillon * optimize-out the disk write). 
Note that this is not optional, 12591897c66eSMatthew Dillon * a deleted parent's blockref array might not be synchronized at 12601897c66eSMatthew Dillon * all so calling hammer2_base*() functions could result in a panic. 12611897c66eSMatthew Dillon * 12621897c66eSMatthew Dillon * Otherwise, we need to be COUNTEDBREFS synchronized for the 12631897c66eSMatthew Dillon * hammer2_base_*() functions. 12641897c66eSMatthew Dillon */ 1265a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1266a4dc31e0SMatthew Dillon kprintf("SCAN2 base=%p pass=%d PARENT %p.%d DTID=%016jx SYNC=%016jx\n", 1267a4dc31e0SMatthew Dillon base, 1268a4dc31e0SMatthew Dillon info->pass, parent, parent->bref.type, parent->delete_tid, trans->sync_tid); 1269a4dc31e0SMatthew Dillon #endif 12701897c66eSMatthew Dillon if (parent->delete_tid <= trans->sync_tid) 12711897c66eSMatthew Dillon base = NULL; 1272c057466cSMatthew Dillon else if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) 127351a0d27cSMatthew Dillon hammer2_chain_countbrefs(parent, base, count); 12741897c66eSMatthew Dillon 12751897c66eSMatthew Dillon /* 12760dea3156SMatthew Dillon * Update the parent's blockref table and propagate mirror_tid. 1277d5fabb70SMatthew Dillon * 1278731b2a84SMatthew Dillon * NOTE! Children with modify_tid's beyond our flush point are 1279731b2a84SMatthew Dillon * considered to not exist for the purposes of updating the 1280731b2a84SMatthew Dillon * parent's blockref array. 1281d5fabb70SMatthew Dillon * 1282731b2a84SMatthew Dillon * NOTE! Updates to a parent's blockref table do not adjust the 1283731b2a84SMatthew Dillon * parent's bref.modify_tid, only its bref.mirror_tid. 1284a7720be7SMatthew Dillon * 1285a4dc31e0SMatthew Dillon * SCAN1 has already put the parent in a modified state 1286a4dc31e0SMatthew Dillon * so if it isn't we panic. 1287a4dc31e0SMatthew Dillon * 1288a7720be7SMatthew Dillon * NOTE! chain->modify_tid vs chain->bref.modify_tid. 
The chain's 1289a7720be7SMatthew Dillon * internal modify_tid is always updated based on creation 1290a7720be7SMatthew Dillon * or delete-duplicate. However, the bref.modify_tid is NOT 1291a7720be7SMatthew Dillon * updated due to simple blockref updates. 12920dea3156SMatthew Dillon */ 1293a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1294a7720be7SMatthew Dillon kprintf("chain %p->%p pass %d trans %016jx sync %p.%d %016jx/%d C=%016jx D=%016jx PMIRROR %016jx\n", 1295a7720be7SMatthew Dillon parent, child, 1296a7720be7SMatthew Dillon info->pass, trans->sync_tid, 1297a7720be7SMatthew Dillon child, child->bref.type, 1298a7720be7SMatthew Dillon child->bref.key, child->bref.keybits, 1299a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, parent->bref.mirror_tid); 1300a7720be7SMatthew Dillon #endif 1301a7720be7SMatthew Dillon 13021897c66eSMatthew Dillon if (info->pass == 1 && child->delete_tid <= trans->sync_tid) { 13031897c66eSMatthew Dillon /* 1304a7720be7SMatthew Dillon * Deleting. The block array is expected to contain the 1305a7720be7SMatthew Dillon * child's entry if: 1306a7720be7SMatthew Dillon * 1307a7720be7SMatthew Dillon * (1) The deletion occurred after the parent's block table 1308a7720be7SMatthew Dillon * was last synchronized (delete_tid), and 1309a7720be7SMatthew Dillon * 1310a7720be7SMatthew Dillon * (2) The creation occurred before or during the parent's 1311a7720be7SMatthew Dillon * last block table synchronization. 
13121897c66eSMatthew Dillon */ 1313a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1314a4dc31e0SMatthew Dillon kprintf("S2A %p b=%p d/b=%016jx/%016jx m/b=%016jx/%016jx\n", 1315a4dc31e0SMatthew Dillon child, base, child->delete_tid, parent->bref.mirror_tid, 1316a4dc31e0SMatthew Dillon child->modify_tid, parent->bref.mirror_tid); 1317a4dc31e0SMatthew Dillon #endif 13181897c66eSMatthew Dillon ok = 1; 1319a7720be7SMatthew Dillon if (base && 1320a7720be7SMatthew Dillon child->delete_tid > parent->bref.mirror_tid && 1321a7720be7SMatthew Dillon child->modify_tid <= parent->bref.mirror_tid) { 1322a4dc31e0SMatthew Dillon KKASSERT(parent->modify_tid == trans->sync_tid); 132391abd410SMatthew Dillon hammer2_rollup_stats(parent, child, -1); 13241897c66eSMatthew Dillon spin_lock(&above->cst.spin); 1325a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1326a7720be7SMatthew Dillon kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " 1327a7720be7SMatthew Dillon "flg=%08x %016jx/%d delete\n", 1328a7720be7SMatthew Dillon trans->sync_tid, 1329a7720be7SMatthew Dillon parent, parent->bref.type, 1330a7720be7SMatthew Dillon child, child->bref.type, 1331a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, 1332a7720be7SMatthew Dillon child->flags, 1333a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1334a7720be7SMatthew Dillon #endif 133551a0d27cSMatthew Dillon hammer2_base_delete(parent, base, count, 1336a7720be7SMatthew Dillon &info->cache_index, child); 13371897c66eSMatthew Dillon spin_unlock(&above->cst.spin); 133893f3933aSMatthew Dillon } 1339ea155208SMatthew Dillon if (info->mirror_tid < child->delete_tid) 1340ea155208SMatthew Dillon info->mirror_tid = child->delete_tid; 13411897c66eSMatthew Dillon } else if (info->pass == 2 && child->delete_tid > trans->sync_tid) { 13421897c66eSMatthew Dillon /* 1343a7720be7SMatthew Dillon * Inserting. 
The block array is expected to NOT contain 1344a7720be7SMatthew Dillon * the child's entry if: 1345a7720be7SMatthew Dillon * 1346a7720be7SMatthew Dillon * (1) The creation occurred after the parent's block table 1347a7720be7SMatthew Dillon * was last synchronized (modify_tid), and 1348a7720be7SMatthew Dillon * 1349a7720be7SMatthew Dillon * (2) The child is not being deleted in the same 1350a7720be7SMatthew Dillon * transaction. 13511897c66eSMatthew Dillon */ 13521897c66eSMatthew Dillon ok = 1; 1353a7720be7SMatthew Dillon if (base && 1354a4dc31e0SMatthew Dillon child->modify_tid > parent->bref.mirror_tid) { 1355a4dc31e0SMatthew Dillon KKASSERT(parent->modify_tid == trans->sync_tid); 13561897c66eSMatthew Dillon hammer2_rollup_stats(parent, child, 1); 13571897c66eSMatthew Dillon spin_lock(&above->cst.spin); 1358a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1359a7720be7SMatthew Dillon kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " 1360a7720be7SMatthew Dillon "flg=%08x %016jx/%d insert\n", 1361a7720be7SMatthew Dillon trans->sync_tid, 1362a7720be7SMatthew Dillon parent, parent->bref.type, 1363a7720be7SMatthew Dillon child, child->bref.type, 1364a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, 1365a7720be7SMatthew Dillon child->flags, 1366a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1367a7720be7SMatthew Dillon #endif 136851a0d27cSMatthew Dillon hammer2_base_insert(parent, base, count, 1369a7720be7SMatthew Dillon &info->cache_index, child); 13701897c66eSMatthew Dillon spin_unlock(&above->cst.spin); 137193f3933aSMatthew Dillon } 1372ea155208SMatthew Dillon if (info->mirror_tid < child->modify_tid) 1373ea155208SMatthew Dillon info->mirror_tid = child->modify_tid; 13741897c66eSMatthew Dillon } else { 13751897c66eSMatthew Dillon ok = 0; 13760dea3156SMatthew Dillon } 13770dea3156SMatthew Dillon 1378ea155208SMatthew Dillon if (info->mirror_tid < child->bref.mirror_tid) { 1379a4dc31e0SMatthew Dillon KKASSERT(child->bref.mirror_tid <= 
trans->sync_tid); 1380ea155208SMatthew Dillon info->mirror_tid = child->bref.mirror_tid; 13810dea3156SMatthew Dillon } 13820dea3156SMatthew Dillon 13830dea3156SMatthew Dillon /* 13841897c66eSMatthew Dillon * Only clear MOVED once all possible parents have been flushed. 13851897c66eSMatthew Dillon * 1386731b2a84SMatthew Dillon * When can we safely clear the MOVED flag? Flushes down duplicate 1387731b2a84SMatthew Dillon * paths can occur out of order, for example if an inode is moved 1388731b2a84SMatthew Dillon * as part of a hardlink consolidation or if an inode is moved into 1389731b2a84SMatthew Dillon * an indirect block indexed before the inode. 13900dea3156SMatthew Dillon */ 13911897c66eSMatthew Dillon if (ok && (child->flags & HAMMER2_CHAIN_MOVED)) { 1392cd189b1eSMatthew Dillon hammer2_chain_t *scan; 1393a7720be7SMatthew Dillon 1394a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1395a7720be7SMatthew Dillon kprintf("CHECKMOVED %p (parent=%p)", child, parent); 1396731b2a84SMatthew Dillon 1397731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 13981897c66eSMatthew Dillon TAILQ_FOREACH(scan, &above->ownerq, core_entry) { 139909dd2dfeSMatthew Dillon /* 1400a4dc31e0SMatthew Dillon * Can't clear child's MOVED until all parent's have 1401a4dc31e0SMatthew Dillon * synchronized with it. 1402a7720be7SMatthew Dillon * 1403a4dc31e0SMatthew Dillon * Ignore our current parent (we use 'ok' from above), 1404a4dc31e0SMatthew Dillon * 1405a4dc31e0SMatthew Dillon * ignore any parents which have been deleted as-of 1406a4dc31e0SMatthew Dillon * our transaction id (their block array doesn't get 1407a4dc31e0SMatthew Dillon * updated). 
140809dd2dfeSMatthew Dillon */ 1409a7720be7SMatthew Dillon if (scan == parent || 1410a7720be7SMatthew Dillon scan->delete_tid <= trans->sync_tid) 141109dd2dfeSMatthew Dillon continue; 1412a4dc31e0SMatthew Dillon 1413a4dc31e0SMatthew Dillon /* 1414a4dc31e0SMatthew Dillon * parent not synchronized if child modified or 1415a4dc31e0SMatthew Dillon * deleted after the parent's last sync point. 1416a4dc31e0SMatthew Dillon * 1417a4dc31e0SMatthew Dillon * (For the purpose of clearing the MOVED bit 1418a4dc31e0SMatthew Dillon * we do not restrict the tests to just flush 1419a4dc31e0SMatthew Dillon * transactions). 1420a4dc31e0SMatthew Dillon */ 1421a4dc31e0SMatthew Dillon if (scan->bref.mirror_tid < child->modify_tid || 1422a4dc31e0SMatthew Dillon ((child->flags & HAMMER2_CHAIN_DELETED) && 1423a4dc31e0SMatthew Dillon scan->bref.mirror_tid < child->delete_tid)) { 1424a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1425a7720be7SMatthew Dillon kprintf("(fail scan %p %016jx/%016jx)", 1426a7720be7SMatthew Dillon scan, scan->bref.mirror_tid, 1427a7720be7SMatthew Dillon child->modify_tid); 1428731b2a84SMatthew Dillon ok = 0; 1429731b2a84SMatthew Dillon } 1430731b2a84SMatthew Dillon } 1431a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1432a7720be7SMatthew Dillon kprintf("\n"); 1433731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 1434a4dc31e0SMatthew Dillon 1435a4dc31e0SMatthew Dillon /* 1436a4dc31e0SMatthew Dillon * Can we finally clear MOVED? 
1437a4dc31e0SMatthew Dillon */ 1438731b2a84SMatthew Dillon if (ok) { 1439a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1440a7720be7SMatthew Dillon kprintf("clear moved %p.%d %016jx/%d\n", 1441a7720be7SMatthew Dillon child, child->bref.type, 1442a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1443a4dc31e0SMatthew Dillon if (child->modify_tid <= trans->sync_tid && 1444a4dc31e0SMatthew Dillon (child->delete_tid == HAMMER2_MAX_TID || 1445a4dc31e0SMatthew Dillon child->delete_tid <= trans->sync_tid)) { 1446a4dc31e0SMatthew Dillon atomic_clear_int(&child->flags, 1447a4dc31e0SMatthew Dillon HAMMER2_CHAIN_MOVED); 1448ea155208SMatthew Dillon hammer2_chain_drop(child); /* flag */ 1449a4dc31e0SMatthew Dillon KKASSERT((child->flags & 1450a4dc31e0SMatthew Dillon HAMMER2_CHAIN_MODIFIED) == 0); 1451a4dc31e0SMatthew Dillon } else { 1452a4dc31e0SMatthew Dillon kprintf("ok problem child %p %016jx/%016jx vs %016jx\n", child, child->modify_tid, child->delete_tid, trans->sync_tid); 1453a4dc31e0SMatthew Dillon } 1454a7720be7SMatthew Dillon } else { 1455a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1456a7720be7SMatthew Dillon kprintf("keep moved %p.%d %016jx/%d\n", 1457a7720be7SMatthew Dillon child, child->bref.type, 1458a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1459ea155208SMatthew Dillon } 14600dea3156SMatthew Dillon } 14610dea3156SMatthew Dillon 14620dea3156SMatthew Dillon /* 14630dea3156SMatthew Dillon * Unlock the child. This can wind up dropping the child's 14640dea3156SMatthew Dillon * last ref, removing it from the parent's RB tree, and deallocating 14650dea3156SMatthew Dillon * the structure. The RB_SCAN() our caller is doing handles the 14660dea3156SMatthew Dillon * situation. 
14670dea3156SMatthew Dillon */ 14680dea3156SMatthew Dillon hammer2_chain_unlock(child); 1469ea155208SMatthew Dillon hammer2_chain_drop(child); 1470731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 14710dea3156SMatthew Dillon 14720dea3156SMatthew Dillon /* 1473a7720be7SMatthew Dillon * The parent may have been delete-duplicated. 14740dea3156SMatthew Dillon */ 1475a7720be7SMatthew Dillon info->parent = parent; 14760dea3156SMatthew Dillon finalize: 14770dea3156SMatthew Dillon return (0); 147832b800e6SMatthew Dillon } 147991abd410SMatthew Dillon 148091abd410SMatthew Dillon static 148191abd410SMatthew Dillon void 148291abd410SMatthew Dillon hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how) 148391abd410SMatthew Dillon { 14841897c66eSMatthew Dillon #if 0 148591abd410SMatthew Dillon hammer2_chain_t *grandp; 14861897c66eSMatthew Dillon #endif 148791abd410SMatthew Dillon 148891abd410SMatthew Dillon parent->data_count += child->data_count; 148991abd410SMatthew Dillon parent->inode_count += child->inode_count; 149091abd410SMatthew Dillon child->data_count = 0; 149191abd410SMatthew Dillon child->inode_count = 0; 149291abd410SMatthew Dillon if (how < 0) { 149391abd410SMatthew Dillon parent->data_count -= child->bytes; 149491abd410SMatthew Dillon if (child->bref.type == HAMMER2_BREF_TYPE_INODE) { 149591abd410SMatthew Dillon parent->inode_count -= 1; 14969ec04660SMatthew Dillon #if 0 14979ec04660SMatthew Dillon /* XXX child->data may be NULL atm */ 149891abd410SMatthew Dillon parent->data_count -= child->data->ipdata.data_count; 149991abd410SMatthew Dillon parent->inode_count -= child->data->ipdata.inode_count; 15009ec04660SMatthew Dillon #endif 150191abd410SMatthew Dillon } 150291abd410SMatthew Dillon } else if (how > 0) { 150391abd410SMatthew Dillon parent->data_count += child->bytes; 150491abd410SMatthew Dillon if (child->bref.type == HAMMER2_BREF_TYPE_INODE) { 150591abd410SMatthew Dillon parent->inode_count += 1; 15069ec04660SMatthew Dillon #if 
0 15079ec04660SMatthew Dillon /* XXX child->data may be NULL atm */ 150891abd410SMatthew Dillon parent->data_count += child->data->ipdata.data_count; 150991abd410SMatthew Dillon parent->inode_count += child->data->ipdata.inode_count; 15109ec04660SMatthew Dillon #endif 151191abd410SMatthew Dillon } 151291abd410SMatthew Dillon } 151391abd410SMatthew Dillon if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 151491abd410SMatthew Dillon parent->data->ipdata.data_count += parent->data_count; 151591abd410SMatthew Dillon parent->data->ipdata.inode_count += parent->inode_count; 15161897c66eSMatthew Dillon #if 0 151791abd410SMatthew Dillon for (grandp = parent->above->first_parent; 151891abd410SMatthew Dillon grandp; 151991abd410SMatthew Dillon grandp = grandp->next_parent) { 152091abd410SMatthew Dillon grandp->data_count += parent->data_count; 152191abd410SMatthew Dillon grandp->inode_count += parent->inode_count; 152291abd410SMatthew Dillon } 15231897c66eSMatthew Dillon #endif 152491abd410SMatthew Dillon parent->data_count = 0; 152591abd410SMatthew Dillon parent->inode_count = 0; 152691abd410SMatthew Dillon } 152791abd410SMatthew Dillon } 1528