132b800e6SMatthew Dillon /* 20dea3156SMatthew Dillon * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 332b800e6SMatthew Dillon * 432b800e6SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 532b800e6SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 632b800e6SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 732b800e6SMatthew Dillon * 832b800e6SMatthew Dillon * Redistribution and use in source and binary forms, with or without 932b800e6SMatthew Dillon * modification, are permitted provided that the following conditions 1032b800e6SMatthew Dillon * are met: 1132b800e6SMatthew Dillon * 1232b800e6SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 1332b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer. 1432b800e6SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 1532b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer in 1632b800e6SMatthew Dillon * the documentation and/or other materials provided with the 1732b800e6SMatthew Dillon * distribution. 1832b800e6SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 1932b800e6SMatthew Dillon * contributors may be used to endorse or promote products derived 2032b800e6SMatthew Dillon * from this software without specific, prior written permission. 2132b800e6SMatthew Dillon * 2232b800e6SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2332b800e6SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2432b800e6SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 2532b800e6SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 2632b800e6SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2732b800e6SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 2832b800e6SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2932b800e6SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 3032b800e6SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3132b800e6SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 3232b800e6SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3332b800e6SMatthew Dillon * SUCH DAMAGE. 3432b800e6SMatthew Dillon */ 3532b800e6SMatthew Dillon 3632b800e6SMatthew Dillon #include <sys/cdefs.h> 3732b800e6SMatthew Dillon #include <sys/param.h> 3832b800e6SMatthew Dillon #include <sys/systm.h> 3932b800e6SMatthew Dillon #include <sys/types.h> 4032b800e6SMatthew Dillon #include <sys/lock.h> 4132b800e6SMatthew Dillon #include <sys/uuid.h> 4232b800e6SMatthew Dillon 4332b800e6SMatthew Dillon #include "hammer2.h" 4432b800e6SMatthew Dillon 4532b800e6SMatthew Dillon /* 4632b800e6SMatthew Dillon * Recursively flush the specified chain. The chain is locked and 4732b800e6SMatthew Dillon * referenced by the caller and will remain so on return. The chain 4832b800e6SMatthew Dillon * will remain referenced throughout but can temporarily lose its 4932b800e6SMatthew Dillon * lock during the recursion to avoid unnecessarily stalling user 5032b800e6SMatthew Dillon * processes. 
5132b800e6SMatthew Dillon */ 5232b800e6SMatthew Dillon struct hammer2_flush_info { 530dea3156SMatthew Dillon hammer2_chain_t *parent; 540dea3156SMatthew Dillon hammer2_trans_t *trans; 5532b800e6SMatthew Dillon int depth; 560dea3156SMatthew Dillon int diddeferral; 570dea3156SMatthew Dillon struct flush_deferral_list flush_list; 580dea3156SMatthew Dillon hammer2_tid_t sync_tid; /* flush synchronization point */ 590dea3156SMatthew Dillon hammer2_tid_t mirror_tid; /* collect mirror TID updates */ 6032b800e6SMatthew Dillon }; 6132b800e6SMatthew Dillon 6232b800e6SMatthew Dillon typedef struct hammer2_flush_info hammer2_flush_info_t; 6332b800e6SMatthew Dillon 640dea3156SMatthew Dillon static void hammer2_chain_flush_core(hammer2_flush_info_t *info, 650dea3156SMatthew Dillon hammer2_chain_t *chain); 660dea3156SMatthew Dillon static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data); 670dea3156SMatthew Dillon static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data); 6832b800e6SMatthew Dillon 6993f3933aSMatthew Dillon #if 0 7093f3933aSMatthew Dillon static __inline 7193f3933aSMatthew Dillon void 7293f3933aSMatthew Dillon hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref, 7393f3933aSMatthew Dillon int how) 7493f3933aSMatthew Dillon { 7593f3933aSMatthew Dillon hammer2_key_t bytes; 7693f3933aSMatthew Dillon 7793f3933aSMatthew Dillon if (bref->type != 0) { 7893f3933aSMatthew Dillon bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX); 7993f3933aSMatthew Dillon if (bref->type == HAMMER2_BREF_TYPE_INODE) 8093f3933aSMatthew Dillon info->inode_count += how; 8193f3933aSMatthew Dillon if (how < 0) 8293f3933aSMatthew Dillon info->data_count -= bytes; 8393f3933aSMatthew Dillon else 8493f3933aSMatthew Dillon info->data_count += bytes; 8593f3933aSMatthew Dillon } 8693f3933aSMatthew Dillon } 8793f3933aSMatthew Dillon #endif 8893f3933aSMatthew Dillon 8932b800e6SMatthew Dillon /* 900dea3156SMatthew Dillon * Transaction support functions 
for writing to the filesystem.
 *
 * Initializing a new transaction allocates a transaction ID.  We
 * don't bother marking the volume header MODIFIED.  Instead, the volume
 * will be synchronized at a later time as part of a larger flush sequence.
 *
 * Non-flush transactions can typically run concurrently.  However if
 * there are non-flush transactions both before AND after a flush trans,
 * the transactions after stall until the ones before finish.
 *
 * Non-flush transactions occurring after a flush pointer can run concurrently
 * with that flush.  They only have to wait for transactions prior to the
 * flush trans to complete before they unstall.
 *
 * WARNING! Modifications to the root volume cannot dup the root volume
 *	    header to handle synchronization points, so alloc_tid can
 *	    wind up (harmlessly) more advanced on flush.
 *
 * WARNING! Operations which might call inode_duplicate()/chain_duplicate()
 *	    depend heavily on having a unique sync_tid to avoid duplication
 *	    collisions (which key off of delete_tid).
1110dea3156SMatthew Dillon */ 1120dea3156SMatthew Dillon void 113*a5913bdfSMatthew Dillon hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags) 1140dea3156SMatthew Dillon { 115*a5913bdfSMatthew Dillon hammer2_cluster_t *cluster; 116*a5913bdfSMatthew Dillon hammer2_mount_t *hmp; 117d001f460SMatthew Dillon hammer2_trans_t *scan; 118d001f460SMatthew Dillon 1190dea3156SMatthew Dillon bzero(trans, sizeof(*trans)); 120*a5913bdfSMatthew Dillon trans->pmp = pmp; 121*a5913bdfSMatthew Dillon cluster = pmp->cluster; 122*a5913bdfSMatthew Dillon hmp = cluster->hmp; 123d001f460SMatthew Dillon 1240dea3156SMatthew Dillon hammer2_voldata_lock(hmp); 1250dea3156SMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid++; 126d001f460SMatthew Dillon trans->flags = flags; 127d001f460SMatthew Dillon trans->td = curthread; 128d001f460SMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 129d001f460SMatthew Dillon 130d001f460SMatthew Dillon if (flags & HAMMER2_TRANS_ISFLUSH) { 131d001f460SMatthew Dillon /* 132d001f460SMatthew Dillon * If we are a flush we have to wait for all transactions 133d001f460SMatthew Dillon * prior to our flush synchronization point to complete 134d001f460SMatthew Dillon * before we can start our flush. 135d001f460SMatthew Dillon */ 136d001f460SMatthew Dillon ++hmp->flushcnt; 1374a59bd3eSMatthew Dillon if (hmp->curflush == NULL) { 138d001f460SMatthew Dillon hmp->curflush = trans; 1391a7cfe5aSMatthew Dillon hmp->topo_flush_tid = trans->sync_tid; 1404a59bd3eSMatthew Dillon } 141d001f460SMatthew Dillon while (TAILQ_FIRST(&hmp->transq) != trans) { 142d001f460SMatthew Dillon lksleep(&trans->sync_tid, &hmp->voldatalk, 143d001f460SMatthew Dillon 0, "h2syncw", hz); 144a02dfba1SMatthew Dillon } 145a02dfba1SMatthew Dillon 146a02dfba1SMatthew Dillon /* 147d001f460SMatthew Dillon * Once we become the running flush we can wakeup anyone 148d001f460SMatthew Dillon * who blocked on us. 
149a02dfba1SMatthew Dillon */ 150d001f460SMatthew Dillon scan = trans; 151d001f460SMatthew Dillon while ((scan = TAILQ_NEXT(scan, entry)) != NULL) { 152d001f460SMatthew Dillon if (scan->flags & HAMMER2_TRANS_ISFLUSH) 153d001f460SMatthew Dillon break; 154d001f460SMatthew Dillon if (scan->blocked == 0) 155d001f460SMatthew Dillon break; 156d001f460SMatthew Dillon scan->blocked = 0; 157d001f460SMatthew Dillon wakeup(&scan->blocked); 158a02dfba1SMatthew Dillon } 159a02dfba1SMatthew Dillon } else { 160a02dfba1SMatthew Dillon /* 161d001f460SMatthew Dillon * If we are not a flush but our sync_tid is after a 162d001f460SMatthew Dillon * stalled flush, we have to wait until that flush unstalls 163d001f460SMatthew Dillon * (that is, all transactions prior to that flush complete), 164d001f460SMatthew Dillon * but then we can run concurrently with that flush. 165d001f460SMatthew Dillon * 166d001f460SMatthew Dillon * (flushcnt check only good as pre-condition, otherwise it 167d001f460SMatthew Dillon * may represent elements queued after us after we block). 
168a02dfba1SMatthew Dillon */ 169d001f460SMatthew Dillon if (hmp->flushcnt > 1 || 170d001f460SMatthew Dillon (hmp->curflush && 171d001f460SMatthew Dillon TAILQ_FIRST(&hmp->transq) != hmp->curflush)) { 172d001f460SMatthew Dillon trans->blocked = 1; 173d001f460SMatthew Dillon while (trans->blocked) { 174d001f460SMatthew Dillon lksleep(&trans->blocked, &hmp->voldatalk, 175d001f460SMatthew Dillon 0, "h2trans", hz); 176d001f460SMatthew Dillon } 177d001f460SMatthew Dillon } 178a02dfba1SMatthew Dillon } 179a02dfba1SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 1800dea3156SMatthew Dillon } 1810dea3156SMatthew Dillon 1820dea3156SMatthew Dillon void 1830dea3156SMatthew Dillon hammer2_trans_done(hammer2_trans_t *trans) 1840dea3156SMatthew Dillon { 185*a5913bdfSMatthew Dillon hammer2_cluster_t *cluster; 186*a5913bdfSMatthew Dillon hammer2_mount_t *hmp; 187d001f460SMatthew Dillon hammer2_trans_t *scan; 188a02dfba1SMatthew Dillon 189*a5913bdfSMatthew Dillon cluster = trans->pmp->cluster; 190*a5913bdfSMatthew Dillon hmp = cluster->hmp; 191*a5913bdfSMatthew Dillon 192a02dfba1SMatthew Dillon hammer2_voldata_lock(hmp); 193d001f460SMatthew Dillon TAILQ_REMOVE(&hmp->transq, trans, entry); 194d001f460SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) { 195d001f460SMatthew Dillon /* 196d001f460SMatthew Dillon * If we were a flush we have to adjust curflush to the 197d001f460SMatthew Dillon * next flush. 1984a59bd3eSMatthew Dillon * 1994a59bd3eSMatthew Dillon * flush_tid is used to partition copy-on-write operations 2004a59bd3eSMatthew Dillon * (mostly duplicate-on-modify ops), which is what allows 2014a59bd3eSMatthew Dillon * us to execute a flush concurrent with modifying operations 2024a59bd3eSMatthew Dillon * with higher TIDs. 
203d001f460SMatthew Dillon */ 204d001f460SMatthew Dillon --hmp->flushcnt; 205d001f460SMatthew Dillon if (hmp->flushcnt) { 206d001f460SMatthew Dillon TAILQ_FOREACH(scan, &hmp->transq, entry) { 207d001f460SMatthew Dillon if (scan->flags & HAMMER2_TRANS_ISFLUSH) 208d001f460SMatthew Dillon break; 209d001f460SMatthew Dillon } 2104a59bd3eSMatthew Dillon KKASSERT(scan); 211d001f460SMatthew Dillon hmp->curflush = scan; 2121a7cfe5aSMatthew Dillon hmp->topo_flush_tid = scan->sync_tid; 213d001f460SMatthew Dillon } else { 2144a59bd3eSMatthew Dillon /* 2154a59bd3eSMatthew Dillon * Theoretically we don't have to clear flush_tid 2164a59bd3eSMatthew Dillon * here since the flush will have synchronized 2174a59bd3eSMatthew Dillon * all operations <= flush_tid already. But for 2184a59bd3eSMatthew Dillon * now zero-it. 2194a59bd3eSMatthew Dillon */ 220d001f460SMatthew Dillon hmp->curflush = NULL; 2211a7cfe5aSMatthew Dillon hmp->topo_flush_tid = 0; 222d001f460SMatthew Dillon } 223d001f460SMatthew Dillon } else { 224d001f460SMatthew Dillon /* 225d001f460SMatthew Dillon * If we are not a flush but a flush is now at the head 226d001f460SMatthew Dillon * of the queue and we were previously blocking it, 227d001f460SMatthew Dillon * we can now unblock it. 
228d001f460SMatthew Dillon */ 229d001f460SMatthew Dillon if (hmp->flushcnt && 230d001f460SMatthew Dillon (scan = TAILQ_FIRST(&hmp->transq)) != NULL && 231d001f460SMatthew Dillon trans->sync_tid < scan->sync_tid && 232d001f460SMatthew Dillon (scan->flags & HAMMER2_TRANS_ISFLUSH)) { 233d001f460SMatthew Dillon wakeup(&scan->sync_tid); 234a02dfba1SMatthew Dillon } 235a02dfba1SMatthew Dillon } 236a02dfba1SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 237a02dfba1SMatthew Dillon } 238a02dfba1SMatthew Dillon 2390dea3156SMatthew Dillon /* 2400dea3156SMatthew Dillon * Flush the chain and all modified sub-chains through the specified 2410dea3156SMatthew Dillon * synchronization point (sync_tid), propagating parent chain modifications 2420dea3156SMatthew Dillon * and mirror_tid updates back up as needed. Since we are recursing downward 2430dea3156SMatthew Dillon * we do not have to deal with the complexities of multi-homed chains (chains 2440dea3156SMatthew Dillon * with multiple parents). 2450dea3156SMatthew Dillon * 2460dea3156SMatthew Dillon * Caller must have interlocked against any non-flush-related modifying 2470dea3156SMatthew Dillon * operations in progress whos modify_tid values are less than or equal 2480dea3156SMatthew Dillon * to the passed sync_tid. 2490dea3156SMatthew Dillon * 2500dea3156SMatthew Dillon * Caller must have already vetted synchronization points to ensure they 2510dea3156SMatthew Dillon * are properly flushed. Only snapshots and cluster flushes can create 2520dea3156SMatthew Dillon * these sorts of synchronization points. 2530dea3156SMatthew Dillon * 25432b800e6SMatthew Dillon * This routine can be called from several places but the most important 25532b800e6SMatthew Dillon * is from the hammer2_vop_reclaim() function. We want to try to completely 25632b800e6SMatthew Dillon * clean out the inode structure to prevent disconnected inodes from 2570dea3156SMatthew Dillon * building up and blowing out the kmalloc pool. 
However, it is not actually 2580dea3156SMatthew Dillon * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery 2590dea3156SMatthew Dillon * capability. 26032b800e6SMatthew Dillon * 2610dea3156SMatthew Dillon * chain is locked on call and will remain locked on return. If a flush 2620dea3156SMatthew Dillon * occured, the chain's MOVED bit will be set indicating that its parent 2630dea3156SMatthew Dillon * (which is not part of the flush) should be updated. 26432b800e6SMatthew Dillon */ 26532b800e6SMatthew Dillon void 2660dea3156SMatthew Dillon hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t *chain) 26732b800e6SMatthew Dillon { 26832b800e6SMatthew Dillon hammer2_chain_t *scan; 269731b2a84SMatthew Dillon hammer2_chain_core_t *core; 27032b800e6SMatthew Dillon hammer2_flush_info_t info; 27132b800e6SMatthew Dillon 27232b800e6SMatthew Dillon /* 27332b800e6SMatthew Dillon * Execute the recursive flush and handle deferrals. 27432b800e6SMatthew Dillon * 27532b800e6SMatthew Dillon * Chains can be ridiculously long (thousands deep), so to 27632b800e6SMatthew Dillon * avoid blowing out the kernel stack the recursive flush has a 27732b800e6SMatthew Dillon * depth limit. Elements at the limit are placed on a list 27832b800e6SMatthew Dillon * for re-execution after the stack has been popped. 27932b800e6SMatthew Dillon */ 28032b800e6SMatthew Dillon bzero(&info, sizeof(info)); 28132b800e6SMatthew Dillon TAILQ_INIT(&info.flush_list); 2820dea3156SMatthew Dillon info.trans = trans; 2830dea3156SMatthew Dillon info.sync_tid = trans->sync_tid; 2840dea3156SMatthew Dillon info.mirror_tid = 0; 28532b800e6SMatthew Dillon 286731b2a84SMatthew Dillon core = chain->core; 287731b2a84SMatthew Dillon 2880dea3156SMatthew Dillon for (;;) { 28932b800e6SMatthew Dillon /* 2900dea3156SMatthew Dillon * Unwind deep recursions which had been deferred. 
This 2910dea3156SMatthew Dillon * can leave MOVED set for these chains, which will be 2920dea3156SMatthew Dillon * handled when we [re]flush chain after the unwind. 29332b800e6SMatthew Dillon */ 29432b800e6SMatthew Dillon while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) { 29532b800e6SMatthew Dillon KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); 29632b800e6SMatthew Dillon TAILQ_REMOVE(&info.flush_list, scan, flush_node); 29732b800e6SMatthew Dillon atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED); 29832b800e6SMatthew Dillon 29932b800e6SMatthew Dillon /* 30032b800e6SMatthew Dillon * Now that we've popped back up we can do a secondary 30132b800e6SMatthew Dillon * recursion on the deferred elements. 30232b800e6SMatthew Dillon */ 30332b800e6SMatthew Dillon if (hammer2_debug & 0x0040) 30432b800e6SMatthew Dillon kprintf("defered flush %p\n", scan); 3050dea3156SMatthew Dillon hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE); 3060dea3156SMatthew Dillon hammer2_chain_flush(trans, scan); 3070dea3156SMatthew Dillon hammer2_chain_unlock(scan); 3080dea3156SMatthew Dillon hammer2_chain_drop(scan); /* ref from deferral */ 30932b800e6SMatthew Dillon } 31032b800e6SMatthew Dillon 31132b800e6SMatthew Dillon /* 3128853dfb5SMatthew Dillon * Flush pass1 on root. 31332b800e6SMatthew Dillon */ 3140dea3156SMatthew Dillon info.diddeferral = 0; 3150dea3156SMatthew Dillon hammer2_chain_flush_core(&info, chain); 3169797e933SMatthew Dillon #if FLUSH_DEBUG 3179797e933SMatthew Dillon kprintf("flush_core_done parent=<base> chain=%p.%d %08x\n", 3189797e933SMatthew Dillon chain, chain->bref.type, chain->flags); 3199797e933SMatthew Dillon #endif 32032b800e6SMatthew Dillon 32132b800e6SMatthew Dillon /* 3220dea3156SMatthew Dillon * Only loop if deep recursions have been deferred. 
32332b800e6SMatthew Dillon */ 3240dea3156SMatthew Dillon if (TAILQ_EMPTY(&info.flush_list)) 32532b800e6SMatthew Dillon break; 32632b800e6SMatthew Dillon } 32732b800e6SMatthew Dillon } 32832b800e6SMatthew Dillon 329476d2aadSMatthew Dillon /* 330ea155208SMatthew Dillon * This is the core of the chain flushing code. The chain is locked by the 331ea155208SMatthew Dillon * caller and remains locked on return. This function is keyed off of 332ea155208SMatthew Dillon * the SUBMODIFIED bit but must make fine-grained choices based on the 333ea155208SMatthew Dillon * synchronization point we are flushing to. 3340dea3156SMatthew Dillon * 3350dea3156SMatthew Dillon * If the flush accomplished any work chain will be flagged MOVED 3360dea3156SMatthew Dillon * indicating a copy-on-write propagation back up is required. 3370dea3156SMatthew Dillon * Deep sub-nodes may also have been entered onto the deferral list. 3380dea3156SMatthew Dillon * MOVED is never set on the volume root. 3390dea3156SMatthew Dillon * 3400dea3156SMatthew Dillon * NOTE: modify_tid is different from MODIFIED. modify_tid is updated 3410dea3156SMatthew Dillon * only when a chain is specifically modified, and not updated 3420dea3156SMatthew Dillon * for copy-on-write propagations. MODIFIED is set on any modification 3430dea3156SMatthew Dillon * including copy-on-write propagations. 
344476d2aadSMatthew Dillon */ 34532b800e6SMatthew Dillon static void 3460dea3156SMatthew Dillon hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain) 34732b800e6SMatthew Dillon { 3480dea3156SMatthew Dillon hammer2_mount_t *hmp; 34932b800e6SMatthew Dillon hammer2_blockref_t *bref; 35032b800e6SMatthew Dillon hammer2_off_t pbase; 351a98aa0b0SMatthew Dillon hammer2_off_t pmask; 352ea155208SMatthew Dillon hammer2_tid_t saved_sync; 353a864c5d9SMatthew Dillon hammer2_trans_t *trans = info->trans; 354731b2a84SMatthew Dillon hammer2_chain_core_t *core; 355a98aa0b0SMatthew Dillon size_t psize; 35632b800e6SMatthew Dillon size_t boff; 35732b800e6SMatthew Dillon char *bdata; 35832b800e6SMatthew Dillon struct buf *bp; 35932b800e6SMatthew Dillon int error; 36032b800e6SMatthew Dillon int wasmodified; 3610dea3156SMatthew Dillon int diddeferral = 0; 36232b800e6SMatthew Dillon 363*a5913bdfSMatthew Dillon hmp = chain->hmp; 36432b800e6SMatthew Dillon 3659797e933SMatthew Dillon #if FLUSH_DEBUG 3669797e933SMatthew Dillon if (info->parent) 3679797e933SMatthew Dillon kprintf("flush_core %p->%p.%d %08x (%s)\n", 3689797e933SMatthew Dillon info->parent, chain, chain->bref.type, 3699797e933SMatthew Dillon chain->flags, 3709797e933SMatthew Dillon ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? 3719797e933SMatthew Dillon chain->data->ipdata.filename : "?")); 3729797e933SMatthew Dillon else 3739797e933SMatthew Dillon kprintf("flush_core NULL->%p.%d %08x (%s)\n", 3749797e933SMatthew Dillon chain, chain->bref.type, 3759797e933SMatthew Dillon chain->flags, 3769797e933SMatthew Dillon ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? 3779797e933SMatthew Dillon chain->data->ipdata.filename : "?")); 3789797e933SMatthew Dillon #endif 37932b800e6SMatthew Dillon /* 380731b2a84SMatthew Dillon * Ignore chains modified beyond the current flush point. These 381731b2a84SMatthew Dillon * will be treated as if they did not exist. 
382ea155208SMatthew Dillon */ 383ea155208SMatthew Dillon if (chain->modify_tid > info->sync_tid) 384ea155208SMatthew Dillon return; 385731b2a84SMatthew Dillon 386731b2a84SMatthew Dillon /* 387731b2a84SMatthew Dillon * Deleted chains which have not been destroyed must be retained, 388731b2a84SMatthew Dillon * and we probably have to recurse to clean-up any sub-trees. 389731b2a84SMatthew Dillon * However, restricted flushes can stop processing here because 390731b2a84SMatthew Dillon * the chain cleanup will be handled by a later normal flush. 391731b2a84SMatthew Dillon * 392731b2a84SMatthew Dillon * The MODIFIED bit can likely be cleared in this situation and we 393731b2a84SMatthew Dillon * will do so later on in this procedure. 394731b2a84SMatthew Dillon */ 395731b2a84SMatthew Dillon if (chain->delete_tid <= info->sync_tid) { 396731b2a84SMatthew Dillon if (trans->flags & HAMMER2_TRANS_RESTRICTED) 397a864c5d9SMatthew Dillon return; 3989797e933SMatthew Dillon } 399a864c5d9SMatthew Dillon 400a864c5d9SMatthew Dillon saved_sync = info->sync_tid; 401731b2a84SMatthew Dillon core = chain->core; 402ea155208SMatthew Dillon 403ea155208SMatthew Dillon /* 40432b800e6SMatthew Dillon * If SUBMODIFIED is set we recurse the flush and adjust the 40532b800e6SMatthew Dillon * blockrefs accordingly. 40632b800e6SMatthew Dillon * 40732b800e6SMatthew Dillon * NOTE: Looping on SUBMODIFIED can prevent a flush from ever 40832b800e6SMatthew Dillon * finishing in the face of filesystem activity. 40932b800e6SMatthew Dillon */ 41032b800e6SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) { 4110dea3156SMatthew Dillon hammer2_chain_t *saved_parent; 412ea155208SMatthew Dillon hammer2_tid_t saved_mirror; 41332b800e6SMatthew Dillon 41432b800e6SMatthew Dillon /* 4150dea3156SMatthew Dillon * Clear SUBMODIFIED to catch races. 
Note that any child 4160dea3156SMatthew Dillon * with MODIFIED, DELETED, or MOVED set during Scan2, after 4170dea3156SMatthew Dillon * it processes the child, will cause SUBMODIFIED to be 4180dea3156SMatthew Dillon * re-set. 41932b800e6SMatthew Dillon * child has to be flushed SUBMODIFIED will wind up being 42032b800e6SMatthew Dillon * set again (for next time), but this does not stop us from 42132b800e6SMatthew Dillon * synchronizing block updates which occurred. 42232b800e6SMatthew Dillon * 42332b800e6SMatthew Dillon * We don't want to set our chain to MODIFIED gratuitously. 42432b800e6SMatthew Dillon * 42532b800e6SMatthew Dillon * We need an extra ref on chain because we are going to 42632b800e6SMatthew Dillon * release its lock temporarily in our child loop. 42732b800e6SMatthew Dillon */ 42832b800e6SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED); 4290dea3156SMatthew Dillon hammer2_chain_ref(chain); 43032b800e6SMatthew Dillon 43132b800e6SMatthew Dillon /* 4320dea3156SMatthew Dillon * Run two passes. The first pass handles MODIFIED and 4330dea3156SMatthew Dillon * SUBMODIFIED chains and recurses while the second pass 4340dea3156SMatthew Dillon * handles MOVED chains on the way back up. 43532b800e6SMatthew Dillon * 4360dea3156SMatthew Dillon * If the stack gets too deep we defer scan1, but must 4370dea3156SMatthew Dillon * be sure to still run scan2 if on the next loop the 4380dea3156SMatthew Dillon * deferred chain has been flushed and now needs MOVED 4390dea3156SMatthew Dillon * handling on the way back up. 44032b800e6SMatthew Dillon * 4410dea3156SMatthew Dillon * Scan1 is recursive. 44232b800e6SMatthew Dillon * 4430dea3156SMatthew Dillon * NOTE: The act of handling a modified/submodified chain can 4440dea3156SMatthew Dillon * cause the MOVED Flag to be set. It can also be set 4450dea3156SMatthew Dillon * via hammer2_chain_delete() and in other situations. 
4460dea3156SMatthew Dillon * 4470dea3156SMatthew Dillon * NOTE: RB_SCAN() must be used instead of RB_FOREACH() 4480dea3156SMatthew Dillon * because children can be physically removed during 4490dea3156SMatthew Dillon * the scan. 45032b800e6SMatthew Dillon */ 4510dea3156SMatthew Dillon saved_parent = info->parent; 452ea155208SMatthew Dillon saved_mirror = info->mirror_tid; 4530dea3156SMatthew Dillon info->parent = chain; 454ea155208SMatthew Dillon info->mirror_tid = chain->bref.mirror_tid; 45532b800e6SMatthew Dillon 4560dea3156SMatthew Dillon if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 4570dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) { 4580dea3156SMatthew Dillon hammer2_chain_ref(chain); 4590dea3156SMatthew Dillon TAILQ_INSERT_TAIL(&info->flush_list, 4600dea3156SMatthew Dillon chain, flush_node); 4610dea3156SMatthew Dillon atomic_set_int(&chain->flags, 4620dea3156SMatthew Dillon HAMMER2_CHAIN_DEFERRED); 4630dea3156SMatthew Dillon } 4640dea3156SMatthew Dillon diddeferral = 1; 46532b800e6SMatthew Dillon } else { 4660dea3156SMatthew Dillon info->diddeferral = 0; 467731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 4680dea3156SMatthew Dillon RB_SCAN(hammer2_chain_tree, &chain->core->rbtree, 4690dea3156SMatthew Dillon NULL, hammer2_chain_flush_scan1, info); 470731b2a84SMatthew Dillon spin_unlock(&core->cst.spin); 4710dea3156SMatthew Dillon diddeferral += info->diddeferral; 47232b800e6SMatthew Dillon } 47332b800e6SMatthew Dillon 47432b800e6SMatthew Dillon /* 4750dea3156SMatthew Dillon * Handle successfully flushed children who are in the MOVED 4760dea3156SMatthew Dillon * state on the way back up the recursion. This can have 4770dea3156SMatthew Dillon * the side-effect of clearing MOVED. 47832b800e6SMatthew Dillon * 4790dea3156SMatthew Dillon * We execute this even if there were deferrals to try to 4800dea3156SMatthew Dillon * keep the chain topology cleaner. 
48132b800e6SMatthew Dillon * 4820dea3156SMatthew Dillon * Scan2 is non-recursive. 48332b800e6SMatthew Dillon */ 484cd189b1eSMatthew Dillon if (diddeferral) { 485cd189b1eSMatthew Dillon atomic_set_int(&chain->flags, 486cd189b1eSMatthew Dillon HAMMER2_CHAIN_SUBMODIFIED); 487cd189b1eSMatthew Dillon } else { 4889797e933SMatthew Dillon #if FLUSH_DEBUG 4898853dfb5SMatthew Dillon kprintf("scan2_start parent %p %08x\n", 4908853dfb5SMatthew Dillon chain, chain->flags); 4919797e933SMatthew Dillon #endif 492731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 493731b2a84SMatthew Dillon RB_SCAN(hammer2_chain_tree, &core->rbtree, 4940dea3156SMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 495731b2a84SMatthew Dillon spin_unlock(&core->cst.spin); 4969797e933SMatthew Dillon #if FLUSH_DEBUG 4978853dfb5SMatthew Dillon kprintf("scan2_stop parent %p %08x\n", 4988853dfb5SMatthew Dillon chain, chain->flags); 4999797e933SMatthew Dillon #endif 500cd189b1eSMatthew Dillon } 501ea155208SMatthew Dillon chain->bref.mirror_tid = info->mirror_tid; 502ea155208SMatthew Dillon info->mirror_tid = saved_mirror; 5030dea3156SMatthew Dillon info->parent = saved_parent; 5040dea3156SMatthew Dillon hammer2_chain_drop(chain); 50532b800e6SMatthew Dillon } 5060dea3156SMatthew Dillon 5070dea3156SMatthew Dillon /* 508ea155208SMatthew Dillon * Restore sync_tid in case it was restricted by a delete/duplicate. 509ea155208SMatthew Dillon */ 510ea155208SMatthew Dillon info->sync_tid = saved_sync; 511ea155208SMatthew Dillon 512ea155208SMatthew Dillon /* 5130dea3156SMatthew Dillon * Rollup diddeferral for caller. Note direct assignment, not +=. 5140dea3156SMatthew Dillon */ 5150dea3156SMatthew Dillon info->diddeferral = diddeferral; 5160dea3156SMatthew Dillon 5170dea3156SMatthew Dillon /* 5180dea3156SMatthew Dillon * Do not flush chain if there were any deferrals. It will be 5190dea3156SMatthew Dillon * retried later after the deferrals are independently handled. 
5200dea3156SMatthew Dillon */ 5210dea3156SMatthew Dillon if (diddeferral) { 5220dea3156SMatthew Dillon if (hammer2_debug & 0x0008) { 5230dea3156SMatthew Dillon kprintf("%*.*s} %p/%d %04x (deferred)", 5240dea3156SMatthew Dillon info->depth, info->depth, "", 5250dea3156SMatthew Dillon chain, chain->refs, chain->flags); 52632b800e6SMatthew Dillon } 52732b800e6SMatthew Dillon return; 52832b800e6SMatthew Dillon } 52932b800e6SMatthew Dillon 53032b800e6SMatthew Dillon /* 531731b2a84SMatthew Dillon * If we encounter a deleted chain within our flush we can clear 532731b2a84SMatthew Dillon * the MODIFIED bit and avoid flushing it whether it has been 53393f3933aSMatthew Dillon * destroyed or not. We must make sure that the chain is flagged 53493f3933aSMatthew Dillon * MOVED in this situation so the parent picks up the deletion. 53532b800e6SMatthew Dillon */ 536731b2a84SMatthew Dillon if (chain->delete_tid <= info->sync_tid) { 537731b2a84SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 5381a7cfe5aSMatthew Dillon if (chain->bp) { 5391a7cfe5aSMatthew Dillon if (chain->bytes == chain->bp->b_bufsize) 540731b2a84SMatthew Dillon chain->bp->b_flags |= B_INVAL|B_RELBUF; 5411a7cfe5aSMatthew Dillon } 54293f3933aSMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { 54393f3933aSMatthew Dillon hammer2_chain_ref(chain); 54493f3933aSMatthew Dillon atomic_set_int(&chain->flags, 54593f3933aSMatthew Dillon HAMMER2_CHAIN_MOVED); 54693f3933aSMatthew Dillon } 547731b2a84SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 548731b2a84SMatthew Dillon hammer2_chain_drop(chain); 549731b2a84SMatthew Dillon } 550731b2a84SMatthew Dillon return; 551731b2a84SMatthew Dillon } 552731b2a84SMatthew Dillon #if 0 5530dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DESTROYED) && 554a864c5d9SMatthew Dillon (chain->flags & HAMMER2_CHAIN_DELETED) && 555a864c5d9SMatthew Dillon (trans->flags & HAMMER2_TRANS_RESTRICTED) == 0) { 556a864c5d9SMatthew Dillon /* 
557a864c5d9SMatthew Dillon * Throw-away the MODIFIED flag 558a864c5d9SMatthew Dillon */ 5590dea3156SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 5601a7cfe5aSMatthew Dillon if (chain->bp) { 5611a7cfe5aSMatthew Dillon if (chain->bytes == chain->bp->b_bufsize) 5620dea3156SMatthew Dillon chain->bp->b_flags |= B_INVAL|B_RELBUF; 5631a7cfe5aSMatthew Dillon } 5640dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 5650dea3156SMatthew Dillon hammer2_chain_drop(chain); 5660dea3156SMatthew Dillon } 5670dea3156SMatthew Dillon return; 5680dea3156SMatthew Dillon } 569731b2a84SMatthew Dillon #endif 5700dea3156SMatthew Dillon 5710dea3156SMatthew Dillon /* 572ea155208SMatthew Dillon * A degenerate flush might not have flushed anything and thus not 573ea155208SMatthew Dillon * processed modified blocks on the way back up. Detect the case. 5740dea3156SMatthew Dillon * 5750dea3156SMatthew Dillon * Note that MOVED can be set without MODIFIED being set due to 5760dea3156SMatthew Dillon * a deletion, in which case it is handled by Scan2 later on. 5770dea3156SMatthew Dillon * 5780dea3156SMatthew Dillon * Both bits can be set along with DELETED due to a deletion if 5790dea3156SMatthew Dillon * modified data within the synchronization zone and the chain 5800dea3156SMatthew Dillon * was then deleted beyond the zone, in which case we still have 581ea155208SMatthew Dillon * to flush for synchronization point consistency. Otherwise though 582ea155208SMatthew Dillon * DELETED and MODIFIED are treated as separate flags. 5830dea3156SMatthew Dillon */ 5840dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) 5850dea3156SMatthew Dillon return; 5860dea3156SMatthew Dillon 5870dea3156SMatthew Dillon /* 5880dea3156SMatthew Dillon * Issue flush. 5890dea3156SMatthew Dillon * 5900dea3156SMatthew Dillon * A DESTROYED node that reaches this point must be flushed for 5910dea3156SMatthew Dillon * synchronization point consistency. 
5920dea3156SMatthew Dillon */ 5930dea3156SMatthew Dillon 5940dea3156SMatthew Dillon /* 5950dea3156SMatthew Dillon * Update mirror_tid, clear MODIFIED, and set MOVED. 5960dea3156SMatthew Dillon * 5970dea3156SMatthew Dillon * The caller will update the parent's reference to this chain 5980dea3156SMatthew Dillon * by testing MOVED as long as the modification was in-bounds. 5990dea3156SMatthew Dillon * 6000dea3156SMatthew Dillon * MOVED is never set on the volume root as there is no parent 6010dea3156SMatthew Dillon * to adjust. 6020dea3156SMatthew Dillon */ 6030dea3156SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 6040dea3156SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 6050dea3156SMatthew Dillon wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0; 6060dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 6070dea3156SMatthew Dillon if (chain == &hmp->vchain) 6080dea3156SMatthew Dillon kprintf("(FLUSHED VOLUME HEADER)\n"); 6091a7cfe5aSMatthew Dillon if (chain == &hmp->fchain) 6101a7cfe5aSMatthew Dillon kprintf("(FLUSHED FREEMAP HEADER)\n"); 6110dea3156SMatthew Dillon 6120dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) || 6131a7cfe5aSMatthew Dillon chain == &hmp->vchain || 6141a7cfe5aSMatthew Dillon chain == &hmp->fchain) { 61532b800e6SMatthew Dillon /* 61632b800e6SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared. 61732b800e6SMatthew Dillon */ 61832b800e6SMatthew Dillon if (wasmodified) 6190dea3156SMatthew Dillon hammer2_chain_drop(chain); 62032b800e6SMatthew Dillon } else { 62132b800e6SMatthew Dillon /* 62232b800e6SMatthew Dillon * If we were MODIFIED we inherit the ref from clearing 62332b800e6SMatthew Dillon * that bit, otherwise we need another ref. 
62432b800e6SMatthew Dillon */ 62532b800e6SMatthew Dillon if (wasmodified == 0) 6260dea3156SMatthew Dillon hammer2_chain_ref(chain); 62732b800e6SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); 62832b800e6SMatthew Dillon } 62932b800e6SMatthew Dillon 63032b800e6SMatthew Dillon /* 63132b800e6SMatthew Dillon * If this is part of a recursive flush we can go ahead and write 632ea155208SMatthew Dillon * out the buffer cache buffer and pass a new bref back up the chain 633ea155208SMatthew Dillon * via the MOVED bit. 63432b800e6SMatthew Dillon * 635ea155208SMatthew Dillon * Volume headers are NOT flushed here as they require special 636ea155208SMatthew Dillon * processing. 63732b800e6SMatthew Dillon */ 63832b800e6SMatthew Dillon switch(chain->bref.type) { 6391a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 6401a7cfe5aSMatthew Dillon hammer2_modify_volume(hmp); 6411a7cfe5aSMatthew Dillon break; 64232b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 64332b800e6SMatthew Dillon /* 6441a7cfe5aSMatthew Dillon * We should flush the free block table before we calculate 6451a7cfe5aSMatthew Dillon * CRCs and copy voldata -> volsync. 64693f3933aSMatthew Dillon * 64793f3933aSMatthew Dillon * To prevent SMP races, fchain must remain locked until 64893f3933aSMatthew Dillon * voldata is copied to volsync. 6491a7cfe5aSMatthew Dillon */ 6501a7cfe5aSMatthew Dillon hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 6511a7cfe5aSMatthew Dillon if (hmp->fchain.flags & (HAMMER2_CHAIN_MODIFIED | 6521a7cfe5aSMatthew Dillon HAMMER2_CHAIN_SUBMODIFIED)) { 6531a7cfe5aSMatthew Dillon /* this will modify vchain as a side effect */ 6541a7cfe5aSMatthew Dillon hammer2_chain_flush(info->trans, &hmp->fchain); 6551a7cfe5aSMatthew Dillon } 6561a7cfe5aSMatthew Dillon 6571a7cfe5aSMatthew Dillon /* 65832b800e6SMatthew Dillon * The volume header is flushed manually by the syncer, not 659ea155208SMatthew Dillon * here. All we do is adjust the crc's. 
66032b800e6SMatthew Dillon */ 66132b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 66232b800e6SMatthew Dillon KKASSERT(chain->bp == NULL); 66332b800e6SMatthew Dillon kprintf("volume header mirror_tid %jd\n", 66432b800e6SMatthew Dillon hmp->voldata.mirror_tid); 66532b800e6SMatthew Dillon 66632b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 66732b800e6SMatthew Dillon hammer2_icrc32( 66832b800e6SMatthew Dillon (char *)&hmp->voldata + 66932b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_OFF, 67032b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_SIZE); 67132b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 67232b800e6SMatthew Dillon hammer2_icrc32( 67332b800e6SMatthew Dillon (char *)&hmp->voldata + 67432b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_OFF, 67532b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_SIZE); 67632b800e6SMatthew Dillon hmp->voldata.icrc_volheader = 67732b800e6SMatthew Dillon hammer2_icrc32( 67832b800e6SMatthew Dillon (char *)&hmp->voldata + 67932b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_OFF, 68032b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_SIZE); 68132b800e6SMatthew Dillon hmp->volsync = hmp->voldata; 6820dea3156SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 68393f3933aSMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 68432b800e6SMatthew Dillon break; 68532b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_DATA: 68632b800e6SMatthew Dillon /* 68732b800e6SMatthew Dillon * Data elements have already been flushed via the logical 68832b800e6SMatthew Dillon * file buffer cache. Their hash was set in the bref by 68932b800e6SMatthew Dillon * the vop_write code. 69032b800e6SMatthew Dillon * 691ea155208SMatthew Dillon * Make sure any device buffer(s) have been flushed out here. 692ea155208SMatthew Dillon * (there aren't usually any to flush). 
69332b800e6SMatthew Dillon */ 694a98aa0b0SMatthew Dillon psize = hammer2_devblksize(chain->bytes); 695a98aa0b0SMatthew Dillon pmask = (hammer2_off_t)psize - 1; 696a98aa0b0SMatthew Dillon pbase = chain->bref.data_off & ~pmask; 697a98aa0b0SMatthew Dillon boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask); 69832b800e6SMatthew Dillon 699a98aa0b0SMatthew Dillon bp = getblk(hmp->devvp, pbase, psize, GETBLK_NOWAIT, 0); 70032b800e6SMatthew Dillon if (bp) { 70132b800e6SMatthew Dillon if ((bp->b_flags & (B_CACHE | B_DIRTY)) == 70232b800e6SMatthew Dillon (B_CACHE | B_DIRTY)) { 70332b800e6SMatthew Dillon cluster_awrite(bp); 70432b800e6SMatthew Dillon } else { 70532b800e6SMatthew Dillon bp->b_flags |= B_RELBUF; 70632b800e6SMatthew Dillon brelse(bp); 70732b800e6SMatthew Dillon } 70832b800e6SMatthew Dillon } 70932b800e6SMatthew Dillon break; 710512beabdSMatthew Dillon #if 0 71132b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 71232b800e6SMatthew Dillon /* 71332b800e6SMatthew Dillon * Indirect blocks may be in an INITIAL state. Use the 71432b800e6SMatthew Dillon * chain_lock() call to ensure that the buffer has been 71532b800e6SMatthew Dillon * instantiated (even though it is already locked the buffer 71632b800e6SMatthew Dillon * might not have been instantiated). 71732b800e6SMatthew Dillon * 71832b800e6SMatthew Dillon * Only write the buffer out if it is dirty, it is possible 71932b800e6SMatthew Dillon * the operating system had already written out the buffer. 
72032b800e6SMatthew Dillon */ 7210dea3156SMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 72232b800e6SMatthew Dillon KKASSERT(chain->bp != NULL); 72332b800e6SMatthew Dillon 72432b800e6SMatthew Dillon bp = chain->bp; 72532b800e6SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) || 72632b800e6SMatthew Dillon (bp->b_flags & B_DIRTY)) { 72732b800e6SMatthew Dillon bdwrite(chain->bp); 72832b800e6SMatthew Dillon } else { 72932b800e6SMatthew Dillon brelse(chain->bp); 73032b800e6SMatthew Dillon } 73132b800e6SMatthew Dillon chain->bp = NULL; 73232b800e6SMatthew Dillon chain->data = NULL; 7330dea3156SMatthew Dillon hammer2_chain_unlock(chain); 73432b800e6SMatthew Dillon break; 735512beabdSMatthew Dillon #endif 736512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 7371a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 7381a7cfe5aSMatthew Dillon /* 7391a7cfe5aSMatthew Dillon * Device-backed. Buffer will be flushed by the sync 7401a7cfe5aSMatthew Dillon * code XXX. 7411a7cfe5aSMatthew Dillon */ 742512beabdSMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 7431a7cfe5aSMatthew Dillon break; 744512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 74532b800e6SMatthew Dillon default: 74632b800e6SMatthew Dillon /* 74732b800e6SMatthew Dillon * Embedded elements have to be flushed out. 7481a7cfe5aSMatthew Dillon * (Basically just BREF_TYPE_INODE). 
74932b800e6SMatthew Dillon */ 750512beabdSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 75132b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 75232b800e6SMatthew Dillon KKASSERT(chain->bp == NULL); 75332b800e6SMatthew Dillon bref = &chain->bref; 75432b800e6SMatthew Dillon 75532b800e6SMatthew Dillon KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0); 7569061bde5SMatthew Dillon KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) == 757512beabdSMatthew Dillon HAMMER2_CHECK_ISCSI32 || 758512beabdSMatthew Dillon HAMMER2_DEC_CHECK(chain->bref.methods) == 759512beabdSMatthew Dillon HAMMER2_CHECK_FREEMAP); 76032b800e6SMatthew Dillon 76132b800e6SMatthew Dillon /* 76232b800e6SMatthew Dillon * The data is embedded, we have to acquire the 76332b800e6SMatthew Dillon * buffer cache buffer and copy the data into it. 76432b800e6SMatthew Dillon */ 765a98aa0b0SMatthew Dillon psize = hammer2_devblksize(chain->bytes); 766a98aa0b0SMatthew Dillon pmask = (hammer2_off_t)psize - 1; 767a98aa0b0SMatthew Dillon pbase = bref->data_off & ~pmask; 768a98aa0b0SMatthew Dillon boff = bref->data_off & (HAMMER2_OFF_MASK & pmask); 76932b800e6SMatthew Dillon 77032b800e6SMatthew Dillon /* 77132b800e6SMatthew Dillon * The getblk() optimization can only be used if the 77232b800e6SMatthew Dillon * physical block size matches the request. 77332b800e6SMatthew Dillon */ 774a98aa0b0SMatthew Dillon error = bread(hmp->devvp, pbase, psize, &bp); 77532b800e6SMatthew Dillon KKASSERT(error == 0); 776a98aa0b0SMatthew Dillon 77732b800e6SMatthew Dillon bdata = (char *)bp->b_data + boff; 77832b800e6SMatthew Dillon 77932b800e6SMatthew Dillon /* 78032b800e6SMatthew Dillon * Copy the data to the buffer, mark the buffer 78132b800e6SMatthew Dillon * dirty, and convert the chain to unmodified. 
78232b800e6SMatthew Dillon */ 78332b800e6SMatthew Dillon bcopy(chain->data, bdata, chain->bytes); 78432b800e6SMatthew Dillon bp->b_flags |= B_CLUSTEROK; 78532b800e6SMatthew Dillon bdwrite(bp); 78632b800e6SMatthew Dillon bp = NULL; 787a98aa0b0SMatthew Dillon 788512beabdSMatthew Dillon switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 789512beabdSMatthew Dillon case HAMMER2_CHECK_FREEMAP: 790512beabdSMatthew Dillon chain->bref.check.freemap.icrc32 = 791512beabdSMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 792512beabdSMatthew Dillon break; 793512beabdSMatthew Dillon case HAMMER2_CHECK_ISCSI32: 79432b800e6SMatthew Dillon chain->bref.check.iscsi32.value = 79532b800e6SMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 796512beabdSMatthew Dillon break; 797512beabdSMatthew Dillon default: 798512beabdSMatthew Dillon panic("hammer2_flush_core: bad crc type"); 799512beabdSMatthew Dillon break; /* NOT REACHED */ 800512beabdSMatthew Dillon } 80132b800e6SMatthew Dillon if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) 80232b800e6SMatthew Dillon ++hammer2_iod_meta_write; 80332b800e6SMatthew Dillon else 80432b800e6SMatthew Dillon ++hammer2_iod_indr_write; 80532b800e6SMatthew Dillon } 80632b800e6SMatthew Dillon } 80732b800e6SMatthew Dillon 80832b800e6SMatthew Dillon /* 8090dea3156SMatthew Dillon * Flush helper scan1 (recursive) 8100dea3156SMatthew Dillon * 8110dea3156SMatthew Dillon * Flushes the children of the caller's chain (parent) and updates 812ea155208SMatthew Dillon * the blockref, restricted by sync_tid. 8130dea3156SMatthew Dillon * 8140dea3156SMatthew Dillon * Ripouts during the loop should not cause any problems. Because we are 8150dea3156SMatthew Dillon * flushing to a synchronization point, modification races will occur after 8160dea3156SMatthew Dillon * sync_tid and do not have to be flushed anyway. 
817ea155208SMatthew Dillon * 818ea155208SMatthew Dillon * It is also ok if the parent is chain_duplicate()'d while unlocked because 819ea155208SMatthew Dillon * the delete/duplication will install a delete_tid that is still larger than 820ea155208SMatthew Dillon * our current sync_tid. 82132b800e6SMatthew Dillon */ 8220dea3156SMatthew Dillon static int 8230dea3156SMatthew Dillon hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data) 82432b800e6SMatthew Dillon { 8250dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 826cd189b1eSMatthew Dillon hammer2_trans_t *trans = info->trans; 8270dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 8280dea3156SMatthew Dillon int diddeferral; 82932b800e6SMatthew Dillon 8300dea3156SMatthew Dillon /* 8310dea3156SMatthew Dillon * We should only need to recurse if SUBMODIFIED is set, but as 8328853dfb5SMatthew Dillon * a safety also recurse if MODIFIED is also set. 8338853dfb5SMatthew Dillon * 8348853dfb5SMatthew Dillon * Return early if neither bit is set. We must re-assert the 8358853dfb5SMatthew Dillon * SUBMODIFIED flag in the parent if any child covered by the 8368853dfb5SMatthew Dillon * parent (via delete_tid) is skipped. 
8370dea3156SMatthew Dillon */ 838731b2a84SMatthew Dillon if ((child->flags & (HAMMER2_CHAIN_MODIFIED | 839731b2a84SMatthew Dillon HAMMER2_CHAIN_SUBMODIFIED)) == 0) { 8400dea3156SMatthew Dillon return (0); 84132b800e6SMatthew Dillon } 842cd189b1eSMatthew Dillon if (child->modify_tid > trans->sync_tid) { 8438853dfb5SMatthew Dillon if (parent->delete_tid > trans->sync_tid) { 8448853dfb5SMatthew Dillon atomic_set_int(&parent->flags, 8458853dfb5SMatthew Dillon HAMMER2_CHAIN_SUBMODIFIED); 8468853dfb5SMatthew Dillon } 847cd189b1eSMatthew Dillon return (0); 848cd189b1eSMatthew Dillon } 849cd189b1eSMatthew Dillon 850ea155208SMatthew Dillon hammer2_chain_ref(child); 8510dea3156SMatthew Dillon spin_unlock(&parent->core->cst.spin); 8520dea3156SMatthew Dillon 8530dea3156SMatthew Dillon /* 8540dea3156SMatthew Dillon * The caller has added a ref to the parent so we can temporarily 8550dea3156SMatthew Dillon * unlock it in order to lock the child. Re-check the flags before 8560dea3156SMatthew Dillon * continuing. 
8570dea3156SMatthew Dillon */ 8580dea3156SMatthew Dillon hammer2_chain_unlock(parent); 8590dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE); 8600dea3156SMatthew Dillon 861731b2a84SMatthew Dillon if ((child->flags & (HAMMER2_CHAIN_MODIFIED | 862731b2a84SMatthew Dillon HAMMER2_CHAIN_SUBMODIFIED)) == 0) { 8630dea3156SMatthew Dillon hammer2_chain_unlock(child); 864ea155208SMatthew Dillon hammer2_chain_drop(child); 865ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 8660dea3156SMatthew Dillon spin_lock(&parent->core->cst.spin); 8670dea3156SMatthew Dillon return (0); 8680dea3156SMatthew Dillon } 869cd189b1eSMatthew Dillon if (child->modify_tid > trans->sync_tid) { 870cd189b1eSMatthew Dillon hammer2_chain_unlock(child); 871cd189b1eSMatthew Dillon hammer2_chain_drop(child); 872cd189b1eSMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 873cd189b1eSMatthew Dillon spin_lock(&parent->core->cst.spin); 8748853dfb5SMatthew Dillon if (parent->delete_tid > trans->sync_tid) { 8758853dfb5SMatthew Dillon atomic_set_int(&parent->flags, 8768853dfb5SMatthew Dillon HAMMER2_CHAIN_SUBMODIFIED); 8778853dfb5SMatthew Dillon } 878cd189b1eSMatthew Dillon return (0); 879cd189b1eSMatthew Dillon } 8800dea3156SMatthew Dillon 8810dea3156SMatthew Dillon /* 882ea155208SMatthew Dillon * The DESTROYED flag can only be initially set on an unreferenced 883ea155208SMatthew Dillon * deleted inode and will propagate downward via the mechanic below. 884ea155208SMatthew Dillon * Such inode chains have been deleted for good and should no longer 885ea155208SMatthew Dillon * be subject to delete/duplication. 
8860dea3156SMatthew Dillon * 8870dea3156SMatthew Dillon * This optimization allows the inode reclaim (destroy unlinked file 8880dea3156SMatthew Dillon * on vnode reclamation after last close) to be flagged by just 889ea155208SMatthew Dillon * setting HAMMER2_CHAIN_DESTROYED at the top level and then will 890ea155208SMatthew Dillon * cause the chains to be terminated and related buffers to be 891ea155208SMatthew Dillon * invalidated and not flushed out. 892ea155208SMatthew Dillon * 893ea155208SMatthew Dillon * We have to be careful not to propagate the DESTROYED flag if 894ea155208SMatthew Dillon * the destruction occurred after our flush sync_tid. 8950dea3156SMatthew Dillon */ 8960dea3156SMatthew Dillon if ((parent->flags & HAMMER2_CHAIN_DESTROYED) && 8979797e933SMatthew Dillon (child->flags & HAMMER2_CHAIN_DELETED) && 8980dea3156SMatthew Dillon (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) { 899731b2a84SMatthew Dillon atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROYED | 9000dea3156SMatthew Dillon HAMMER2_CHAIN_SUBMODIFIED); 9010dea3156SMatthew Dillon } 9020dea3156SMatthew Dillon 9030dea3156SMatthew Dillon /* 9040dea3156SMatthew Dillon * Recurse and collect deferral data. 
9050dea3156SMatthew Dillon */ 9060dea3156SMatthew Dillon diddeferral = info->diddeferral; 9070dea3156SMatthew Dillon ++info->depth; 9080dea3156SMatthew Dillon hammer2_chain_flush_core(info, child); 9099797e933SMatthew Dillon #if FLUSH_DEBUG 9109797e933SMatthew Dillon kprintf("flush_core_done parent=%p flags=%08x child=%p.%d %08x\n", 9119797e933SMatthew Dillon parent, parent->flags, child, child->bref.type, child->flags); 9129797e933SMatthew Dillon #endif 9130dea3156SMatthew Dillon --info->depth; 9140dea3156SMatthew Dillon info->diddeferral += diddeferral; 9150dea3156SMatthew Dillon 9168853dfb5SMatthew Dillon if (child->flags & HAMMER2_CHAIN_SUBMODIFIED) 9178853dfb5SMatthew Dillon atomic_set_int(&parent->flags, HAMMER2_CHAIN_SUBMODIFIED); 9188853dfb5SMatthew Dillon 9190dea3156SMatthew Dillon hammer2_chain_unlock(child); 920ea155208SMatthew Dillon hammer2_chain_drop(child); 9210dea3156SMatthew Dillon 922ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 9230dea3156SMatthew Dillon 9240dea3156SMatthew Dillon spin_lock(&parent->core->cst.spin); 9250dea3156SMatthew Dillon return (0); 9260dea3156SMatthew Dillon } 9270dea3156SMatthew Dillon 9280dea3156SMatthew Dillon /* 9290dea3156SMatthew Dillon * Flush helper scan2 (non-recursive) 9300dea3156SMatthew Dillon * 9310dea3156SMatthew Dillon * This pass on a chain's children propagates any MOVED or DELETED 932ea155208SMatthew Dillon * elements back up the chain towards the root after those elements have 933ea155208SMatthew Dillon * been fully flushed. Unlike scan1, this function is NOT recursive and 934ea155208SMatthew Dillon * the parent remains locked across the entire scan. 9350dea3156SMatthew Dillon * 936ea155208SMatthew Dillon * NOTE! We must re-set SUBMODIFIED on the parent(s) as appropriate, and 937ea155208SMatthew Dillon * due to the above conditions it is possible to do this and still 938ea155208SMatthew Dillon * have some children flagged MOVED depending on the synchronization. 
939ea155208SMatthew Dillon * 940ea155208SMatthew Dillon * NOTE! A deletion is a visbility issue, there can still be referenced to 941ea155208SMatthew Dillon * deleted elements (for example, to an unlinked file which is still 942ea155208SMatthew Dillon * open), and there can also be multiple chains pointing to the same 943ea155208SMatthew Dillon * bref where some are deleted and some are not (for example due to 944ea155208SMatthew Dillon * a rename). So a chain marked for deletion is basically considered 945a864c5d9SMatthew Dillon * to be live until it is explicitly destroyed or until its ref-count 946a864c5d9SMatthew Dillon * reaches zero (also implying that MOVED and MODIFIED are clear). 9470dea3156SMatthew Dillon */ 9480dea3156SMatthew Dillon static int 9490dea3156SMatthew Dillon hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) 9500dea3156SMatthew Dillon { 9510dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 9520dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 953731b2a84SMatthew Dillon hammer2_chain_core_t *above = child->above; 954*a5913bdfSMatthew Dillon hammer2_mount_t *hmp = child->hmp; 955a864c5d9SMatthew Dillon hammer2_trans_t *trans = info->trans; 9560dea3156SMatthew Dillon hammer2_blockref_t *base; 9570dea3156SMatthew Dillon int count; 958ea155208SMatthew Dillon 959ea155208SMatthew Dillon /* 9609797e933SMatthew Dillon * Inodes with stale children that have been converted to DIRECTDATA 9619797e933SMatthew Dillon * mode (file extension or hardlink conversion typically) need to 9629797e933SMatthew Dillon * skipped right now before we start messing with a non-existant 9639797e933SMatthew Dillon * block table. 
9649797e933SMatthew Dillon */ 96509dd2dfeSMatthew Dillon #if 0 9669797e933SMatthew Dillon if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && 9679797e933SMatthew Dillon (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) { 9689797e933SMatthew Dillon #if FLUSH_DEBUG 9699797e933SMatthew Dillon kprintf("B"); 9709797e933SMatthew Dillon #endif 9719797e933SMatthew Dillon goto finalize; 9729797e933SMatthew Dillon } 97309dd2dfeSMatthew Dillon #endif 9749797e933SMatthew Dillon 9759797e933SMatthew Dillon /* 976731b2a84SMatthew Dillon * Ignore children created after our flush point, treating them as 977cd189b1eSMatthew Dillon * if they did not exist). These children will not cause the parent 978cd189b1eSMatthew Dillon * to be updated. 979731b2a84SMatthew Dillon * 980cd189b1eSMatthew Dillon * When we encounter such children and the parent chain has not been 981cd189b1eSMatthew Dillon * deleted, delete/duplicated, or delete/duplicated-for-move, then 982cd189b1eSMatthew Dillon * the parent may be used to funnel through several flush points. 983cd189b1eSMatthew Dillon * We must re-set the SUBMODIFIED flag in the parent to ensure that 984cd189b1eSMatthew Dillon * those flushes have visbility. A simple test of delete_tid suffices 985cd189b1eSMatthew Dillon * to determine if the parent spans beyond our current flush. 986731b2a84SMatthew Dillon */ 987731b2a84SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 988731b2a84SMatthew Dillon #if FLUSH_DEBUG 989731b2a84SMatthew Dillon kprintf("E"); 990731b2a84SMatthew Dillon #endif 991731b2a84SMatthew Dillon goto finalize; 992731b2a84SMatthew Dillon } 993731b2a84SMatthew Dillon 994731b2a84SMatthew Dillon /* 995731b2a84SMatthew Dillon * Ignore children which have not changed. The parent's block table 996731b2a84SMatthew Dillon * is already correct. 
997ea155208SMatthew Dillon */ 998ea155208SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) { 9999797e933SMatthew Dillon #if FLUSH_DEBUG 10009797e933SMatthew Dillon kprintf("D"); 10019797e933SMatthew Dillon #endif 10020dea3156SMatthew Dillon goto finalize; 10030dea3156SMatthew Dillon } 1004ea155208SMatthew Dillon 1005731b2a84SMatthew Dillon 1006ea155208SMatthew Dillon hammer2_chain_ref(child); 1007731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 10080dea3156SMatthew Dillon 10090dea3156SMatthew Dillon /* 10100dea3156SMatthew Dillon * The MOVED bit implies an additional reference which prevents 10110dea3156SMatthew Dillon * the child from being destroyed out from under our operation 10120dea3156SMatthew Dillon * so we can lock the child safely without worrying about it 10130dea3156SMatthew Dillon * getting ripped up (?). 1014ea155208SMatthew Dillon * 1015ea155208SMatthew Dillon * We can only update parents where child->parent matches. The 1016ea155208SMatthew Dillon * child->parent link will migrate along the chain but the flush 1017ea155208SMatthew Dillon * order must be enforced absolutely. Parent reflushed after the 1018ea155208SMatthew Dillon * child has passed them by should skip due to the modify_tid test. 10190dea3156SMatthew Dillon */ 10200dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER); 10210dea3156SMatthew Dillon 10220dea3156SMatthew Dillon /* 10230dea3156SMatthew Dillon * The parent's blockref to the child must be deleted or updated. 10240dea3156SMatthew Dillon * 10250dea3156SMatthew Dillon * This point is not reached on successful DESTROYED optimizations 1026a864c5d9SMatthew Dillon * but can be reached on recursive deletions and restricted flushes. 1027ea155208SMatthew Dillon * 10284a59bd3eSMatthew Dillon * Because flushes are ordered we do not have to make a 10294a59bd3eSMatthew Dillon * modify/duplicate of indirect blocks. 
That is, the flush 10304a59bd3eSMatthew Dillon * code does not have to kmalloc or duplicate anything. We 10314a59bd3eSMatthew Dillon * can adjust the indirect block table in-place and reuse the 10324a59bd3eSMatthew Dillon * chain. It IS possible that the chain has already been duplicated 10334a59bd3eSMatthew Dillon * or may wind up being duplicated on-the-fly by modifying code 10344a59bd3eSMatthew Dillon * on the frontend. We simply use the original and ignore such 10354a59bd3eSMatthew Dillon * chains. However, it does mean we can't clear the MOVED bit. 10364a59bd3eSMatthew Dillon * 1037ea155208SMatthew Dillon * XXX recursive deletions not optimized. 10380dea3156SMatthew Dillon */ 1039a864c5d9SMatthew Dillon hammer2_chain_modify(trans, &parent, 10404a59bd3eSMatthew Dillon HAMMER2_MODIFY_NO_MODIFY_TID | 10414a59bd3eSMatthew Dillon HAMMER2_MODIFY_ASSERTNOCOPY); 10420dea3156SMatthew Dillon 10430dea3156SMatthew Dillon switch(parent->bref.type) { 10440dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 1045ea155208SMatthew Dillon /* 1046ea155208SMatthew Dillon * XXX Should assert that OPFLAG_DIRECTDATA is 0 once we 1047ea155208SMatthew Dillon * properly duplicate the inode headers and do proper flush 1048ea155208SMatthew Dillon * range checks (all the children should be beyond the flush 1049ea155208SMatthew Dillon * point). For now just don't sync the non-applicable 1050ea155208SMatthew Dillon * children. 1051ea155208SMatthew Dillon * 1052ea155208SMatthew Dillon * XXX Can also occur due to hardlink consolidation. We 1053ea155208SMatthew Dillon * set OPFLAG_DIRECTDATA to prevent the indirect and data 1054ea155208SMatthew Dillon * blocks from syncing ot the hardlink pointer. 
1055ea155208SMatthew Dillon */ 1056ea155208SMatthew Dillon #if 0 10570dea3156SMatthew Dillon KKASSERT((parent->data->ipdata.op_flags & 10580dea3156SMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA) == 0); 1059ea155208SMatthew Dillon #endif 106009dd2dfeSMatthew Dillon #if 0 1061ea155208SMatthew Dillon if (parent->data->ipdata.op_flags & 1062ea155208SMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA) { 1063ea155208SMatthew Dillon base = NULL; 106409dd2dfeSMatthew Dillon } else 106509dd2dfeSMatthew Dillon #endif 106609dd2dfeSMatthew Dillon { 10670dea3156SMatthew Dillon base = &parent->data->ipdata.u.blockset.blockref[0]; 10680dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 1069ea155208SMatthew Dillon } 10700dea3156SMatthew Dillon break; 10710dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 10721a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 10730dea3156SMatthew Dillon if (parent->data) { 107493f3933aSMatthew Dillon base = &parent->data->npdata[0]; 10750dea3156SMatthew Dillon } else { 10760dea3156SMatthew Dillon base = NULL; 10770dea3156SMatthew Dillon KKASSERT(child->flags & HAMMER2_CHAIN_DELETED); 10780dea3156SMatthew Dillon } 10790dea3156SMatthew Dillon count = parent->bytes / sizeof(hammer2_blockref_t); 10800dea3156SMatthew Dillon break; 10810dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 10820dea3156SMatthew Dillon base = &hmp->voldata.sroot_blockset.blockref[0]; 10830dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 10840dea3156SMatthew Dillon break; 10851a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 108693f3933aSMatthew Dillon base = &parent->data->npdata[0]; 10871a7cfe5aSMatthew Dillon count = HAMMER2_SET_COUNT; 10881a7cfe5aSMatthew Dillon break; 10890dea3156SMatthew Dillon default: 10900dea3156SMatthew Dillon base = NULL; 10910dea3156SMatthew Dillon count = 0; 10920dea3156SMatthew Dillon panic("hammer2_chain_get: " 10930dea3156SMatthew Dillon "unrecognized blockref type: %d", 10940dea3156SMatthew Dillon parent->bref.type); 
10950dea3156SMatthew Dillon } 10960dea3156SMatthew Dillon 10970dea3156SMatthew Dillon /* 10980dea3156SMatthew Dillon * Update the parent's blockref table and propagate mirror_tid. 1099d5fabb70SMatthew Dillon * 1100731b2a84SMatthew Dillon * NOTE! Children with modify_tid's beyond our flush point are 1101731b2a84SMatthew Dillon * considered to not exist for the purposes of updating the 1102731b2a84SMatthew Dillon * parent's blockref array. 1103d5fabb70SMatthew Dillon * 1104731b2a84SMatthew Dillon * NOTE! Updates to a parent's blockref table do not adjust the 1105731b2a84SMatthew Dillon * parent's bref.modify_tid, only its bref.mirror_tid. 11060dea3156SMatthew Dillon */ 1107731b2a84SMatthew Dillon KKASSERT(child->index >= 0); 1108731b2a84SMatthew Dillon if (child->delete_tid <= trans->sync_tid) { 11090dea3156SMatthew Dillon if (base) { 11100dea3156SMatthew Dillon KKASSERT(child->index < count); 11110dea3156SMatthew Dillon bzero(&base[child->index], sizeof(child->bref)); 111293f3933aSMatthew Dillon } 1113ea155208SMatthew Dillon if (info->mirror_tid < child->delete_tid) 1114ea155208SMatthew Dillon info->mirror_tid = child->delete_tid; 11150dea3156SMatthew Dillon } else { 11160dea3156SMatthew Dillon if (base) { 11170dea3156SMatthew Dillon KKASSERT(child->index < count); 11180dea3156SMatthew Dillon base[child->index] = child->bref; 111993f3933aSMatthew Dillon } 1120ea155208SMatthew Dillon if (info->mirror_tid < child->modify_tid) 1121ea155208SMatthew Dillon info->mirror_tid = child->modify_tid; 11220dea3156SMatthew Dillon } 11230dea3156SMatthew Dillon 1124ea155208SMatthew Dillon if (info->mirror_tid < child->bref.mirror_tid) { 1125ea155208SMatthew Dillon info->mirror_tid = child->bref.mirror_tid; 11260dea3156SMatthew Dillon } 11271a7cfe5aSMatthew Dillon if ((parent->bref.type == HAMMER2_BREF_TYPE_VOLUME || 11281a7cfe5aSMatthew Dillon parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP) && 11290dea3156SMatthew Dillon hmp->voldata.mirror_tid < child->bref.mirror_tid) { 
11300dea3156SMatthew Dillon hmp->voldata.mirror_tid = child->bref.mirror_tid; 11310dea3156SMatthew Dillon } 11320dea3156SMatthew Dillon 11330dea3156SMatthew Dillon /* 1134731b2a84SMatthew Dillon * When can we safely clear the MOVED flag? Flushes down duplicate 1135731b2a84SMatthew Dillon * paths can occur out of order, for example if an inode is moved 1136731b2a84SMatthew Dillon * as part of a hardlink consolidation or if an inode is moved into 1137731b2a84SMatthew Dillon * an indirect block indexed before the inode. 1138cd189b1eSMatthew Dillon * 1139cd189b1eSMatthew Dillon * Only clear MOVED once all possible parents have been flushed. 11400dea3156SMatthew Dillon */ 11419797e933SMatthew Dillon if (child->flags & HAMMER2_CHAIN_MOVED) { 1142cd189b1eSMatthew Dillon hammer2_chain_t *scan; 1143731b2a84SMatthew Dillon int ok = 1; 1144731b2a84SMatthew Dillon 1145731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 11468853dfb5SMatthew Dillon for (scan = above->first_parent; 11478853dfb5SMatthew Dillon scan; 1148731b2a84SMatthew Dillon scan = scan->next_parent) { 114909dd2dfeSMatthew Dillon /* 115009dd2dfeSMatthew Dillon * XXX weird code also checked at the top of scan2, 115109dd2dfeSMatthew Dillon * I would like to fix this by detaching the core 115209dd2dfeSMatthew Dillon * on initial hardlink consolidation (1->2 nlinks). 
115309dd2dfeSMatthew Dillon */ 115409dd2dfeSMatthew Dillon #if 0 115509dd2dfeSMatthew Dillon if (scan->bref.type == HAMMER2_BREF_TYPE_INODE && 115609dd2dfeSMatthew Dillon (scan->data->ipdata.op_flags & 115709dd2dfeSMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA)) { 115809dd2dfeSMatthew Dillon continue; 115909dd2dfeSMatthew Dillon } 116009dd2dfeSMatthew Dillon #endif 1161cd189b1eSMatthew Dillon if (scan->flags & HAMMER2_CHAIN_SUBMODIFIED) { 1162731b2a84SMatthew Dillon ok = 0; 1163731b2a84SMatthew Dillon break; 1164731b2a84SMatthew Dillon } 1165731b2a84SMatthew Dillon } 1166731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 1167731b2a84SMatthew Dillon if (ok) { 1168ea155208SMatthew Dillon atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED); 1169ea155208SMatthew Dillon hammer2_chain_drop(child); /* flag */ 1170ea155208SMatthew Dillon } 11710dea3156SMatthew Dillon } 11720dea3156SMatthew Dillon 11730dea3156SMatthew Dillon /* 11740dea3156SMatthew Dillon * Unlock the child. This can wind up dropping the child's 11750dea3156SMatthew Dillon * last ref, removing it from the parent's RB tree, and deallocating 11760dea3156SMatthew Dillon * the structure. The RB_SCAN() our caller is doing handles the 11770dea3156SMatthew Dillon * situation. 11780dea3156SMatthew Dillon */ 11790dea3156SMatthew Dillon hammer2_chain_unlock(child); 1180ea155208SMatthew Dillon hammer2_chain_drop(child); 1181731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 11829797e933SMatthew Dillon #if FLUSH_DEBUG 11839797e933SMatthew Dillon kprintf("F"); 11849797e933SMatthew Dillon #endif 11850dea3156SMatthew Dillon 11860dea3156SMatthew Dillon /* 11870dea3156SMatthew Dillon * The parent cleared SUBMODIFIED prior to the scan. If the child 11880dea3156SMatthew Dillon * still requires a flush (possibly due to being outside the current 11890dea3156SMatthew Dillon * synchronization zone), we must re-set SUBMODIFIED on the way back 11900dea3156SMatthew Dillon * up. 
11910dea3156SMatthew Dillon */ 11920dea3156SMatthew Dillon finalize: 11939797e933SMatthew Dillon #if FLUSH_DEBUG 1194cd189b1eSMatthew Dillon kprintf("G child %p 08x\n", child, child->flags); 11959797e933SMatthew Dillon #endif 11960dea3156SMatthew Dillon return (0); 119732b800e6SMatthew Dillon } 1198