132b800e6SMatthew Dillon /* 20dea3156SMatthew Dillon * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 332b800e6SMatthew Dillon * 432b800e6SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 532b800e6SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 632b800e6SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 732b800e6SMatthew Dillon * 832b800e6SMatthew Dillon * Redistribution and use in source and binary forms, with or without 932b800e6SMatthew Dillon * modification, are permitted provided that the following conditions 1032b800e6SMatthew Dillon * are met: 1132b800e6SMatthew Dillon * 1232b800e6SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 1332b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer. 1432b800e6SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 1532b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer in 1632b800e6SMatthew Dillon * the documentation and/or other materials provided with the 1732b800e6SMatthew Dillon * distribution. 1832b800e6SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 1932b800e6SMatthew Dillon * contributors may be used to endorse or promote products derived 2032b800e6SMatthew Dillon * from this software without specific, prior written permission. 2132b800e6SMatthew Dillon * 2232b800e6SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2332b800e6SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2432b800e6SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 2532b800e6SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 2632b800e6SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2732b800e6SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 2832b800e6SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2932b800e6SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 3032b800e6SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3132b800e6SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 3232b800e6SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3332b800e6SMatthew Dillon * SUCH DAMAGE. 3432b800e6SMatthew Dillon */ 3532b800e6SMatthew Dillon 3632b800e6SMatthew Dillon #include <sys/cdefs.h> 3732b800e6SMatthew Dillon #include <sys/param.h> 3832b800e6SMatthew Dillon #include <sys/systm.h> 3932b800e6SMatthew Dillon #include <sys/types.h> 4032b800e6SMatthew Dillon #include <sys/lock.h> 4132b800e6SMatthew Dillon #include <sys/uuid.h> 4232b800e6SMatthew Dillon 4332b800e6SMatthew Dillon #include "hammer2.h" 4432b800e6SMatthew Dillon 4532b800e6SMatthew Dillon /* 4632b800e6SMatthew Dillon * Recursively flush the specified chain. The chain is locked and 4732b800e6SMatthew Dillon * referenced by the caller and will remain so on return. The chain 4832b800e6SMatthew Dillon * will remain referenced throughout but can temporarily lose its 4932b800e6SMatthew Dillon * lock during the recursion to avoid unnecessarily stalling user 5032b800e6SMatthew Dillon * processes. 
5132b800e6SMatthew Dillon */ 5232b800e6SMatthew Dillon struct hammer2_flush_info { 530dea3156SMatthew Dillon hammer2_chain_t *parent; 540dea3156SMatthew Dillon hammer2_trans_t *trans; 5532b800e6SMatthew Dillon int depth; 560dea3156SMatthew Dillon int diddeferral; 571897c66eSMatthew Dillon int pass; 581897c66eSMatthew Dillon int cache_index; 591897c66eSMatthew Dillon struct h2_flush_deferral_list flush_list; 600dea3156SMatthew Dillon hammer2_tid_t sync_tid; /* flush synchronization point */ 610dea3156SMatthew Dillon hammer2_tid_t mirror_tid; /* collect mirror TID updates */ 6232b800e6SMatthew Dillon }; 6332b800e6SMatthew Dillon 6432b800e6SMatthew Dillon typedef struct hammer2_flush_info hammer2_flush_info_t; 6532b800e6SMatthew Dillon 660dea3156SMatthew Dillon static void hammer2_chain_flush_core(hammer2_flush_info_t *info, 67*a7720be7SMatthew Dillon hammer2_chain_t **chainp); 680dea3156SMatthew Dillon static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data); 690dea3156SMatthew Dillon static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data); 7091abd410SMatthew Dillon static void hammer2_rollup_stats(hammer2_chain_t *parent, 7191abd410SMatthew Dillon hammer2_chain_t *child, int how); 7232b800e6SMatthew Dillon 7393f3933aSMatthew Dillon #if 0 7493f3933aSMatthew Dillon static __inline 7593f3933aSMatthew Dillon void 7693f3933aSMatthew Dillon hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref, 7793f3933aSMatthew Dillon int how) 7893f3933aSMatthew Dillon { 7993f3933aSMatthew Dillon hammer2_key_t bytes; 8093f3933aSMatthew Dillon 8193f3933aSMatthew Dillon if (bref->type != 0) { 8293f3933aSMatthew Dillon bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX); 8393f3933aSMatthew Dillon if (bref->type == HAMMER2_BREF_TYPE_INODE) 8493f3933aSMatthew Dillon info->inode_count += how; 8593f3933aSMatthew Dillon if (how < 0) 8693f3933aSMatthew Dillon info->data_count -= bytes; 8793f3933aSMatthew Dillon else 8893f3933aSMatthew Dillon 
info->data_count += bytes; 8993f3933aSMatthew Dillon } 9093f3933aSMatthew Dillon } 9193f3933aSMatthew Dillon #endif 9293f3933aSMatthew Dillon 9332b800e6SMatthew Dillon /* 940dea3156SMatthew Dillon * Transaction support functions for writing to the filesystem. 950dea3156SMatthew Dillon * 960dea3156SMatthew Dillon * Initializing a new transaction allocates a transaction ID. We 970dea3156SMatthew Dillon * don't bother marking the volume header MODIFIED. Instead, the volume 98a02dfba1SMatthew Dillon * will be synchronized at a later time as part of a larger flush sequence. 990dea3156SMatthew Dillon * 100d001f460SMatthew Dillon * Non-flush transactions can typically run concurrently. However if 101d001f460SMatthew Dillon * there are non-flush transaction both before AND after a flush trans, 102d001f460SMatthew Dillon * the transactions after stall until the ones before finish. 103d001f460SMatthew Dillon * 104d001f460SMatthew Dillon * Non-flush transactions occuring after a flush pointer can run concurrently 105d001f460SMatthew Dillon * with that flush. They only have to wait for transactions prior to the 106d001f460SMatthew Dillon * flush trans to complete before they unstall. 107d001f460SMatthew Dillon * 108355d67fcSMatthew Dillon * WARNING! Transaction ids are only allocated when the transaction becomes 109355d67fcSMatthew Dillon * active, which allows other transactions to insert ahead of us 110355d67fcSMatthew Dillon * if we are forced to block (only bioq transactions do that). 111355d67fcSMatthew Dillon * 1120dea3156SMatthew Dillon * WARNING! Modifications to the root volume cannot dup the root volume 1130dea3156SMatthew Dillon * header to handle synchronization points, so alloc_tid can 1140dea3156SMatthew Dillon * wind up (harmlessly) more advanced on flush. 
1150dea3156SMatthew Dillon */ 1160dea3156SMatthew Dillon void 117a5913bdfSMatthew Dillon hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, int flags) 1180dea3156SMatthew Dillon { 119a5913bdfSMatthew Dillon hammer2_mount_t *hmp; 120d001f460SMatthew Dillon hammer2_trans_t *scan; 121d001f460SMatthew Dillon 1220dea3156SMatthew Dillon bzero(trans, sizeof(*trans)); 123a5913bdfSMatthew Dillon trans->pmp = pmp; 1247bed8d7eSMatthew Dillon hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 125d001f460SMatthew Dillon 1260dea3156SMatthew Dillon hammer2_voldata_lock(hmp); 127d001f460SMatthew Dillon trans->flags = flags; 128d001f460SMatthew Dillon trans->td = curthread; 1291897c66eSMatthew Dillon /*trans->delete_gen = 0;*/ /* multiple deletions within trans */ 130d001f460SMatthew Dillon 131d001f460SMatthew Dillon if (flags & HAMMER2_TRANS_ISFLUSH) { 132d001f460SMatthew Dillon /* 133355d67fcSMatthew Dillon * If multiple flushes are trying to run we have to 134355d67fcSMatthew Dillon * wait until it is our turn, then set curflush to 135355d67fcSMatthew Dillon * indicate that a flush is now pending (but not 136355d67fcSMatthew Dillon * necessarily active yet). 137355d67fcSMatthew Dillon * 138355d67fcSMatthew Dillon * NOTE: Do not set trans->blocked here. 139355d67fcSMatthew Dillon */ 140355d67fcSMatthew Dillon ++hmp->flushcnt; 141355d67fcSMatthew Dillon while (hmp->curflush != NULL) { 142355d67fcSMatthew Dillon lksleep(&hmp->curflush, &hmp->voldatalk, 143355d67fcSMatthew Dillon 0, "h2multf", hz); 144355d67fcSMatthew Dillon } 145355d67fcSMatthew Dillon hmp->curflush = trans; 146355d67fcSMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 147355d67fcSMatthew Dillon 148355d67fcSMatthew Dillon /* 149d001f460SMatthew Dillon * If we are a flush we have to wait for all transactions 150d001f460SMatthew Dillon * prior to our flush synchronization point to complete 151d001f460SMatthew Dillon * before we can start our flush. 
152355d67fcSMatthew Dillon * 153355d67fcSMatthew Dillon * Most importantly, this includes bioq flushes. 154355d67fcSMatthew Dillon * 155355d67fcSMatthew Dillon * NOTE: Do not set trans->blocked here. 156d001f460SMatthew Dillon */ 157d001f460SMatthew Dillon while (TAILQ_FIRST(&hmp->transq) != trans) { 158d001f460SMatthew Dillon lksleep(&trans->sync_tid, &hmp->voldatalk, 159d001f460SMatthew Dillon 0, "h2syncw", hz); 160a02dfba1SMatthew Dillon } 161a02dfba1SMatthew Dillon 162a02dfba1SMatthew Dillon /* 163355d67fcSMatthew Dillon * don't assign sync_tid until we become the running 164*a7720be7SMatthew Dillon * flush. last_flush_tid and topo_flush_tid eare used 165*a7720be7SMatthew Dillon * to determine when a copy-on-write (aka delete-duplicate) 166*a7720be7SMatthew Dillon * is required. 167355d67fcSMatthew Dillon */ 168*a7720be7SMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid; 169*a7720be7SMatthew Dillon hmp->voldata.alloc_tid += 2; 170355d67fcSMatthew Dillon hmp->topo_flush_tid = trans->sync_tid; 171355d67fcSMatthew Dillon 172355d67fcSMatthew Dillon /* 173d001f460SMatthew Dillon * Once we become the running flush we can wakeup anyone 174355d67fcSMatthew Dillon * who blocked on us, up to the next flush. That is, 175355d67fcSMatthew Dillon * our flush can run concurrent with frontend operations. 176a02dfba1SMatthew Dillon */ 177d001f460SMatthew Dillon scan = trans; 178d001f460SMatthew Dillon while ((scan = TAILQ_NEXT(scan, entry)) != NULL) { 179d001f460SMatthew Dillon if (scan->flags & HAMMER2_TRANS_ISFLUSH) 180d001f460SMatthew Dillon break; 181d001f460SMatthew Dillon if (scan->blocked == 0) 182d001f460SMatthew Dillon break; 183d001f460SMatthew Dillon scan->blocked = 0; 184d001f460SMatthew Dillon wakeup(&scan->blocked); 185a02dfba1SMatthew Dillon } 186355d67fcSMatthew Dillon } else if ((flags & HAMMER2_TRANS_BUFCACHE) && hmp->curflush) { 187355d67fcSMatthew Dillon /* 188*a7720be7SMatthew Dillon * We cannot block if we are the bioq thread. 
189355d67fcSMatthew Dillon * 190*a7720be7SMatthew Dillon * When possible we steal the flush's TID and flush buffers 191*a7720be7SMatthew Dillon * as part of the larger filesystem flush. The flush will 192*a7720be7SMatthew Dillon * interlock against buffer cache transactions when INVFSYNC 193*a7720be7SMatthew Dillon * is set. 194*a7720be7SMatthew Dillon * 195*a7720be7SMatthew Dillon * NOTE: Transactions are not ordered by sync_tid on the 196*a7720be7SMatthew Dillon * transq. Append to avoid confusion. Other waiting 197*a7720be7SMatthew Dillon * flushes will have not added themselves to transq 198*a7720be7SMatthew Dillon * yet. 199355d67fcSMatthew Dillon */ 200355d67fcSMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 201*a7720be7SMatthew Dillon if ((scan = hmp->curflush) != NULL) { 202*a7720be7SMatthew Dillon if (scan->flags & HAMMER2_TRANS_INVFSYNC) { 203*a7720be7SMatthew Dillon trans->sync_tid = scan->sync_tid; 204*a7720be7SMatthew Dillon } else { 205355d67fcSMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid++; 206*a7720be7SMatthew Dillon } 207*a7720be7SMatthew Dillon } else { 208*a7720be7SMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid++; 209*a7720be7SMatthew Dillon } 210a02dfba1SMatthew Dillon } else { 211a02dfba1SMatthew Dillon /* 212355d67fcSMatthew Dillon * If this is a normal transaction and not a flush, or 213355d67fcSMatthew Dillon * if this is a bioq transaction and no flush is pending, 214355d67fcSMatthew Dillon * we can queue normally. 215d001f460SMatthew Dillon * 216355d67fcSMatthew Dillon * Normal transactions must block while a pending flush is 217355d67fcSMatthew Dillon * waiting for prior transactions to complete. Once the 218355d67fcSMatthew Dillon * pending flush becomes active we can run concurrently 219355d67fcSMatthew Dillon * with it. 
220a02dfba1SMatthew Dillon */ 221355d67fcSMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 222355d67fcSMatthew Dillon scan = TAILQ_FIRST(&hmp->transq); 223355d67fcSMatthew Dillon if (hmp->curflush && hmp->curflush != scan) { 224d001f460SMatthew Dillon trans->blocked = 1; 225d001f460SMatthew Dillon while (trans->blocked) { 226d001f460SMatthew Dillon lksleep(&trans->blocked, &hmp->voldatalk, 227d001f460SMatthew Dillon 0, "h2trans", hz); 228d001f460SMatthew Dillon } 229d001f460SMatthew Dillon } 230355d67fcSMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid++; 231a02dfba1SMatthew Dillon } 232a02dfba1SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 2330dea3156SMatthew Dillon } 2340dea3156SMatthew Dillon 235*a7720be7SMatthew Dillon /* 236*a7720be7SMatthew Dillon * Clear the flag that allowed buffer cache flushes to steal the 237*a7720be7SMatthew Dillon * main flush's transaction id and wait for any in-progress BC flushes 238*a7720be7SMatthew Dillon * to finish. 239*a7720be7SMatthew Dillon */ 240*a7720be7SMatthew Dillon void 241*a7720be7SMatthew Dillon hammer2_trans_clear_invfsync(hammer2_trans_t *trans) 242*a7720be7SMatthew Dillon { 243*a7720be7SMatthew Dillon hammer2_mount_t *hmp = trans->pmp->cluster.chains[0]->hmp; 244*a7720be7SMatthew Dillon 245*a7720be7SMatthew Dillon hammer2_bioq_sync(trans->pmp); 246*a7720be7SMatthew Dillon atomic_clear_int(&trans->flags, HAMMER2_TRANS_INVFSYNC); 247*a7720be7SMatthew Dillon if (TAILQ_FIRST(&hmp->transq) != trans) { 248*a7720be7SMatthew Dillon hammer2_voldata_lock(hmp); 249*a7720be7SMatthew Dillon while (TAILQ_FIRST(&hmp->transq) != trans) { 250*a7720be7SMatthew Dillon tsleep(&trans->sync_tid, 0, "h2flbw", 0); 251*a7720be7SMatthew Dillon } 252*a7720be7SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 253*a7720be7SMatthew Dillon } 254*a7720be7SMatthew Dillon hammer2_bioq_sync(trans->pmp); 255*a7720be7SMatthew Dillon ++trans->sync_tid; 256*a7720be7SMatthew Dillon hmp->topo_flush_tid = trans->sync_tid; 
257*a7720be7SMatthew Dillon } 258*a7720be7SMatthew Dillon 2590dea3156SMatthew Dillon void 2600dea3156SMatthew Dillon hammer2_trans_done(hammer2_trans_t *trans) 2610dea3156SMatthew Dillon { 262a5913bdfSMatthew Dillon hammer2_mount_t *hmp; 263d001f460SMatthew Dillon hammer2_trans_t *scan; 264*a7720be7SMatthew Dillon int wasathead; 265a02dfba1SMatthew Dillon 2667bed8d7eSMatthew Dillon hmp = trans->pmp->cluster.chains[0]->hmp; 267a5913bdfSMatthew Dillon 268a02dfba1SMatthew Dillon hammer2_voldata_lock(hmp); 269*a7720be7SMatthew Dillon wasathead = (TAILQ_FIRST(&hmp->transq) == trans); 270d001f460SMatthew Dillon TAILQ_REMOVE(&hmp->transq, trans, entry); 271*a7720be7SMatthew Dillon 272d001f460SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) { 273d001f460SMatthew Dillon --hmp->flushcnt; 274d001f460SMatthew Dillon if (hmp->flushcnt) { 275355d67fcSMatthew Dillon /* 276355d67fcSMatthew Dillon * If we were a flush then wakeup anyone waiting on 277355d67fcSMatthew Dillon * curflush (i.e. other flushes that want to run). 278355d67fcSMatthew Dillon */ 279355d67fcSMatthew Dillon hmp->curflush = NULL; 280355d67fcSMatthew Dillon wakeup(&hmp->curflush); 281d001f460SMatthew Dillon } else { 2824a59bd3eSMatthew Dillon /* 283*a7720be7SMatthew Dillon * Cycle the flush_tid. 2844a59bd3eSMatthew Dillon */ 285d001f460SMatthew Dillon hmp->curflush = NULL; 286d001f460SMatthew Dillon } 287*a7720be7SMatthew Dillon hmp->last_flush_tid = hmp->topo_flush_tid; 288*a7720be7SMatthew Dillon hmp->topo_flush_tid = HAMMER2_MAX_TID; 289d001f460SMatthew Dillon } else { 290d001f460SMatthew Dillon /* 291d001f460SMatthew Dillon * If we are not a flush but a flush is now at the head 292d001f460SMatthew Dillon * of the queue and we were previously blocking it, 293d001f460SMatthew Dillon * we can now unblock it. 
294*a7720be7SMatthew Dillon * 295*a7720be7SMatthew Dillon * Special case where sync_tid == scan->sync_tid occurs 296*a7720be7SMatthew Dillon * when buffer flush is issued while a normal flush is 297*a7720be7SMatthew Dillon * running (and in the correct stager), which is typically 298*a7720be7SMatthew Dillon * semi-synchronous but not always. 299d001f460SMatthew Dillon */ 300d001f460SMatthew Dillon if (hmp->flushcnt && 301d001f460SMatthew Dillon (scan = TAILQ_FIRST(&hmp->transq)) != NULL && 302*a7720be7SMatthew Dillon wasathead && 303d001f460SMatthew Dillon (scan->flags & HAMMER2_TRANS_ISFLUSH)) { 304d001f460SMatthew Dillon wakeup(&scan->sync_tid); 305a02dfba1SMatthew Dillon } 306a02dfba1SMatthew Dillon } 307a02dfba1SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 308a02dfba1SMatthew Dillon } 309a02dfba1SMatthew Dillon 3100dea3156SMatthew Dillon /* 3110dea3156SMatthew Dillon * Flush the chain and all modified sub-chains through the specified 3120dea3156SMatthew Dillon * synchronization point (sync_tid), propagating parent chain modifications 3130dea3156SMatthew Dillon * and mirror_tid updates back up as needed. Since we are recursing downward 3140dea3156SMatthew Dillon * we do not have to deal with the complexities of multi-homed chains (chains 3150dea3156SMatthew Dillon * with multiple parents). 3160dea3156SMatthew Dillon * 3170dea3156SMatthew Dillon * Caller must have interlocked against any non-flush-related modifying 3180dea3156SMatthew Dillon * operations in progress whos modify_tid values are less than or equal 3190dea3156SMatthew Dillon * to the passed sync_tid. 3200dea3156SMatthew Dillon * 3210dea3156SMatthew Dillon * Caller must have already vetted synchronization points to ensure they 3220dea3156SMatthew Dillon * are properly flushed. Only snapshots and cluster flushes can create 3230dea3156SMatthew Dillon * these sorts of synchronization points. 
3240dea3156SMatthew Dillon * 32532b800e6SMatthew Dillon * This routine can be called from several places but the most important 32632b800e6SMatthew Dillon * is from the hammer2_vop_reclaim() function. We want to try to completely 32732b800e6SMatthew Dillon * clean out the inode structure to prevent disconnected inodes from 3280dea3156SMatthew Dillon * building up and blowing out the kmalloc pool. However, it is not actually 3290dea3156SMatthew Dillon * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery 3300dea3156SMatthew Dillon * capability. 33132b800e6SMatthew Dillon * 3320dea3156SMatthew Dillon * chain is locked on call and will remain locked on return. If a flush 3330dea3156SMatthew Dillon * occured, the chain's MOVED bit will be set indicating that its parent 3340dea3156SMatthew Dillon * (which is not part of the flush) should be updated. 33532b800e6SMatthew Dillon */ 33632b800e6SMatthew Dillon void 337*a7720be7SMatthew Dillon hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) 33832b800e6SMatthew Dillon { 339*a7720be7SMatthew Dillon hammer2_chain_t *chain = *chainp; 34032b800e6SMatthew Dillon hammer2_chain_t *scan; 341731b2a84SMatthew Dillon hammer2_chain_core_t *core; 34232b800e6SMatthew Dillon hammer2_flush_info_t info; 34332b800e6SMatthew Dillon 34432b800e6SMatthew Dillon /* 34532b800e6SMatthew Dillon * Execute the recursive flush and handle deferrals. 34632b800e6SMatthew Dillon * 34732b800e6SMatthew Dillon * Chains can be ridiculously long (thousands deep), so to 34832b800e6SMatthew Dillon * avoid blowing out the kernel stack the recursive flush has a 34932b800e6SMatthew Dillon * depth limit. Elements at the limit are placed on a list 35032b800e6SMatthew Dillon * for re-execution after the stack has been popped. 
35132b800e6SMatthew Dillon */ 35232b800e6SMatthew Dillon bzero(&info, sizeof(info)); 35332b800e6SMatthew Dillon TAILQ_INIT(&info.flush_list); 3540dea3156SMatthew Dillon info.trans = trans; 3550dea3156SMatthew Dillon info.sync_tid = trans->sync_tid; 3560dea3156SMatthew Dillon info.mirror_tid = 0; 3571897c66eSMatthew Dillon info.cache_index = -1; 35832b800e6SMatthew Dillon 359731b2a84SMatthew Dillon core = chain->core; 360731b2a84SMatthew Dillon 361*a7720be7SMatthew Dillon /* 362*a7720be7SMatthew Dillon * Extra ref needed because flush_core expects it when replacing 363*a7720be7SMatthew Dillon * chain. 364*a7720be7SMatthew Dillon */ 365*a7720be7SMatthew Dillon hammer2_chain_ref(chain); 366*a7720be7SMatthew Dillon 3670dea3156SMatthew Dillon for (;;) { 36832b800e6SMatthew Dillon /* 3690dea3156SMatthew Dillon * Unwind deep recursions which had been deferred. This 3700dea3156SMatthew Dillon * can leave MOVED set for these chains, which will be 3710dea3156SMatthew Dillon * handled when we [re]flush chain after the unwind. 37232b800e6SMatthew Dillon */ 37332b800e6SMatthew Dillon while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) { 37432b800e6SMatthew Dillon KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); 37532b800e6SMatthew Dillon TAILQ_REMOVE(&info.flush_list, scan, flush_node); 37632b800e6SMatthew Dillon atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED); 37732b800e6SMatthew Dillon 37832b800e6SMatthew Dillon /* 37932b800e6SMatthew Dillon * Now that we've popped back up we can do a secondary 38032b800e6SMatthew Dillon * recursion on the deferred elements. 
38132b800e6SMatthew Dillon */ 38232b800e6SMatthew Dillon if (hammer2_debug & 0x0040) 38332b800e6SMatthew Dillon kprintf("defered flush %p\n", scan); 3840dea3156SMatthew Dillon hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE); 385*a7720be7SMatthew Dillon hammer2_chain_flush(trans, &scan); 3860dea3156SMatthew Dillon hammer2_chain_unlock(scan); 3870dea3156SMatthew Dillon hammer2_chain_drop(scan); /* ref from deferral */ 38832b800e6SMatthew Dillon } 38932b800e6SMatthew Dillon 39032b800e6SMatthew Dillon /* 3918853dfb5SMatthew Dillon * Flush pass1 on root. 39232b800e6SMatthew Dillon */ 3930dea3156SMatthew Dillon info.diddeferral = 0; 394*a7720be7SMatthew Dillon hammer2_chain_flush_core(&info, &chain); 3959797e933SMatthew Dillon #if FLUSH_DEBUG 3969797e933SMatthew Dillon kprintf("flush_core_done parent=<base> chain=%p.%d %08x\n", 3979797e933SMatthew Dillon chain, chain->bref.type, chain->flags); 3989797e933SMatthew Dillon #endif 39932b800e6SMatthew Dillon 40032b800e6SMatthew Dillon /* 4010dea3156SMatthew Dillon * Only loop if deep recursions have been deferred. 40232b800e6SMatthew Dillon */ 4030dea3156SMatthew Dillon if (TAILQ_EMPTY(&info.flush_list)) 40432b800e6SMatthew Dillon break; 40532b800e6SMatthew Dillon } 406*a7720be7SMatthew Dillon hammer2_chain_drop(chain); 407*a7720be7SMatthew Dillon *chainp = chain; 40832b800e6SMatthew Dillon } 40932b800e6SMatthew Dillon 410476d2aadSMatthew Dillon /* 411ea155208SMatthew Dillon * This is the core of the chain flushing code. The chain is locked by the 412*a7720be7SMatthew Dillon * caller and must also have an extra ref on it by the caller, and remains 413*a7720be7SMatthew Dillon * locked and will have an extra ref on return. 414*a7720be7SMatthew Dillon * 415*a7720be7SMatthew Dillon * This function is keyed off of the update_tid bit but must make 416*a7720be7SMatthew Dillon * fine-grained choices based on the synchronization point we are flushing to. 
4170dea3156SMatthew Dillon * 4180dea3156SMatthew Dillon * If the flush accomplished any work chain will be flagged MOVED 4190dea3156SMatthew Dillon * indicating a copy-on-write propagation back up is required. 4200dea3156SMatthew Dillon * Deep sub-nodes may also have been entered onto the deferral list. 4210dea3156SMatthew Dillon * MOVED is never set on the volume root. 4220dea3156SMatthew Dillon * 4230dea3156SMatthew Dillon * NOTE: modify_tid is different from MODIFIED. modify_tid is updated 4240dea3156SMatthew Dillon * only when a chain is specifically modified, and not updated 4250dea3156SMatthew Dillon * for copy-on-write propagations. MODIFIED is set on any modification 4260dea3156SMatthew Dillon * including copy-on-write propagations. 427476d2aadSMatthew Dillon */ 42832b800e6SMatthew Dillon static void 429*a7720be7SMatthew Dillon hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp) 43032b800e6SMatthew Dillon { 431*a7720be7SMatthew Dillon hammer2_chain_t *chain = *chainp; 4320dea3156SMatthew Dillon hammer2_mount_t *hmp; 43332b800e6SMatthew Dillon hammer2_blockref_t *bref; 43432b800e6SMatthew Dillon hammer2_off_t pbase; 435a98aa0b0SMatthew Dillon hammer2_off_t pmask; 4369b6b3df4SMatthew Dillon #if 0 437a864c5d9SMatthew Dillon hammer2_trans_t *trans = info->trans; 4389b6b3df4SMatthew Dillon #endif 439731b2a84SMatthew Dillon hammer2_chain_core_t *core; 440a98aa0b0SMatthew Dillon size_t psize; 44132b800e6SMatthew Dillon size_t boff; 44232b800e6SMatthew Dillon char *bdata; 44332b800e6SMatthew Dillon struct buf *bp; 44432b800e6SMatthew Dillon int error; 44532b800e6SMatthew Dillon int wasmodified; 4460dea3156SMatthew Dillon int diddeferral = 0; 44732b800e6SMatthew Dillon 448a5913bdfSMatthew Dillon hmp = chain->hmp; 44932b800e6SMatthew Dillon 4509797e933SMatthew Dillon #if FLUSH_DEBUG 4519797e933SMatthew Dillon if (info->parent) 4529797e933SMatthew Dillon kprintf("flush_core %p->%p.%d %08x (%s)\n", 4539797e933SMatthew Dillon info->parent, 
chain, chain->bref.type, 4549797e933SMatthew Dillon chain->flags, 4559797e933SMatthew Dillon ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? 4569797e933SMatthew Dillon chain->data->ipdata.filename : "?")); 4579797e933SMatthew Dillon else 4589797e933SMatthew Dillon kprintf("flush_core NULL->%p.%d %08x (%s)\n", 4599797e933SMatthew Dillon chain, chain->bref.type, 4609797e933SMatthew Dillon chain->flags, 4619797e933SMatthew Dillon ((chain->bref.type == HAMMER2_BREF_TYPE_INODE) ? 4629797e933SMatthew Dillon chain->data->ipdata.filename : "?")); 4639797e933SMatthew Dillon #endif 46432b800e6SMatthew Dillon /* 465731b2a84SMatthew Dillon * Ignore chains modified beyond the current flush point. These 466731b2a84SMatthew Dillon * will be treated as if they did not exist. 467ea155208SMatthew Dillon */ 468ea155208SMatthew Dillon if (chain->modify_tid > info->sync_tid) 469ea155208SMatthew Dillon return; 470731b2a84SMatthew Dillon 471731b2a84SMatthew Dillon core = chain->core; 472ea155208SMatthew Dillon 473*a7720be7SMatthew Dillon #if 0 474*a7720be7SMatthew Dillon kprintf("PUSH %p.%d %08x mirror=%016jx\n", chain, chain->bref.type, chain->flags, chain->bref.mirror_tid); 475*a7720be7SMatthew Dillon #endif 476*a7720be7SMatthew Dillon 477ea155208SMatthew Dillon /* 478*a7720be7SMatthew Dillon * If update_tid triggers we recurse the flush and adjust the 47932b800e6SMatthew Dillon * blockrefs accordingly. 48032b800e6SMatthew Dillon * 481*a7720be7SMatthew Dillon * NOTE: Looping on update_tid can prevent a flush from ever 48232b800e6SMatthew Dillon * finishing in the face of filesystem activity. 483*a7720be7SMatthew Dillon * 484*a7720be7SMatthew Dillon * NOTE: We must recurse whether chain is flagged DELETED or not. 
485*a7720be7SMatthew Dillon * However, if it is flagged DELETED we limit sync_tid to 486*a7720be7SMatthew Dillon * delete_tid to ensure that the chain's bref.mirror_tid is 487*a7720be7SMatthew Dillon * not fully updated and causes it to miss the non-DELETED 488*a7720be7SMatthew Dillon * path. 48932b800e6SMatthew Dillon */ 490*a7720be7SMatthew Dillon if (chain->bref.mirror_tid < core->update_tid) { 4910dea3156SMatthew Dillon hammer2_chain_t *saved_parent; 492ea155208SMatthew Dillon hammer2_tid_t saved_mirror; 4931897c66eSMatthew Dillon hammer2_chain_layer_t *layer; 49432b800e6SMatthew Dillon 49532b800e6SMatthew Dillon /* 496*a7720be7SMatthew Dillon * Races will bump update_tid above trans->sync_tid causing 497*a7720be7SMatthew Dillon * us to catch the issue in a later flush. We do not update 498*a7720be7SMatthew Dillon * update_tid if a deferral (or error XXX) occurs. 49932b800e6SMatthew Dillon * 50032b800e6SMatthew Dillon * We don't want to set our chain to MODIFIED gratuitously. 50132b800e6SMatthew Dillon * 50232b800e6SMatthew Dillon * We need an extra ref on chain because we are going to 50332b800e6SMatthew Dillon * release its lock temporarily in our child loop. 50432b800e6SMatthew Dillon */ 50532b800e6SMatthew Dillon 50632b800e6SMatthew Dillon /* 5070dea3156SMatthew Dillon * Run two passes. The first pass handles MODIFIED and 508*a7720be7SMatthew Dillon * update_tid recursions while the second pass handles 509*a7720be7SMatthew Dillon * MOVED chains on the way back up. 51032b800e6SMatthew Dillon * 5110dea3156SMatthew Dillon * If the stack gets too deep we defer scan1, but must 5120dea3156SMatthew Dillon * be sure to still run scan2 if on the next loop the 5130dea3156SMatthew Dillon * deferred chain has been flushed and now needs MOVED 5140dea3156SMatthew Dillon * handling on the way back up. 51532b800e6SMatthew Dillon * 5160dea3156SMatthew Dillon * Scan1 is recursive. 
51732b800e6SMatthew Dillon * 5180dea3156SMatthew Dillon * NOTE: The act of handling a modified/submodified chain can 5190dea3156SMatthew Dillon * cause the MOVED Flag to be set. It can also be set 5200dea3156SMatthew Dillon * via hammer2_chain_delete() and in other situations. 5210dea3156SMatthew Dillon * 5220dea3156SMatthew Dillon * NOTE: RB_SCAN() must be used instead of RB_FOREACH() 5230dea3156SMatthew Dillon * because children can be physically removed during 5240dea3156SMatthew Dillon * the scan. 52532b800e6SMatthew Dillon */ 5260dea3156SMatthew Dillon saved_parent = info->parent; 527ea155208SMatthew Dillon saved_mirror = info->mirror_tid; 5280dea3156SMatthew Dillon info->parent = chain; 529ea155208SMatthew Dillon info->mirror_tid = chain->bref.mirror_tid; 53032b800e6SMatthew Dillon 5310dea3156SMatthew Dillon if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 5320dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) { 5330dea3156SMatthew Dillon hammer2_chain_ref(chain); 5340dea3156SMatthew Dillon TAILQ_INSERT_TAIL(&info->flush_list, 5350dea3156SMatthew Dillon chain, flush_node); 5360dea3156SMatthew Dillon atomic_set_int(&chain->flags, 5370dea3156SMatthew Dillon HAMMER2_CHAIN_DEFERRED); 5380dea3156SMatthew Dillon } 5390dea3156SMatthew Dillon diddeferral = 1; 54032b800e6SMatthew Dillon } else { 5410dea3156SMatthew Dillon info->diddeferral = 0; 542731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 54351a0d27cSMatthew Dillon KKASSERT(core->good == 0x1234 && core->sharecnt > 0); 5441897c66eSMatthew Dillon TAILQ_FOREACH_REVERSE(layer, &core->layerq, 5451897c66eSMatthew Dillon h2_layer_list, entry) { 5461897c66eSMatthew Dillon ++layer->refs; 54751a0d27cSMatthew Dillon KKASSERT(layer->good == 0xABCD); 5481897c66eSMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 5490dea3156SMatthew Dillon NULL, hammer2_chain_flush_scan1, info); 5501897c66eSMatthew Dillon --layer->refs; 5510dea3156SMatthew Dillon diddeferral += info->diddeferral; 
55232b800e6SMatthew Dillon } 5531897c66eSMatthew Dillon spin_unlock(&core->cst.spin); 5541897c66eSMatthew Dillon } 55532b800e6SMatthew Dillon 556*a7720be7SMatthew Dillon KKASSERT(info->parent == chain); 557*a7720be7SMatthew Dillon 55832b800e6SMatthew Dillon /* 5590dea3156SMatthew Dillon * Handle successfully flushed children who are in the MOVED 5600dea3156SMatthew Dillon * state on the way back up the recursion. This can have 5610dea3156SMatthew Dillon * the side-effect of clearing MOVED. 56232b800e6SMatthew Dillon * 563*a7720be7SMatthew Dillon * Scan2 may replace info->parent. If it does it will also 564*a7720be7SMatthew Dillon * replace the extra ref we made. 565*a7720be7SMatthew Dillon * 5660dea3156SMatthew Dillon * Scan2 is non-recursive. 56732b800e6SMatthew Dillon */ 568cd189b1eSMatthew Dillon if (diddeferral) { 569731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 5701897c66eSMatthew Dillon } else { 5711897c66eSMatthew Dillon spin_lock(&core->cst.spin); 57251a0d27cSMatthew Dillon KKASSERT(core->good == 0x1234 && core->sharecnt > 0); 5731897c66eSMatthew Dillon TAILQ_FOREACH_REVERSE(layer, &core->layerq, 5741897c66eSMatthew Dillon h2_layer_list, entry) { 5751897c66eSMatthew Dillon info->pass = 1; 5761897c66eSMatthew Dillon ++layer->refs; 57751a0d27cSMatthew Dillon KKASSERT(layer->good == 0xABCD); 5781897c66eSMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 5790dea3156SMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 5801897c66eSMatthew Dillon info->pass = 2; 5811897c66eSMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 5821897c66eSMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 5831897c66eSMatthew Dillon --layer->refs; 584*a7720be7SMatthew Dillon KKASSERT(info->parent->core == core); 58532b800e6SMatthew Dillon } 5860dea3156SMatthew Dillon 5870dea3156SMatthew Dillon /* 588*a7720be7SMatthew Dillon * Mirror_tid propagates all changes. 
It is also used 589*a7720be7SMatthew Dillon * in scan2 to determine when a chain must be applied 590*a7720be7SMatthew Dillon * to the related block table. 591ea155208SMatthew Dillon */ 592*a7720be7SMatthew Dillon #if 0 593*a7720be7SMatthew Dillon kprintf("chainA %p.%d set parent bref mirror_tid %016jx -> %016jx\n", 594*a7720be7SMatthew Dillon info->parent, info->parent->bref.type, 595*a7720be7SMatthew Dillon info->mirror_tid, info->parent->bref.mirror_tid); 596*a7720be7SMatthew Dillon #endif 597*a7720be7SMatthew Dillon KKASSERT(info->parent->bref.mirror_tid <= 598*a7720be7SMatthew Dillon info->mirror_tid); 599*a7720be7SMatthew Dillon info->parent->bref.mirror_tid = info->mirror_tid; 600*a7720be7SMatthew Dillon } 601*a7720be7SMatthew Dillon 602*a7720be7SMatthew Dillon /* 603*a7720be7SMatthew Dillon * chain may have been replaced. 604*a7720be7SMatthew Dillon */ 605*a7720be7SMatthew Dillon #if 0 606*a7720be7SMatthew Dillon if (info->parent != *chainp) 607*a7720be7SMatthew Dillon kprintf("SWITCH PARENT %p->%p\n", 608*a7720be7SMatthew Dillon *chainp, info->parent); 609*a7720be7SMatthew Dillon #endif 610*a7720be7SMatthew Dillon chain = info->parent; 611*a7720be7SMatthew Dillon *chainp = chain; 612*a7720be7SMatthew Dillon 613*a7720be7SMatthew Dillon hammer2_chain_layer_check_locked(chain->hmp, core); 614*a7720be7SMatthew Dillon spin_unlock(&core->cst.spin); 615*a7720be7SMatthew Dillon 616*a7720be7SMatthew Dillon info->mirror_tid = saved_mirror; 617*a7720be7SMatthew Dillon info->parent = saved_parent; 618*a7720be7SMatthew Dillon KKASSERT(chain->refs > 1); 619*a7720be7SMatthew Dillon } 620*a7720be7SMatthew Dillon 621*a7720be7SMatthew Dillon #if 0 622*a7720be7SMatthew Dillon kprintf("POP %p.%d\n", chain, chain->bref.type); 623*a7720be7SMatthew Dillon #endif 624ea155208SMatthew Dillon 625ea155208SMatthew Dillon /* 6260dea3156SMatthew Dillon * Rollup diddeferral for caller. Note direct assignment, not +=. 
6270dea3156SMatthew Dillon */ 6280dea3156SMatthew Dillon info->diddeferral = diddeferral; 6290dea3156SMatthew Dillon 6300dea3156SMatthew Dillon /* 6310dea3156SMatthew Dillon * Do not flush chain if there were any deferrals. It will be 6320dea3156SMatthew Dillon * retried later after the deferrals are independently handled. 6330dea3156SMatthew Dillon */ 6340dea3156SMatthew Dillon if (diddeferral) { 6350dea3156SMatthew Dillon if (hammer2_debug & 0x0008) { 6360dea3156SMatthew Dillon kprintf("%*.*s} %p/%d %04x (deferred)", 6370dea3156SMatthew Dillon info->depth, info->depth, "", 6380dea3156SMatthew Dillon chain, chain->refs, chain->flags); 63932b800e6SMatthew Dillon } 64032b800e6SMatthew Dillon return; 64132b800e6SMatthew Dillon } 64232b800e6SMatthew Dillon 64332b800e6SMatthew Dillon /* 644731b2a84SMatthew Dillon * If we encounter a deleted chain within our flush we can clear 645731b2a84SMatthew Dillon * the MODIFIED bit and avoid flushing it whether it has been 64693f3933aSMatthew Dillon * destroyed or not. We must make sure that the chain is flagged 64793f3933aSMatthew Dillon * MOVED in this situation so the parent picks up the deletion. 64891abd410SMatthew Dillon * 649*a7720be7SMatthew Dillon * Since this chain will now never be written to disk we need to 650*a7720be7SMatthew Dillon * adjust bref.mirror_tid such that it does not prevent sub-chains 651*a7720be7SMatthew Dillon * from clearing their MOVED bits. 652*a7720be7SMatthew Dillon * 653*a7720be7SMatthew Dillon * NOTE: scan2 has already executed above so statistics have 65491abd410SMatthew Dillon * already been rolled up. 655*a7720be7SMatthew Dillon * 656*a7720be7SMatthew Dillon * NOTE: Deletions do not prevent flush recursion as a deleted 657*a7720be7SMatthew Dillon * inode (removed file) which is still open may still require 658*a7720be7SMatthew Dillon * on-media storage to be able to clean related pages out from 659*a7720be7SMatthew Dillon * the system caches. 
660*a7720be7SMatthew Dillon * 661*a7720be7SMatthew Dillon * NOTE: Even though this chain will not issue write I/O, we must 662*a7720be7SMatthew Dillon * still update chain->bref.mirror_tid for flush management 663*a7720be7SMatthew Dillon * purposes. 66432b800e6SMatthew Dillon */ 665731b2a84SMatthew Dillon if (chain->delete_tid <= info->sync_tid) { 666731b2a84SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 6671a7cfe5aSMatthew Dillon if (chain->bp) { 6681a7cfe5aSMatthew Dillon if (chain->bytes == chain->bp->b_bufsize) 669731b2a84SMatthew Dillon chain->bp->b_flags |= B_INVAL|B_RELBUF; 6701a7cfe5aSMatthew Dillon } 67193f3933aSMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { 67293f3933aSMatthew Dillon hammer2_chain_ref(chain); 67393f3933aSMatthew Dillon atomic_set_int(&chain->flags, 67493f3933aSMatthew Dillon HAMMER2_CHAIN_MOVED); 67593f3933aSMatthew Dillon } 676731b2a84SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 677*a7720be7SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 678*a7720be7SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 679731b2a84SMatthew Dillon hammer2_chain_drop(chain); 680731b2a84SMatthew Dillon } 681*a7720be7SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 682*a7720be7SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 683731b2a84SMatthew Dillon return; 684731b2a84SMatthew Dillon } 685731b2a84SMatthew Dillon #if 0 6860dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DESTROYED) && 687a864c5d9SMatthew Dillon (chain->flags & HAMMER2_CHAIN_DELETED) && 688a864c5d9SMatthew Dillon (trans->flags & HAMMER2_TRANS_RESTRICTED) == 0) { 689a864c5d9SMatthew Dillon /* 690a864c5d9SMatthew Dillon * Throw-away the MODIFIED flag 691a864c5d9SMatthew Dillon */ 6920dea3156SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 6931a7cfe5aSMatthew Dillon if (chain->bp) { 6941a7cfe5aSMatthew Dillon if (chain->bytes == chain->bp->b_bufsize) 6950dea3156SMatthew Dillon 
chain->bp->b_flags |= B_INVAL|B_RELBUF; 6961a7cfe5aSMatthew Dillon } 6970dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 6980dea3156SMatthew Dillon hammer2_chain_drop(chain); 6990dea3156SMatthew Dillon } 7000dea3156SMatthew Dillon return; 7010dea3156SMatthew Dillon } 702731b2a84SMatthew Dillon #endif 7030dea3156SMatthew Dillon 7040dea3156SMatthew Dillon /* 705ea155208SMatthew Dillon * A degenerate flush might not have flushed anything and thus not 706ea155208SMatthew Dillon * processed modified blocks on the way back up. Detect the case. 7070dea3156SMatthew Dillon * 7080dea3156SMatthew Dillon * Note that MOVED can be set without MODIFIED being set due to 7090dea3156SMatthew Dillon * a deletion, in which case it is handled by Scan2 later on. 7100dea3156SMatthew Dillon * 7110dea3156SMatthew Dillon * Both bits can be set along with DELETED due to a deletion if 7120dea3156SMatthew Dillon * modified data within the synchronization zone and the chain 7130dea3156SMatthew Dillon * was then deleted beyond the zone, in which case we still have 714ea155208SMatthew Dillon * to flush for synchronization point consistency. Otherwise though 715ea155208SMatthew Dillon * DELETED and MODIFIED are treated as separate flags. 7160dea3156SMatthew Dillon */ 717*a7720be7SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 718*a7720be7SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 719*a7720be7SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 7200dea3156SMatthew Dillon return; 721*a7720be7SMatthew Dillon } 7220dea3156SMatthew Dillon 7230dea3156SMatthew Dillon /* 7240dea3156SMatthew Dillon * Issue flush. 7250dea3156SMatthew Dillon * 7260dea3156SMatthew Dillon * A DESTROYED node that reaches this point must be flushed for 7270dea3156SMatthew Dillon * synchronization point consistency. 
7280dea3156SMatthew Dillon */ 7290dea3156SMatthew Dillon 7300dea3156SMatthew Dillon /* 7310dea3156SMatthew Dillon * Update mirror_tid, clear MODIFIED, and set MOVED. 7320dea3156SMatthew Dillon * 7330dea3156SMatthew Dillon * The caller will update the parent's reference to this chain 7340dea3156SMatthew Dillon * by testing MOVED as long as the modification was in-bounds. 7350dea3156SMatthew Dillon * 7360dea3156SMatthew Dillon * MOVED is never set on the volume root as there is no parent 7370dea3156SMatthew Dillon * to adjust. 7380dea3156SMatthew Dillon */ 739*a7720be7SMatthew Dillon if (hammer2_debug & 0x1000) { 740*a7720be7SMatthew Dillon kprintf("Flush %p.%d %016jx/%d sync_tid %016jx\n", 741*a7720be7SMatthew Dillon chain, chain->bref.type, 742*a7720be7SMatthew Dillon chain->bref.key, chain->bref.keybits, 743*a7720be7SMatthew Dillon info->sync_tid); 744*a7720be7SMatthew Dillon } 745*a7720be7SMatthew Dillon if (hammer2_debug & 0x2000) { 746*a7720be7SMatthew Dillon Debugger("Flush hell"); 747*a7720be7SMatthew Dillon } 7480dea3156SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 7490dea3156SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 7500dea3156SMatthew Dillon wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0; 7510dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 7520dea3156SMatthew Dillon if (chain == &hmp->vchain) 7530dea3156SMatthew Dillon kprintf("(FLUSHED VOLUME HEADER)\n"); 7541a7cfe5aSMatthew Dillon if (chain == &hmp->fchain) 7551a7cfe5aSMatthew Dillon kprintf("(FLUSHED FREEMAP HEADER)\n"); 7560dea3156SMatthew Dillon 7570dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) || 7581a7cfe5aSMatthew Dillon chain == &hmp->vchain || 7591a7cfe5aSMatthew Dillon chain == &hmp->fchain) { 76032b800e6SMatthew Dillon /* 76132b800e6SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared. 762d7bfb2cbSMatthew Dillon * Net is -0 or -1 ref depending. 
76332b800e6SMatthew Dillon */ 76432b800e6SMatthew Dillon if (wasmodified) 7650dea3156SMatthew Dillon hammer2_chain_drop(chain); 76632b800e6SMatthew Dillon } else { 76732b800e6SMatthew Dillon /* 768d7bfb2cbSMatthew Dillon * Drop the ref from the MODIFIED bit we cleared and 769d7bfb2cbSMatthew Dillon * set a ref for the MOVED bit we are setting. Net 770d7bfb2cbSMatthew Dillon * is +0 or +1 ref depending. 77132b800e6SMatthew Dillon */ 77232b800e6SMatthew Dillon if (wasmodified == 0) 7730dea3156SMatthew Dillon hammer2_chain_ref(chain); 77432b800e6SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); 77532b800e6SMatthew Dillon } 77632b800e6SMatthew Dillon 77732b800e6SMatthew Dillon /* 77832b800e6SMatthew Dillon * If this is part of a recursive flush we can go ahead and write 779ea155208SMatthew Dillon * out the buffer cache buffer and pass a new bref back up the chain 780ea155208SMatthew Dillon * via the MOVED bit. 78132b800e6SMatthew Dillon * 782ea155208SMatthew Dillon * Volume headers are NOT flushed here as they require special 783ea155208SMatthew Dillon * processing. 78432b800e6SMatthew Dillon */ 78532b800e6SMatthew Dillon switch(chain->bref.type) { 7861a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 7871a7cfe5aSMatthew Dillon hammer2_modify_volume(hmp); 7881a7cfe5aSMatthew Dillon break; 78932b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 79032b800e6SMatthew Dillon /* 7911a7cfe5aSMatthew Dillon * We should flush the free block table before we calculate 7921a7cfe5aSMatthew Dillon * CRCs and copy voldata -> volsync. 79393f3933aSMatthew Dillon * 79493f3933aSMatthew Dillon * To prevent SMP races, fchain must remain locked until 79593f3933aSMatthew Dillon * voldata is copied to volsync. 
7961a7cfe5aSMatthew Dillon */ 7971a7cfe5aSMatthew Dillon hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 798*a7720be7SMatthew Dillon if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) || 799*a7720be7SMatthew Dillon hmp->voldata.mirror_tid < hmp->fchain.core->update_tid) { 8001a7cfe5aSMatthew Dillon /* this will modify vchain as a side effect */ 801*a7720be7SMatthew Dillon hammer2_chain_t *tmp = &hmp->fchain; 802*a7720be7SMatthew Dillon hammer2_chain_flush(info->trans, &tmp); 803*a7720be7SMatthew Dillon KKASSERT(tmp == &hmp->fchain); 8041a7cfe5aSMatthew Dillon } 8051a7cfe5aSMatthew Dillon 8061a7cfe5aSMatthew Dillon /* 80732b800e6SMatthew Dillon * The volume header is flushed manually by the syncer, not 808ea155208SMatthew Dillon * here. All we do is adjust the crc's. 80932b800e6SMatthew Dillon */ 81032b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 81132b800e6SMatthew Dillon KKASSERT(chain->bp == NULL); 81232b800e6SMatthew Dillon kprintf("volume header mirror_tid %jd\n", 81332b800e6SMatthew Dillon hmp->voldata.mirror_tid); 81432b800e6SMatthew Dillon 81532b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 81632b800e6SMatthew Dillon hammer2_icrc32( 81732b800e6SMatthew Dillon (char *)&hmp->voldata + 81832b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_OFF, 81932b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_SIZE); 82032b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 82132b800e6SMatthew Dillon hammer2_icrc32( 82232b800e6SMatthew Dillon (char *)&hmp->voldata + 82332b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_OFF, 82432b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_SIZE); 82532b800e6SMatthew Dillon hmp->voldata.icrc_volheader = 82632b800e6SMatthew Dillon hammer2_icrc32( 82732b800e6SMatthew Dillon (char *)&hmp->voldata + 82832b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_OFF, 82932b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_SIZE); 83032b800e6SMatthew Dillon hmp->volsync = hmp->voldata; 8310dea3156SMatthew Dillon 
atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 83293f3933aSMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 83332b800e6SMatthew Dillon break; 83432b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_DATA: 83532b800e6SMatthew Dillon /* 83632b800e6SMatthew Dillon * Data elements have already been flushed via the logical 83732b800e6SMatthew Dillon * file buffer cache. Their hash was set in the bref by 83832b800e6SMatthew Dillon * the vop_write code. 83932b800e6SMatthew Dillon * 840ea155208SMatthew Dillon * Make sure any device buffer(s) have been flushed out here. 841ea155208SMatthew Dillon * (there aren't usually any to flush). 84232b800e6SMatthew Dillon */ 843a98aa0b0SMatthew Dillon psize = hammer2_devblksize(chain->bytes); 844a98aa0b0SMatthew Dillon pmask = (hammer2_off_t)psize - 1; 845a98aa0b0SMatthew Dillon pbase = chain->bref.data_off & ~pmask; 846a98aa0b0SMatthew Dillon boff = chain->bref.data_off & (HAMMER2_OFF_MASK & pmask); 84732b800e6SMatthew Dillon 848a98aa0b0SMatthew Dillon bp = getblk(hmp->devvp, pbase, psize, GETBLK_NOWAIT, 0); 84932b800e6SMatthew Dillon if (bp) { 85032b800e6SMatthew Dillon if ((bp->b_flags & (B_CACHE | B_DIRTY)) == 85132b800e6SMatthew Dillon (B_CACHE | B_DIRTY)) { 85232b800e6SMatthew Dillon cluster_awrite(bp); 85332b800e6SMatthew Dillon } else { 85432b800e6SMatthew Dillon bp->b_flags |= B_RELBUF; 85532b800e6SMatthew Dillon brelse(bp); 85632b800e6SMatthew Dillon } 85732b800e6SMatthew Dillon } 85832b800e6SMatthew Dillon break; 859512beabdSMatthew Dillon #if 0 86032b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 86132b800e6SMatthew Dillon /* 86232b800e6SMatthew Dillon * Indirect blocks may be in an INITIAL state. Use the 86332b800e6SMatthew Dillon * chain_lock() call to ensure that the buffer has been 86432b800e6SMatthew Dillon * instantiated (even though it is already locked the buffer 86532b800e6SMatthew Dillon * might not have been instantiated). 
86632b800e6SMatthew Dillon * 86732b800e6SMatthew Dillon * Only write the buffer out if it is dirty, it is possible 86832b800e6SMatthew Dillon * the operating system had already written out the buffer. 86932b800e6SMatthew Dillon */ 8700dea3156SMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 87132b800e6SMatthew Dillon KKASSERT(chain->bp != NULL); 87232b800e6SMatthew Dillon 87332b800e6SMatthew Dillon bp = chain->bp; 87432b800e6SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) || 87532b800e6SMatthew Dillon (bp->b_flags & B_DIRTY)) { 87632b800e6SMatthew Dillon bdwrite(chain->bp); 87732b800e6SMatthew Dillon } else { 87832b800e6SMatthew Dillon brelse(chain->bp); 87932b800e6SMatthew Dillon } 88032b800e6SMatthew Dillon chain->bp = NULL; 88132b800e6SMatthew Dillon chain->data = NULL; 8820dea3156SMatthew Dillon hammer2_chain_unlock(chain); 88332b800e6SMatthew Dillon break; 884512beabdSMatthew Dillon #endif 885512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 8861a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 8871a7cfe5aSMatthew Dillon /* 8881a7cfe5aSMatthew Dillon * Device-backed. Buffer will be flushed by the sync 8891a7cfe5aSMatthew Dillon * code XXX. 8901a7cfe5aSMatthew Dillon */ 891512beabdSMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 8921a7cfe5aSMatthew Dillon break; 893512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 89432b800e6SMatthew Dillon default: 89532b800e6SMatthew Dillon /* 89632b800e6SMatthew Dillon * Embedded elements have to be flushed out. 8971a7cfe5aSMatthew Dillon * (Basically just BREF_TYPE_INODE). 
89832b800e6SMatthew Dillon */ 899512beabdSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 90032b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 90132b800e6SMatthew Dillon KKASSERT(chain->bp == NULL); 90232b800e6SMatthew Dillon bref = &chain->bref; 90332b800e6SMatthew Dillon 90432b800e6SMatthew Dillon KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0); 9059061bde5SMatthew Dillon KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) == 906512beabdSMatthew Dillon HAMMER2_CHECK_ISCSI32 || 907512beabdSMatthew Dillon HAMMER2_DEC_CHECK(chain->bref.methods) == 908512beabdSMatthew Dillon HAMMER2_CHECK_FREEMAP); 90932b800e6SMatthew Dillon 91032b800e6SMatthew Dillon /* 91132b800e6SMatthew Dillon * The data is embedded, we have to acquire the 91232b800e6SMatthew Dillon * buffer cache buffer and copy the data into it. 91332b800e6SMatthew Dillon */ 914a98aa0b0SMatthew Dillon psize = hammer2_devblksize(chain->bytes); 915a98aa0b0SMatthew Dillon pmask = (hammer2_off_t)psize - 1; 916a98aa0b0SMatthew Dillon pbase = bref->data_off & ~pmask; 917a98aa0b0SMatthew Dillon boff = bref->data_off & (HAMMER2_OFF_MASK & pmask); 91832b800e6SMatthew Dillon 91932b800e6SMatthew Dillon /* 92032b800e6SMatthew Dillon * The getblk() optimization can only be used if the 92132b800e6SMatthew Dillon * physical block size matches the request. 92232b800e6SMatthew Dillon */ 923a98aa0b0SMatthew Dillon error = bread(hmp->devvp, pbase, psize, &bp); 92432b800e6SMatthew Dillon KKASSERT(error == 0); 925a98aa0b0SMatthew Dillon 92632b800e6SMatthew Dillon bdata = (char *)bp->b_data + boff; 92732b800e6SMatthew Dillon 92832b800e6SMatthew Dillon /* 92932b800e6SMatthew Dillon * Copy the data to the buffer, mark the buffer 93032b800e6SMatthew Dillon * dirty, and convert the chain to unmodified. 
93132b800e6SMatthew Dillon */ 93232b800e6SMatthew Dillon bcopy(chain->data, bdata, chain->bytes); 93332b800e6SMatthew Dillon bp->b_flags |= B_CLUSTEROK; 93432b800e6SMatthew Dillon bdwrite(bp); 93532b800e6SMatthew Dillon bp = NULL; 936a98aa0b0SMatthew Dillon 937512beabdSMatthew Dillon switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 938512beabdSMatthew Dillon case HAMMER2_CHECK_FREEMAP: 939512beabdSMatthew Dillon chain->bref.check.freemap.icrc32 = 940512beabdSMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 941512beabdSMatthew Dillon break; 942512beabdSMatthew Dillon case HAMMER2_CHECK_ISCSI32: 94332b800e6SMatthew Dillon chain->bref.check.iscsi32.value = 94432b800e6SMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 945512beabdSMatthew Dillon break; 946512beabdSMatthew Dillon default: 947512beabdSMatthew Dillon panic("hammer2_flush_core: bad crc type"); 948512beabdSMatthew Dillon break; /* NOT REACHED */ 949512beabdSMatthew Dillon } 95032b800e6SMatthew Dillon if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) 95132b800e6SMatthew Dillon ++hammer2_iod_meta_write; 95232b800e6SMatthew Dillon else 95332b800e6SMatthew Dillon ++hammer2_iod_indr_write; 95432b800e6SMatthew Dillon } 95532b800e6SMatthew Dillon } 95632b800e6SMatthew Dillon 95732b800e6SMatthew Dillon /* 9580dea3156SMatthew Dillon * Flush helper scan1 (recursive) 9590dea3156SMatthew Dillon * 9600dea3156SMatthew Dillon * Flushes the children of the caller's chain (parent) and updates 961ea155208SMatthew Dillon * the blockref, restricted by sync_tid. 9620dea3156SMatthew Dillon * 9630dea3156SMatthew Dillon * Ripouts during the loop should not cause any problems. Because we are 9640dea3156SMatthew Dillon * flushing to a synchronization point, modification races will occur after 9650dea3156SMatthew Dillon * sync_tid and do not have to be flushed anyway. 
966ea155208SMatthew Dillon * 967ea155208SMatthew Dillon * It is also ok if the parent is chain_duplicate()'d while unlocked because 968ea155208SMatthew Dillon * the delete/duplication will install a delete_tid that is still larger than 969ea155208SMatthew Dillon * our current sync_tid. 97032b800e6SMatthew Dillon */ 9710dea3156SMatthew Dillon static int 9720dea3156SMatthew Dillon hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data) 97332b800e6SMatthew Dillon { 9740dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 975cd189b1eSMatthew Dillon hammer2_trans_t *trans = info->trans; 9760dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 9770dea3156SMatthew Dillon int diddeferral; 97832b800e6SMatthew Dillon 9790dea3156SMatthew Dillon /* 980*a7720be7SMatthew Dillon * We only need to recurse if MODIFIED is set or 981*a7720be7SMatthew Dillon * child->bref.mirror_tid has not caught up to update_tid. 9820dea3156SMatthew Dillon */ 983*a7720be7SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 && 984*a7720be7SMatthew Dillon child->bref.mirror_tid >= child->core->update_tid) { 9850dea3156SMatthew Dillon return (0); 98632b800e6SMatthew Dillon } 987*a7720be7SMatthew Dillon if (child->modify_tid > trans->sync_tid) 988cd189b1eSMatthew Dillon return (0); 989cd189b1eSMatthew Dillon 990ea155208SMatthew Dillon hammer2_chain_ref(child); 9910dea3156SMatthew Dillon spin_unlock(&parent->core->cst.spin); 9920dea3156SMatthew Dillon 9930dea3156SMatthew Dillon /* 9940dea3156SMatthew Dillon * The caller has added a ref to the parent so we can temporarily 9950dea3156SMatthew Dillon * unlock it in order to lock the child. Re-check the flags before 9960dea3156SMatthew Dillon * continuing. 
9970dea3156SMatthew Dillon */ 9980dea3156SMatthew Dillon hammer2_chain_unlock(parent); 9990dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE); 10000dea3156SMatthew Dillon 1001*a7720be7SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 && 1002*a7720be7SMatthew Dillon child->bref.mirror_tid >= child->core->update_tid) { 10030dea3156SMatthew Dillon hammer2_chain_unlock(child); 1004ea155208SMatthew Dillon hammer2_chain_drop(child); 1005ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 10060dea3156SMatthew Dillon spin_lock(&parent->core->cst.spin); 10070dea3156SMatthew Dillon return (0); 10080dea3156SMatthew Dillon } 1009cd189b1eSMatthew Dillon if (child->modify_tid > trans->sync_tid) { 1010cd189b1eSMatthew Dillon hammer2_chain_unlock(child); 1011cd189b1eSMatthew Dillon hammer2_chain_drop(child); 1012cd189b1eSMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 1013cd189b1eSMatthew Dillon spin_lock(&parent->core->cst.spin); 1014cd189b1eSMatthew Dillon return (0); 1015cd189b1eSMatthew Dillon } 10160dea3156SMatthew Dillon 10170dea3156SMatthew Dillon /* 1018ea155208SMatthew Dillon * The DESTROYED flag can only be initially set on an unreferenced 1019ea155208SMatthew Dillon * deleted inode and will propagate downward via the mechanic below. 1020ea155208SMatthew Dillon * Such inode chains have been deleted for good and should no longer 1021ea155208SMatthew Dillon * be subject to delete/duplication. 10220dea3156SMatthew Dillon * 10230dea3156SMatthew Dillon * This optimization allows the inode reclaim (destroy unlinked file 10240dea3156SMatthew Dillon * on vnode reclamation after last close) to be flagged by just 1025ea155208SMatthew Dillon * setting HAMMER2_CHAIN_DESTROYED at the top level and then will 1026ea155208SMatthew Dillon * cause the chains to be terminated and related buffers to be 1027ea155208SMatthew Dillon * invalidated and not flushed out. 
1028ea155208SMatthew Dillon * 1029ea155208SMatthew Dillon * We have to be careful not to propagate the DESTROYED flag if 1030ea155208SMatthew Dillon * the destruction occurred after our flush sync_tid. 10310dea3156SMatthew Dillon */ 10320dea3156SMatthew Dillon if ((parent->flags & HAMMER2_CHAIN_DESTROYED) && 10339797e933SMatthew Dillon (child->flags & HAMMER2_CHAIN_DELETED) && 10340dea3156SMatthew Dillon (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) { 1035*a7720be7SMatthew Dillon atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROYED); 1036*a7720be7SMatthew Dillon /* 1037*a7720be7SMatthew Dillon * Force downward recursion by bringing update_tid up to 1038*a7720be7SMatthew Dillon * at least sync_tid. Parent's mirror_tid has not yet 1039*a7720be7SMatthew Dillon * been updated. 1040*a7720be7SMatthew Dillon * 1041*a7720be7SMatthew Dillon * Vnode reclamation may have forced update_tid to MAX_TID. 1042*a7720be7SMatthew Dillon * In this situation bring it down to something reasonable 1043*a7720be7SMatthew Dillon * so the elements being destroyed can be retired. 1044*a7720be7SMatthew Dillon */ 1045*a7720be7SMatthew Dillon spin_lock(&child->core->cst.spin); 1046*a7720be7SMatthew Dillon if (child->core->update_tid < trans->sync_tid || 1047*a7720be7SMatthew Dillon child->core->update_tid == HAMMER2_MAX_TID) { 1048*a7720be7SMatthew Dillon child->core->update_tid = trans->sync_tid; 1049*a7720be7SMatthew Dillon } 1050*a7720be7SMatthew Dillon spin_unlock(&child->core->cst.spin); 10510dea3156SMatthew Dillon } 10520dea3156SMatthew Dillon 10530dea3156SMatthew Dillon /* 10540dea3156SMatthew Dillon * Recurse and collect deferral data. 
10550dea3156SMatthew Dillon */ 10560dea3156SMatthew Dillon diddeferral = info->diddeferral; 10570dea3156SMatthew Dillon ++info->depth; 1058*a7720be7SMatthew Dillon hammer2_chain_flush_core(info, &child); 10599797e933SMatthew Dillon #if FLUSH_DEBUG 10609797e933SMatthew Dillon kprintf("flush_core_done parent=%p flags=%08x child=%p.%d %08x\n", 10619797e933SMatthew Dillon parent, parent->flags, child, child->bref.type, child->flags); 10629797e933SMatthew Dillon #endif 1063*a7720be7SMatthew Dillon /* 1064*a7720be7SMatthew Dillon * NOTE: If child failed to fully synchronize, child's bref.mirror_tid 1065*a7720be7SMatthew Dillon * will not have been updated. Bumping diddeferral prevents 1066*a7720be7SMatthew Dillon * the parent chain from updating bref.mirror_tid on the way 1067*a7720be7SMatthew Dillon * back up in order to force a retry later. 1068*a7720be7SMatthew Dillon */ 1069*a7720be7SMatthew Dillon if (child->bref.mirror_tid < child->core->update_tid) 1070*a7720be7SMatthew Dillon ++diddeferral; 1071*a7720be7SMatthew Dillon 10720dea3156SMatthew Dillon --info->depth; 10730dea3156SMatthew Dillon info->diddeferral += diddeferral; 10740dea3156SMatthew Dillon hammer2_chain_unlock(child); 1075ea155208SMatthew Dillon hammer2_chain_drop(child); 10760dea3156SMatthew Dillon 1077ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 10780dea3156SMatthew Dillon 10790dea3156SMatthew Dillon spin_lock(&parent->core->cst.spin); 10801897c66eSMatthew Dillon 10810dea3156SMatthew Dillon return (0); 10820dea3156SMatthew Dillon } 10830dea3156SMatthew Dillon 10840dea3156SMatthew Dillon /* 10850dea3156SMatthew Dillon * Flush helper scan2 (non-recursive) 10860dea3156SMatthew Dillon * 10870dea3156SMatthew Dillon * This pass on a chain's children propagates any MOVED or DELETED 1088ea155208SMatthew Dillon * elements back up the chain towards the root after those elements have 1089ea155208SMatthew Dillon * been fully flushed. 
Unlike scan1, this function is NOT recursive and 1090ea155208SMatthew Dillon * the parent remains locked across the entire scan. 10910dea3156SMatthew Dillon * 10921897c66eSMatthew Dillon * SCAN2 is called twice, once with pass set to 1 and once with it set to 2. 10931897c66eSMatthew Dillon * We have to do this so base[] elements can be deleted in pass 1 to make 10941897c66eSMatthew Dillon * room for adding new elements in pass 2. 10951897c66eSMatthew Dillon * 109691abd410SMatthew Dillon * This function also rolls up storage statistics. 109791abd410SMatthew Dillon * 10981897c66eSMatthew Dillon * NOTE! A deletion is a visibility issue, there can still be references to 1099ea155208SMatthew Dillon * deleted elements (for example, to an unlinked file which is still 1100ea155208SMatthew Dillon * open), and there can also be multiple chains pointing to the same 1101ea155208SMatthew Dillon * bref where some are deleted and some are not (for example due to 1102ea155208SMatthew Dillon * a rename). So a chain marked for deletion is basically considered 1103a864c5d9SMatthew Dillon * to be live until it is explicitly destroyed or until its ref-count 1104a864c5d9SMatthew Dillon * reaches zero (also implying that MOVED and MODIFIED are clear).
11050dea3156SMatthew Dillon */ 11060dea3156SMatthew Dillon static int 11070dea3156SMatthew Dillon hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) 11080dea3156SMatthew Dillon { 11090dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 11100dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 1111731b2a84SMatthew Dillon hammer2_chain_core_t *above = child->above; 1112a5913bdfSMatthew Dillon hammer2_mount_t *hmp = child->hmp; 1113a864c5d9SMatthew Dillon hammer2_trans_t *trans = info->trans; 11140dea3156SMatthew Dillon hammer2_blockref_t *base; 11150dea3156SMatthew Dillon int count; 11161897c66eSMatthew Dillon int ok; 1117ea155208SMatthew Dillon 1118ea155208SMatthew Dillon /* 11199797e933SMatthew Dillon * Inodes with stale children that have been converted to DIRECTDATA 11209797e933SMatthew Dillon * mode (file extension or hardlink conversion typically) need to 11219797e933SMatthew Dillon * skipped right now before we start messing with a non-existant 11229797e933SMatthew Dillon * block table. 11239797e933SMatthew Dillon */ 112409dd2dfeSMatthew Dillon #if 0 11259797e933SMatthew Dillon if (parent->bref.type == HAMMER2_BREF_TYPE_INODE && 11269797e933SMatthew Dillon (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA)) { 11279797e933SMatthew Dillon goto finalize; 11289797e933SMatthew Dillon } 112909dd2dfeSMatthew Dillon #endif 11309797e933SMatthew Dillon 11319797e933SMatthew Dillon /* 1132731b2a84SMatthew Dillon * Ignore children created after our flush point, treating them as 1133cd189b1eSMatthew Dillon * if they did not exist). These children will not cause the parent 1134cd189b1eSMatthew Dillon * to be updated. 1135731b2a84SMatthew Dillon * 1136*a7720be7SMatthew Dillon * Children deleted after our flush point are treated as having been 1137*a7720be7SMatthew Dillon * created for the purposes of the flush. 
The parent's update_tid 1138*a7720be7SMatthew Dillon * will already be higher than our trans->sync_tid so the flush path 1139*a7720be7SMatthew Dillon * is left intact. 1140*a7720be7SMatthew Dillon * 1141cd189b1eSMatthew Dillon * When we encounter such children and the parent chain has not been 1142cd189b1eSMatthew Dillon * deleted, delete/duplicated, or delete/duplicated-for-move, then 1143cd189b1eSMatthew Dillon * the parent may be used to funnel through several flush points. 1144*a7720be7SMatthew Dillon * These chains will still be visible to later flushes due to having 1145*a7720be7SMatthew Dillon * a higher update_tid than we can set in the current flush. 1146731b2a84SMatthew Dillon */ 1147731b2a84SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 1148731b2a84SMatthew Dillon goto finalize; 1149731b2a84SMatthew Dillon } 1150731b2a84SMatthew Dillon 1151731b2a84SMatthew Dillon /* 1152731b2a84SMatthew Dillon * Ignore children which have not changed. The parent's block table 1153731b2a84SMatthew Dillon * is already correct. 11541897c66eSMatthew Dillon * 11551897c66eSMatthew Dillon * XXX The MOVED bit is only cleared when all multi-homed parents 11561897c66eSMatthew Dillon * have flushed, creating a situation where a re-flush can occur 11571897c66eSMatthew Dillon * via a parent which has already flushed. The hammer2_base_*() 11581897c66eSMatthew Dillon * functions currently have a hack to deal with this case but 11591897c66eSMatthew Dillon * we need something better. 1160ea155208SMatthew Dillon */ 1161ea155208SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) { 11620dea3156SMatthew Dillon goto finalize; 11630dea3156SMatthew Dillon } 1164ea155208SMatthew Dillon 11651897c66eSMatthew Dillon /* 11661897c66eSMatthew Dillon * Make sure child is referenced before we unlock. 
11671897c66eSMatthew Dillon */ 1168ea155208SMatthew Dillon hammer2_chain_ref(child); 1169731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 11700dea3156SMatthew Dillon 11710dea3156SMatthew Dillon /* 11721897c66eSMatthew Dillon * Parent reflushed after the child has passed them by should skip 11731897c66eSMatthew Dillon * due to the modify_tid test. XXX 11740dea3156SMatthew Dillon */ 11750dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER); 11761897c66eSMatthew Dillon KKASSERT(child->above == above); 11771897c66eSMatthew Dillon KKASSERT(parent->core == above); 11780dea3156SMatthew Dillon 11790dea3156SMatthew Dillon /* 11800dea3156SMatthew Dillon * The parent's blockref to the child must be deleted or updated. 11810dea3156SMatthew Dillon * 11820dea3156SMatthew Dillon * This point is not reached on successful DESTROYED optimizations 1183a864c5d9SMatthew Dillon * but can be reached on recursive deletions and restricted flushes. 1184ea155208SMatthew Dillon * 1185*a7720be7SMatthew Dillon * The chain_modify here may delete-duplicate the block. This can 1186*a7720be7SMatthew Dillon * cause a multitude of issues if the block was already modified 1187*a7720be7SMatthew Dillon * by a later (post-flush) transaction. Primarily blockrefs in 1188*a7720be7SMatthew Dillon * the later block can be out-of-date, so if the situation occurs 1189*a7720be7SMatthew Dillon * we can't throw away the MOVED bit on the current blocks until 1190*a7720be7SMatthew Dillon * the later blocks are flushed (so as to be able to regenerate all 1191*a7720be7SMatthew Dillon * the changes that were made). 1192*a7720be7SMatthew Dillon * 11934a59bd3eSMatthew Dillon * Because flushes are ordered we do not have to make a 11944a59bd3eSMatthew Dillon * modify/duplicate of indirect blocks. That is, the flush 11954a59bd3eSMatthew Dillon * code does not have to kmalloc or duplicate anything. 
We 11964a59bd3eSMatthew Dillon * can adjust the indirect block table in-place and reuse the 11974a59bd3eSMatthew Dillon * chain. It IS possible that the chain has already been duplicated 11984a59bd3eSMatthew Dillon * or may wind up being duplicated on-the-fly by modifying code 11994a59bd3eSMatthew Dillon * on the frontend. We simply use the original and ignore such 12004a59bd3eSMatthew Dillon * chains. However, it does mean we can't clear the MOVED bit. 12014a59bd3eSMatthew Dillon * 1202ea155208SMatthew Dillon * XXX recursive deletions not optimized. 12030dea3156SMatthew Dillon */ 1204*a7720be7SMatthew Dillon hammer2_chain_modify(trans, &parent, HAMMER2_MODIFY_NO_MODIFY_TID); 1205*a7720be7SMatthew Dillon if (info->parent != parent) { 1206*a7720be7SMatthew Dillon /* extra ref from flush_core */ 1207*a7720be7SMatthew Dillon hammer2_chain_drop(info->parent); 1208*a7720be7SMatthew Dillon info->parent = parent; 1209*a7720be7SMatthew Dillon hammer2_chain_ref(info->parent); 1210*a7720be7SMatthew Dillon } 12110dea3156SMatthew Dillon 12120dea3156SMatthew Dillon switch(parent->bref.type) { 12130dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 1214ea155208SMatthew Dillon /* 1215ea155208SMatthew Dillon * XXX Should assert that OPFLAG_DIRECTDATA is 0 once we 1216ea155208SMatthew Dillon * properly duplicate the inode headers and do proper flush 1217ea155208SMatthew Dillon * range checks (all the children should be beyond the flush 1218ea155208SMatthew Dillon * point). For now just don't sync the non-applicable 1219ea155208SMatthew Dillon * children. 1220ea155208SMatthew Dillon * 1221ea155208SMatthew Dillon * XXX Can also occur due to hardlink consolidation. We 1222ea155208SMatthew Dillon * set OPFLAG_DIRECTDATA to prevent the indirect and data 1223ea155208SMatthew Dillon * blocks from syncing ot the hardlink pointer. 
1224ea155208SMatthew Dillon */ 1225ea155208SMatthew Dillon #if 0 12260dea3156SMatthew Dillon KKASSERT((parent->data->ipdata.op_flags & 12270dea3156SMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA) == 0); 1228ea155208SMatthew Dillon #endif 122909dd2dfeSMatthew Dillon #if 0 12301897c66eSMatthew Dillon if (parent->data->ipdata.op_flags & HAMMER2_OPFLAG_DIRECTDATA) { 1231ea155208SMatthew Dillon base = NULL; 123209dd2dfeSMatthew Dillon } else 123309dd2dfeSMatthew Dillon #endif 123409dd2dfeSMatthew Dillon { 12350dea3156SMatthew Dillon base = &parent->data->ipdata.u.blockset.blockref[0]; 12360dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 1237ea155208SMatthew Dillon } 12380dea3156SMatthew Dillon break; 12390dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 12401a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 12410dea3156SMatthew Dillon if (parent->data) { 124293f3933aSMatthew Dillon base = &parent->data->npdata[0]; 12430dea3156SMatthew Dillon } else { 12440dea3156SMatthew Dillon base = NULL; 12450dea3156SMatthew Dillon KKASSERT(child->flags & HAMMER2_CHAIN_DELETED); 12460dea3156SMatthew Dillon } 12470dea3156SMatthew Dillon count = parent->bytes / sizeof(hammer2_blockref_t); 12480dea3156SMatthew Dillon break; 12490dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 12500dea3156SMatthew Dillon base = &hmp->voldata.sroot_blockset.blockref[0]; 12510dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 12520dea3156SMatthew Dillon break; 12531a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 125493f3933aSMatthew Dillon base = &parent->data->npdata[0]; 12551a7cfe5aSMatthew Dillon count = HAMMER2_SET_COUNT; 12561a7cfe5aSMatthew Dillon break; 12570dea3156SMatthew Dillon default: 12580dea3156SMatthew Dillon base = NULL; 12590dea3156SMatthew Dillon count = 0; 12601897c66eSMatthew Dillon panic("hammer2_chain_flush_scan2: " 12610dea3156SMatthew Dillon "unrecognized blockref type: %d", 12620dea3156SMatthew Dillon parent->bref.type); 12630dea3156SMatthew Dillon } 
12640dea3156SMatthew Dillon 12650dea3156SMatthew Dillon /* 12661897c66eSMatthew Dillon * Don't bother updating a deleted parent's blockrefs (caller will 12671897c66eSMatthew Dillon * optimize-out the disk write). Note that this is not optional, 12681897c66eSMatthew Dillon * a deleted parent's blockref array might not be synchronized at 12691897c66eSMatthew Dillon * all so calling hammer2_base*() functions could result in a panic. 12701897c66eSMatthew Dillon * 12711897c66eSMatthew Dillon * Otherwise, we need to be COUNTEDBREFS synchronized for the 12721897c66eSMatthew Dillon * hammer2_base_*() functions. 12731897c66eSMatthew Dillon */ 12741897c66eSMatthew Dillon if (parent->delete_tid <= trans->sync_tid) 12751897c66eSMatthew Dillon base = NULL; 1276c057466cSMatthew Dillon else if ((parent->core->flags & HAMMER2_CORE_COUNTEDBREFS) == 0) 127751a0d27cSMatthew Dillon hammer2_chain_countbrefs(parent, base, count); 12781897c66eSMatthew Dillon 12791897c66eSMatthew Dillon /* 12800dea3156SMatthew Dillon * Update the parent's blockref table and propagate mirror_tid. 1281d5fabb70SMatthew Dillon * 1282731b2a84SMatthew Dillon * NOTE! Children with modify_tid's beyond our flush point are 1283731b2a84SMatthew Dillon * considered to not exist for the purposes of updating the 1284731b2a84SMatthew Dillon * parent's blockref array. 1285d5fabb70SMatthew Dillon * 1286731b2a84SMatthew Dillon * NOTE! Updates to a parent's blockref table do not adjust the 1287731b2a84SMatthew Dillon * parent's bref.modify_tid, only its bref.mirror_tid. 1288*a7720be7SMatthew Dillon * 1289*a7720be7SMatthew Dillon * NOTE! chain->modify_tid vs chain->bref.modify_tid. The chain's 1290*a7720be7SMatthew Dillon * internal modify_tid is always updated based on creation 1291*a7720be7SMatthew Dillon * or delete-duplicate. However, the bref.modify_tid is NOT 1292*a7720be7SMatthew Dillon * updated due to simple blockref updates. 
12930dea3156SMatthew Dillon */ 1294*a7720be7SMatthew Dillon #if 0 1295*a7720be7SMatthew Dillon kprintf("chain %p->%p pass %d trans %016jx sync %p.%d %016jx/%d C=%016jx D=%016jx PMIRROR %016jx\n", 1296*a7720be7SMatthew Dillon parent, child, 1297*a7720be7SMatthew Dillon info->pass, trans->sync_tid, 1298*a7720be7SMatthew Dillon child, child->bref.type, 1299*a7720be7SMatthew Dillon child->bref.key, child->bref.keybits, 1300*a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, parent->bref.mirror_tid); 1301*a7720be7SMatthew Dillon #endif 1302*a7720be7SMatthew Dillon 13031897c66eSMatthew Dillon if (info->pass == 1 && child->delete_tid <= trans->sync_tid) { 13041897c66eSMatthew Dillon /* 1305*a7720be7SMatthew Dillon * Deleting. The block array is expected to contain the 1306*a7720be7SMatthew Dillon * child's entry if: 1307*a7720be7SMatthew Dillon * 1308*a7720be7SMatthew Dillon * (1) The deletion occurred after the parent's block table 1309*a7720be7SMatthew Dillon * was last synchronized (delete_tid), and 1310*a7720be7SMatthew Dillon * 1311*a7720be7SMatthew Dillon * (2) The creation occurred before or during the parent's 1312*a7720be7SMatthew Dillon * last block table synchronization. 
13131897c66eSMatthew Dillon */ 13141897c66eSMatthew Dillon ok = 1; 1315*a7720be7SMatthew Dillon if (base && 1316*a7720be7SMatthew Dillon child->delete_tid > parent->bref.mirror_tid && 1317*a7720be7SMatthew Dillon child->modify_tid <= parent->bref.mirror_tid) { 131891abd410SMatthew Dillon hammer2_rollup_stats(parent, child, -1); 13191897c66eSMatthew Dillon spin_lock(&above->cst.spin); 1320*a7720be7SMatthew Dillon #if 0 1321*a7720be7SMatthew Dillon kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " 1322*a7720be7SMatthew Dillon "flg=%08x %016jx/%d delete\n", 1323*a7720be7SMatthew Dillon trans->sync_tid, 1324*a7720be7SMatthew Dillon parent, parent->bref.type, 1325*a7720be7SMatthew Dillon child, child->bref.type, 1326*a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, 1327*a7720be7SMatthew Dillon child->flags, 1328*a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1329*a7720be7SMatthew Dillon #endif 133051a0d27cSMatthew Dillon hammer2_base_delete(parent, base, count, 1331*a7720be7SMatthew Dillon &info->cache_index, child); 13321897c66eSMatthew Dillon spin_unlock(&above->cst.spin); 133393f3933aSMatthew Dillon } 1334ea155208SMatthew Dillon if (info->mirror_tid < child->delete_tid) 1335ea155208SMatthew Dillon info->mirror_tid = child->delete_tid; 13361897c66eSMatthew Dillon } else if (info->pass == 2 && child->delete_tid > trans->sync_tid) { 13371897c66eSMatthew Dillon /* 1338*a7720be7SMatthew Dillon * Inserting. The block array is expected to NOT contain 1339*a7720be7SMatthew Dillon * the child's entry if: 1340*a7720be7SMatthew Dillon * 1341*a7720be7SMatthew Dillon * (1) The creation occurred after the parent's block table 1342*a7720be7SMatthew Dillon * was last synchronized (modify_tid), and 1343*a7720be7SMatthew Dillon * 1344*a7720be7SMatthew Dillon * (2) The child is not being deleted in the same 1345*a7720be7SMatthew Dillon * transaction. 
13461897c66eSMatthew Dillon */ 13471897c66eSMatthew Dillon ok = 1; 1348*a7720be7SMatthew Dillon if (base && 1349*a7720be7SMatthew Dillon child->modify_tid > parent->bref.mirror_tid && 1350*a7720be7SMatthew Dillon child->delete_tid > trans->sync_tid) { 13511897c66eSMatthew Dillon hammer2_rollup_stats(parent, child, 1); 13521897c66eSMatthew Dillon spin_lock(&above->cst.spin); 1353*a7720be7SMatthew Dillon #if 0 1354*a7720be7SMatthew Dillon kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " 1355*a7720be7SMatthew Dillon "flg=%08x %016jx/%d insert\n", 1356*a7720be7SMatthew Dillon trans->sync_tid, 1357*a7720be7SMatthew Dillon parent, parent->bref.type, 1358*a7720be7SMatthew Dillon child, child->bref.type, 1359*a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, 1360*a7720be7SMatthew Dillon child->flags, 1361*a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1362*a7720be7SMatthew Dillon #endif 136351a0d27cSMatthew Dillon hammer2_base_insert(parent, base, count, 1364*a7720be7SMatthew Dillon &info->cache_index, child); 13651897c66eSMatthew Dillon spin_unlock(&above->cst.spin); 136693f3933aSMatthew Dillon } 1367ea155208SMatthew Dillon if (info->mirror_tid < child->modify_tid) 1368ea155208SMatthew Dillon info->mirror_tid = child->modify_tid; 13691897c66eSMatthew Dillon } else { 13701897c66eSMatthew Dillon ok = 0; 13710dea3156SMatthew Dillon } 13720dea3156SMatthew Dillon 1373ea155208SMatthew Dillon if (info->mirror_tid < child->bref.mirror_tid) { 1374ea155208SMatthew Dillon info->mirror_tid = child->bref.mirror_tid; 13750dea3156SMatthew Dillon } 13761a7cfe5aSMatthew Dillon if ((parent->bref.type == HAMMER2_BREF_TYPE_VOLUME || 13771a7cfe5aSMatthew Dillon parent->bref.type == HAMMER2_BREF_TYPE_FREEMAP) && 13780dea3156SMatthew Dillon hmp->voldata.mirror_tid < child->bref.mirror_tid) { 13790dea3156SMatthew Dillon hmp->voldata.mirror_tid = child->bref.mirror_tid; 13800dea3156SMatthew Dillon } 13810dea3156SMatthew Dillon 13820dea3156SMatthew Dillon 
/* 13831897c66eSMatthew Dillon * Only clear MOVED once all possible parents have been flushed. 13841897c66eSMatthew Dillon * 1385731b2a84SMatthew Dillon * When can we safely clear the MOVED flag? Flushes down duplicate 1386731b2a84SMatthew Dillon * paths can occur out of order, for example if an inode is moved 1387731b2a84SMatthew Dillon * as part of a hardlink consolidation or if an inode is moved into 1388731b2a84SMatthew Dillon * an indirect block indexed before the inode. 13890dea3156SMatthew Dillon */ 13901897c66eSMatthew Dillon if (ok && (child->flags & HAMMER2_CHAIN_MOVED)) { 1391cd189b1eSMatthew Dillon hammer2_chain_t *scan; 1392*a7720be7SMatthew Dillon 1393*a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1394*a7720be7SMatthew Dillon kprintf("CHECKMOVED %p (parent=%p)", child, parent); 1395731b2a84SMatthew Dillon 1396731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 13971897c66eSMatthew Dillon TAILQ_FOREACH(scan, &above->ownerq, core_entry) { 139809dd2dfeSMatthew Dillon /* 1399*a7720be7SMatthew Dillon * Can't destroy the child until all parent's have 1400*a7720be7SMatthew Dillon * synchronized with its move. 1401*a7720be7SMatthew Dillon * 1402*a7720be7SMatthew Dillon * NOTE: A deleted parent will synchronize with a 1403*a7720be7SMatthew Dillon * child's move without bothering to update 1404*a7720be7SMatthew Dillon * its brefs. 
140509dd2dfeSMatthew Dillon */ 1406*a7720be7SMatthew Dillon if (scan == parent || 1407*a7720be7SMatthew Dillon scan->delete_tid <= trans->sync_tid) 140809dd2dfeSMatthew Dillon continue; 1409*a7720be7SMatthew Dillon if (scan->bref.mirror_tid < child->modify_tid) { 1410*a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1411*a7720be7SMatthew Dillon kprintf("(fail scan %p %016jx/%016jx)", 1412*a7720be7SMatthew Dillon scan, scan->bref.mirror_tid, 1413*a7720be7SMatthew Dillon child->modify_tid); 1414731b2a84SMatthew Dillon ok = 0; 1415731b2a84SMatthew Dillon } 1416731b2a84SMatthew Dillon } 1417*a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1418*a7720be7SMatthew Dillon kprintf("\n"); 1419731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 1420731b2a84SMatthew Dillon if (ok) { 1421*a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1422*a7720be7SMatthew Dillon kprintf("clear moved %p.%d %016jx/%d\n", 1423*a7720be7SMatthew Dillon child, child->bref.type, 1424*a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1425ea155208SMatthew Dillon atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED); 1426ea155208SMatthew Dillon hammer2_chain_drop(child); /* flag */ 1427*a7720be7SMatthew Dillon } else { 1428*a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1429*a7720be7SMatthew Dillon kprintf("keep moved %p.%d %016jx/%d\n", 1430*a7720be7SMatthew Dillon child, child->bref.type, 1431*a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1432ea155208SMatthew Dillon } 14330dea3156SMatthew Dillon } 14340dea3156SMatthew Dillon 14350dea3156SMatthew Dillon /* 14360dea3156SMatthew Dillon * Unlock the child. This can wind up dropping the child's 14370dea3156SMatthew Dillon * last ref, removing it from the parent's RB tree, and deallocating 14380dea3156SMatthew Dillon * the structure. The RB_SCAN() our caller is doing handles the 14390dea3156SMatthew Dillon * situation. 
14400dea3156SMatthew Dillon */ 14410dea3156SMatthew Dillon hammer2_chain_unlock(child); 1442ea155208SMatthew Dillon hammer2_chain_drop(child); 1443731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 14440dea3156SMatthew Dillon 14450dea3156SMatthew Dillon /* 1446*a7720be7SMatthew Dillon * The parent may have been delete-duplicated. 14470dea3156SMatthew Dillon */ 1448*a7720be7SMatthew Dillon info->parent = parent; 14490dea3156SMatthew Dillon finalize: 14500dea3156SMatthew Dillon return (0); 145132b800e6SMatthew Dillon } 145291abd410SMatthew Dillon 145391abd410SMatthew Dillon static 145491abd410SMatthew Dillon void 145591abd410SMatthew Dillon hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how) 145691abd410SMatthew Dillon { 14571897c66eSMatthew Dillon #if 0 145891abd410SMatthew Dillon hammer2_chain_t *grandp; 14591897c66eSMatthew Dillon #endif 146091abd410SMatthew Dillon 146191abd410SMatthew Dillon parent->data_count += child->data_count; 146291abd410SMatthew Dillon parent->inode_count += child->inode_count; 146391abd410SMatthew Dillon child->data_count = 0; 146491abd410SMatthew Dillon child->inode_count = 0; 146591abd410SMatthew Dillon if (how < 0) { 146691abd410SMatthew Dillon parent->data_count -= child->bytes; 146791abd410SMatthew Dillon if (child->bref.type == HAMMER2_BREF_TYPE_INODE) { 146891abd410SMatthew Dillon parent->inode_count -= 1; 14699ec04660SMatthew Dillon #if 0 14709ec04660SMatthew Dillon /* XXX child->data may be NULL atm */ 147191abd410SMatthew Dillon parent->data_count -= child->data->ipdata.data_count; 147291abd410SMatthew Dillon parent->inode_count -= child->data->ipdata.inode_count; 14739ec04660SMatthew Dillon #endif 147491abd410SMatthew Dillon } 147591abd410SMatthew Dillon } else if (how > 0) { 147691abd410SMatthew Dillon parent->data_count += child->bytes; 147791abd410SMatthew Dillon if (child->bref.type == HAMMER2_BREF_TYPE_INODE) { 147891abd410SMatthew Dillon parent->inode_count += 1; 14799ec04660SMatthew Dillon 
#if 0 14809ec04660SMatthew Dillon /* XXX child->data may be NULL atm */ 148191abd410SMatthew Dillon parent->data_count += child->data->ipdata.data_count; 148291abd410SMatthew Dillon parent->inode_count += child->data->ipdata.inode_count; 14839ec04660SMatthew Dillon #endif 148491abd410SMatthew Dillon } 148591abd410SMatthew Dillon } 148691abd410SMatthew Dillon if (parent->bref.type == HAMMER2_BREF_TYPE_INODE) { 148791abd410SMatthew Dillon parent->data->ipdata.data_count += parent->data_count; 148891abd410SMatthew Dillon parent->data->ipdata.inode_count += parent->inode_count; 14891897c66eSMatthew Dillon #if 0 149091abd410SMatthew Dillon for (grandp = parent->above->first_parent; 149191abd410SMatthew Dillon grandp; 149291abd410SMatthew Dillon grandp = grandp->next_parent) { 149391abd410SMatthew Dillon grandp->data_count += parent->data_count; 149491abd410SMatthew Dillon grandp->inode_count += parent->inode_count; 149591abd410SMatthew Dillon } 14961897c66eSMatthew Dillon #endif 149791abd410SMatthew Dillon parent->data_count = 0; 149891abd410SMatthew Dillon parent->inode_count = 0; 149991abd410SMatthew Dillon } 150091abd410SMatthew Dillon } 1501