132b800e6SMatthew Dillon /* 20dea3156SMatthew Dillon * Copyright (c) 2011-2013 The DragonFly Project. All rights reserved. 332b800e6SMatthew Dillon * 432b800e6SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 532b800e6SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 632b800e6SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 732b800e6SMatthew Dillon * 832b800e6SMatthew Dillon * Redistribution and use in source and binary forms, with or without 932b800e6SMatthew Dillon * modification, are permitted provided that the following conditions 1032b800e6SMatthew Dillon * are met: 1132b800e6SMatthew Dillon * 1232b800e6SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 1332b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer. 1432b800e6SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 1532b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer in 1632b800e6SMatthew Dillon * the documentation and/or other materials provided with the 1732b800e6SMatthew Dillon * distribution. 1832b800e6SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 1932b800e6SMatthew Dillon * contributors may be used to endorse or promote products derived 2032b800e6SMatthew Dillon * from this software without specific, prior written permission. 2132b800e6SMatthew Dillon * 2232b800e6SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2332b800e6SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2432b800e6SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 2532b800e6SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 2632b800e6SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2732b800e6SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 2832b800e6SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2932b800e6SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 3032b800e6SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3132b800e6SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 3232b800e6SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3332b800e6SMatthew Dillon * SUCH DAMAGE. 3432b800e6SMatthew Dillon */ 3532b800e6SMatthew Dillon 3632b800e6SMatthew Dillon #include <sys/cdefs.h> 3732b800e6SMatthew Dillon #include <sys/param.h> 3832b800e6SMatthew Dillon #include <sys/systm.h> 3932b800e6SMatthew Dillon #include <sys/types.h> 4032b800e6SMatthew Dillon #include <sys/lock.h> 4132b800e6SMatthew Dillon #include <sys/uuid.h> 4232b800e6SMatthew Dillon 4332b800e6SMatthew Dillon #include "hammer2.h" 4432b800e6SMatthew Dillon 45925e4ad1SMatthew Dillon #define FLUSH_DEBUG 0 46925e4ad1SMatthew Dillon 4732b800e6SMatthew Dillon /* 4832b800e6SMatthew Dillon * Recursively flush the specified chain. The chain is locked and 4932b800e6SMatthew Dillon * referenced by the caller and will remain so on return. The chain 5032b800e6SMatthew Dillon * will remain referenced throughout but can temporarily lose its 5132b800e6SMatthew Dillon * lock during the recursion to avoid unnecessarily stalling user 5232b800e6SMatthew Dillon * processes. 
5332b800e6SMatthew Dillon */ 5432b800e6SMatthew Dillon struct hammer2_flush_info { 550dea3156SMatthew Dillon hammer2_chain_t *parent; 560dea3156SMatthew Dillon hammer2_trans_t *trans; 5732b800e6SMatthew Dillon int depth; 580dea3156SMatthew Dillon int diddeferral; 591897c66eSMatthew Dillon int pass; 601897c66eSMatthew Dillon int cache_index; 61a4dc31e0SMatthew Dillon int domodify; 621897c66eSMatthew Dillon struct h2_flush_deferral_list flush_list; 630dea3156SMatthew Dillon hammer2_tid_t sync_tid; /* flush synchronization point */ 6432b800e6SMatthew Dillon }; 6532b800e6SMatthew Dillon 6632b800e6SMatthew Dillon typedef struct hammer2_flush_info hammer2_flush_info_t; 6732b800e6SMatthew Dillon 680dea3156SMatthew Dillon static void hammer2_chain_flush_core(hammer2_flush_info_t *info, 69a7720be7SMatthew Dillon hammer2_chain_t **chainp); 700dea3156SMatthew Dillon static int hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data); 710dea3156SMatthew Dillon static int hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data); 720924b3f8SMatthew Dillon static void hammer2_flush_core_update(hammer2_chain_core_t *core, 730924b3f8SMatthew Dillon hammer2_flush_info_t *info); 7491abd410SMatthew Dillon static void hammer2_rollup_stats(hammer2_chain_t *parent, 7591abd410SMatthew Dillon hammer2_chain_t *child, int how); 7632b800e6SMatthew Dillon 77623d43d4SMatthew Dillon /* 78623d43d4SMatthew Dillon * Can we ignore a chain for the purposes of flushing modifications 79623d43d4SMatthew Dillon * to the media? 800924b3f8SMatthew Dillon * 81*59c5121aSMatthew Dillon * This code is now degenerate. We used to have to distinguish between 82044541cdSMatthew Dillon * deleted chains and deleted chains associated with inodes that were 83044541cdSMatthew Dillon * still open. This mechanic has been fixed so the function is now 84044541cdSMatthew Dillon * a simple test. 
85623d43d4SMatthew Dillon */ 86623d43d4SMatthew Dillon static __inline 87623d43d4SMatthew Dillon int 88623d43d4SMatthew Dillon h2ignore_deleted(hammer2_flush_info_t *info, hammer2_chain_t *chain) 89623d43d4SMatthew Dillon { 90044541cdSMatthew Dillon return (chain->delete_tid <= info->sync_tid); 91623d43d4SMatthew Dillon } 92623d43d4SMatthew Dillon 9393f3933aSMatthew Dillon #if 0 9493f3933aSMatthew Dillon static __inline 9593f3933aSMatthew Dillon void 9693f3933aSMatthew Dillon hammer2_updatestats(hammer2_flush_info_t *info, hammer2_blockref_t *bref, 9793f3933aSMatthew Dillon int how) 9893f3933aSMatthew Dillon { 9993f3933aSMatthew Dillon hammer2_key_t bytes; 10093f3933aSMatthew Dillon 10193f3933aSMatthew Dillon if (bref->type != 0) { 10293f3933aSMatthew Dillon bytes = 1 << (bref->data_off & HAMMER2_OFF_MASK_RADIX); 10393f3933aSMatthew Dillon if (bref->type == HAMMER2_BREF_TYPE_INODE) 10493f3933aSMatthew Dillon info->inode_count += how; 10593f3933aSMatthew Dillon if (how < 0) 10693f3933aSMatthew Dillon info->data_count -= bytes; 10793f3933aSMatthew Dillon else 10893f3933aSMatthew Dillon info->data_count += bytes; 10993f3933aSMatthew Dillon } 11093f3933aSMatthew Dillon } 11193f3933aSMatthew Dillon #endif 11293f3933aSMatthew Dillon 11332b800e6SMatthew Dillon /* 1140dea3156SMatthew Dillon * Transaction support functions for writing to the filesystem. 1150dea3156SMatthew Dillon * 11610136ab6SMatthew Dillon * Initializing a new transaction allocates a transaction ID. Typically 11710136ab6SMatthew Dillon * passed a pmp (hmp passed as NULL), indicating a cluster transaction. Can 11810136ab6SMatthew Dillon * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single 11910136ab6SMatthew Dillon * media target. The latter mode is used by the recovery code. 
 *
 * TWO TRANSACTION IDs can run concurrently, where one is a flush and the
 * other is a set of any number of concurrent filesystem operations.  We
 * can either have <running_fs_ops> + <waiting_flush> + <blocked_fs_ops>
 * or we can have <running_flush> + <concurrent_fs_ops>.
 *
 * During a flush, new fs_ops are only blocked until the fs_ops prior to
 * the flush complete.  The new fs_ops can then run concurrently with the
 * flush.
 *
 * Buffer-cache transactions operate as fs_ops but never block.  A
 * buffer-cache flush will run either before or after the current pending
 * flush depending on its state.
 *
 * sync_tid vs real_tid.  For flush transactions ONLY, the flush operation
 * actually uses two transaction ids, one for the flush operation itself,
 * and <N+1> for any freemap allocations made as a side-effect.  real_tid
 * is fixed at <N>, sync_tid is adjusted dynamically as needed.
 *
 * NOTE: The sync_tid for a flush's freemap allocation will match the
 *	 sync_tid of the following <concurrent_fs_ops> transaction(s).
 *	 The freemap topology will be out-of-step by one transaction id
 *	 in order to give the flusher a stable freemap topology to flush
 *	 out.  This is fixed up at mount-time using a quick incremental
 *	 scan.
1440dea3156SMatthew Dillon */ 1450dea3156SMatthew Dillon void 14610136ab6SMatthew Dillon hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfsmount_t *pmp, 14710136ab6SMatthew Dillon hammer2_mount_t *hmp, int flags) 1480dea3156SMatthew Dillon { 149a4dc31e0SMatthew Dillon hammer2_trans_t *head; 150d001f460SMatthew Dillon 1510dea3156SMatthew Dillon bzero(trans, sizeof(*trans)); 15210136ab6SMatthew Dillon if (pmp) { 153a5913bdfSMatthew Dillon trans->pmp = pmp; 15410136ab6SMatthew Dillon KKASSERT(hmp == NULL); 1557bed8d7eSMatthew Dillon hmp = pmp->cluster.chains[0]->hmp; /* XXX */ 15610136ab6SMatthew Dillon } else { 15710136ab6SMatthew Dillon trans->hmp_single = hmp; 15810136ab6SMatthew Dillon KKASSERT(hmp); 15910136ab6SMatthew Dillon } 160d001f460SMatthew Dillon 1610dea3156SMatthew Dillon hammer2_voldata_lock(hmp); 162d001f460SMatthew Dillon trans->flags = flags; 163d001f460SMatthew Dillon trans->td = curthread; 1641897c66eSMatthew Dillon /*trans->delete_gen = 0;*/ /* multiple deletions within trans */ 165d001f460SMatthew Dillon 166d001f460SMatthew Dillon if (flags & HAMMER2_TRANS_ISFLUSH) { 167d001f460SMatthew Dillon /* 168355d67fcSMatthew Dillon * If multiple flushes are trying to run we have to 169a4dc31e0SMatthew Dillon * wait until it is our turn. All flushes are serialized. 170355d67fcSMatthew Dillon * 171a4dc31e0SMatthew Dillon * We queue ourselves and then wait to become the head 172a4dc31e0SMatthew Dillon * of the queue, allowing all prior flushes to complete. 173623d43d4SMatthew Dillon * 174623d43d4SMatthew Dillon * A unique transaction id is required to avoid confusion 175623d43d4SMatthew Dillon * when updating the block tables. 
176355d67fcSMatthew Dillon */ 177355d67fcSMatthew Dillon ++hmp->flushcnt; 178623d43d4SMatthew Dillon ++hmp->voldata.alloc_tid; 179623d43d4SMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid; 180a4dc31e0SMatthew Dillon trans->real_tid = trans->sync_tid; 181623d43d4SMatthew Dillon ++hmp->voldata.alloc_tid; 182355d67fcSMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 183a4dc31e0SMatthew Dillon if (TAILQ_FIRST(&hmp->transq) != trans) { 184d001f460SMatthew Dillon trans->blocked = 1; 185d001f460SMatthew Dillon while (trans->blocked) { 186a4dc31e0SMatthew Dillon lksleep(&trans->sync_tid, &hmp->voldatalk, 187a4dc31e0SMatthew Dillon 0, "h2multf", hz); 188d001f460SMatthew Dillon } 189d001f460SMatthew Dillon } 190a4dc31e0SMatthew Dillon } else if (hmp->flushcnt == 0) { 191a7720be7SMatthew Dillon /* 192a4dc31e0SMatthew Dillon * No flushes are pending, we can go. 193a7720be7SMatthew Dillon */ 194a4dc31e0SMatthew Dillon TAILQ_INSERT_TAIL(&hmp->transq, trans, entry); 195a4dc31e0SMatthew Dillon trans->sync_tid = hmp->voldata.alloc_tid; 196a4dc31e0SMatthew Dillon trans->real_tid = trans->sync_tid; 197a7720be7SMatthew Dillon 198a4dc31e0SMatthew Dillon /* XXX improve/optimize inode allocation */ 199a4dc31e0SMatthew Dillon } else { 200a4dc31e0SMatthew Dillon /* 201a4dc31e0SMatthew Dillon * One or more flushes are pending. We insert after 202a4dc31e0SMatthew Dillon * the current flush and may block. We have priority 203a4dc31e0SMatthew Dillon * over any flushes that are not the current flush. 204a4dc31e0SMatthew Dillon * 205a4dc31e0SMatthew Dillon * TRANS_BUFCACHE transactions cannot block. 
206a4dc31e0SMatthew Dillon */ 207a4dc31e0SMatthew Dillon TAILQ_FOREACH(head, &hmp->transq, entry) { 208a4dc31e0SMatthew Dillon if (head->flags & HAMMER2_TRANS_ISFLUSH) 209a4dc31e0SMatthew Dillon break; 210a7720be7SMatthew Dillon } 211a4dc31e0SMatthew Dillon KKASSERT(head); 212a4dc31e0SMatthew Dillon TAILQ_INSERT_AFTER(&hmp->transq, head, trans, entry); 213a4dc31e0SMatthew Dillon trans->sync_tid = head->real_tid + 1; 214a4dc31e0SMatthew Dillon trans->real_tid = trans->sync_tid; 215a4dc31e0SMatthew Dillon 216a4dc31e0SMatthew Dillon if ((trans->flags & HAMMER2_TRANS_BUFCACHE) == 0) { 217a4dc31e0SMatthew Dillon if (TAILQ_FIRST(&hmp->transq) != head) { 218a4dc31e0SMatthew Dillon trans->blocked = 1; 219a4dc31e0SMatthew Dillon while (trans->blocked) { 220a4dc31e0SMatthew Dillon lksleep(&trans->sync_tid, 221a4dc31e0SMatthew Dillon &hmp->voldatalk, 0, 222a4dc31e0SMatthew Dillon "h2multf", hz); 223a4dc31e0SMatthew Dillon } 224a4dc31e0SMatthew Dillon } 225a4dc31e0SMatthew Dillon } 226a4dc31e0SMatthew Dillon } 227044541cdSMatthew Dillon if (flags & HAMMER2_TRANS_NEWINODE) { 228044541cdSMatthew Dillon if (hmp->voldata.inode_tid < HAMMER2_INODE_START) 229044541cdSMatthew Dillon hmp->voldata.inode_tid = HAMMER2_INODE_START; 230a4dc31e0SMatthew Dillon trans->inode_tid = hmp->voldata.inode_tid++; 231044541cdSMatthew Dillon } 232a7720be7SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 233a7720be7SMatthew Dillon } 234a7720be7SMatthew Dillon 2350dea3156SMatthew Dillon void 2360dea3156SMatthew Dillon hammer2_trans_done(hammer2_trans_t *trans) 2370dea3156SMatthew Dillon { 238a5913bdfSMatthew Dillon hammer2_mount_t *hmp; 239a4dc31e0SMatthew Dillon hammer2_trans_t *head; 240d001f460SMatthew Dillon hammer2_trans_t *scan; 241a02dfba1SMatthew Dillon 24210136ab6SMatthew Dillon if (trans->pmp) 2437bed8d7eSMatthew Dillon hmp = trans->pmp->cluster.chains[0]->hmp; 24410136ab6SMatthew Dillon else 24510136ab6SMatthew Dillon hmp = trans->hmp_single; 246a5913bdfSMatthew Dillon 247a4dc31e0SMatthew 
Dillon /* 248a4dc31e0SMatthew Dillon * Remove and adjust flushcnt 249a4dc31e0SMatthew Dillon */ 250a02dfba1SMatthew Dillon hammer2_voldata_lock(hmp); 251d001f460SMatthew Dillon TAILQ_REMOVE(&hmp->transq, trans, entry); 252a4dc31e0SMatthew Dillon if (trans->flags & HAMMER2_TRANS_ISFLUSH) 253d001f460SMatthew Dillon --hmp->flushcnt; 254a4dc31e0SMatthew Dillon 255355d67fcSMatthew Dillon /* 256a4dc31e0SMatthew Dillon * Unblock the head of the queue and any additional transactions 257a4dc31e0SMatthew Dillon * up to the next flush. 258355d67fcSMatthew Dillon */ 259a4dc31e0SMatthew Dillon head = TAILQ_FIRST(&hmp->transq); 260a4dc31e0SMatthew Dillon if (head && head->blocked) { 261a4dc31e0SMatthew Dillon head->blocked = 0; 262a4dc31e0SMatthew Dillon wakeup(&head->sync_tid); 263a4dc31e0SMatthew Dillon 264a4dc31e0SMatthew Dillon scan = TAILQ_NEXT(head, entry); 265a4dc31e0SMatthew Dillon while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) { 266925e4ad1SMatthew Dillon if (scan->blocked) { 267a4dc31e0SMatthew Dillon scan->blocked = 0; 268d001f460SMatthew Dillon wakeup(&scan->sync_tid); 269925e4ad1SMatthew Dillon } 270a4dc31e0SMatthew Dillon scan = TAILQ_NEXT(scan, entry); 271a02dfba1SMatthew Dillon } 272a02dfba1SMatthew Dillon } 273a02dfba1SMatthew Dillon hammer2_voldata_unlock(hmp, 0); 274a02dfba1SMatthew Dillon } 275a02dfba1SMatthew Dillon 2760dea3156SMatthew Dillon /* 2770dea3156SMatthew Dillon * Flush the chain and all modified sub-chains through the specified 2780dea3156SMatthew Dillon * synchronization point (sync_tid), propagating parent chain modifications 2790dea3156SMatthew Dillon * and mirror_tid updates back up as needed. Since we are recursing downward 2800dea3156SMatthew Dillon * we do not have to deal with the complexities of multi-homed chains (chains 2810dea3156SMatthew Dillon * with multiple parents). 
 *
 * Caller must have interlocked against any non-flush-related modifying
 * operations in progress whose modify_tid values are less than or equal
 * to the passed sync_tid.
 *
 * Caller must have already vetted synchronization points to ensure they
 * are properly flushed.  Only snapshots and cluster flushes can create
 * these sorts of synchronization points.
 *
 * This routine can be called from several places but the most important
 * is from the hammer2_vop_reclaim() function.  We want to try to completely
 * clean out the inode structure to prevent disconnected inodes from
 * building up and blowing out the kmalloc pool.  However, it is not actually
 * necessary to flush reclaimed inodes to maintain HAMMER2's crash recovery
 * capability.
 *
 * chain is locked on call and will remain locked on return.  If a flush
 * occurred, the chain's MOVED bit will be set indicating that its parent
 * (which is not part of the flush) should be updated.  The chain may be
 * replaced by the call.
30232b800e6SMatthew Dillon */ 30332b800e6SMatthew Dillon void 304a7720be7SMatthew Dillon hammer2_chain_flush(hammer2_trans_t *trans, hammer2_chain_t **chainp) 30532b800e6SMatthew Dillon { 306a7720be7SMatthew Dillon hammer2_chain_t *chain = *chainp; 30732b800e6SMatthew Dillon hammer2_chain_t *scan; 308731b2a84SMatthew Dillon hammer2_chain_core_t *core; 30932b800e6SMatthew Dillon hammer2_flush_info_t info; 310925e4ad1SMatthew Dillon int loops; 31132b800e6SMatthew Dillon 31232b800e6SMatthew Dillon /* 31332b800e6SMatthew Dillon * Execute the recursive flush and handle deferrals. 31432b800e6SMatthew Dillon * 31532b800e6SMatthew Dillon * Chains can be ridiculously long (thousands deep), so to 31632b800e6SMatthew Dillon * avoid blowing out the kernel stack the recursive flush has a 31732b800e6SMatthew Dillon * depth limit. Elements at the limit are placed on a list 31832b800e6SMatthew Dillon * for re-execution after the stack has been popped. 31932b800e6SMatthew Dillon */ 32032b800e6SMatthew Dillon bzero(&info, sizeof(info)); 32132b800e6SMatthew Dillon TAILQ_INIT(&info.flush_list); 3220dea3156SMatthew Dillon info.trans = trans; 3230dea3156SMatthew Dillon info.sync_tid = trans->sync_tid; 3241897c66eSMatthew Dillon info.cache_index = -1; 32532b800e6SMatthew Dillon 326731b2a84SMatthew Dillon core = chain->core; 327a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 328925e4ad1SMatthew Dillon kprintf("CHAIN FLUSH trans %p.%016jx chain %p.%d mod %016jx upd %016jx\n", trans, trans->sync_tid, chain, chain->bref.type, chain->modify_tid, core->update_lo); 329a4dc31e0SMatthew Dillon #endif 330731b2a84SMatthew Dillon 331a7720be7SMatthew Dillon /* 332a7720be7SMatthew Dillon * Extra ref needed because flush_core expects it when replacing 333a7720be7SMatthew Dillon * chain. 
334a7720be7SMatthew Dillon */ 335a7720be7SMatthew Dillon hammer2_chain_ref(chain); 336925e4ad1SMatthew Dillon loops = 0; 337a7720be7SMatthew Dillon 3380dea3156SMatthew Dillon for (;;) { 33932b800e6SMatthew Dillon /* 3400dea3156SMatthew Dillon * Unwind deep recursions which had been deferred. This 3410dea3156SMatthew Dillon * can leave MOVED set for these chains, which will be 3420dea3156SMatthew Dillon * handled when we [re]flush chain after the unwind. 34332b800e6SMatthew Dillon */ 34432b800e6SMatthew Dillon while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) { 34532b800e6SMatthew Dillon KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED); 34632b800e6SMatthew Dillon TAILQ_REMOVE(&info.flush_list, scan, flush_node); 34732b800e6SMatthew Dillon atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED); 34832b800e6SMatthew Dillon 34932b800e6SMatthew Dillon /* 35032b800e6SMatthew Dillon * Now that we've popped back up we can do a secondary 35132b800e6SMatthew Dillon * recursion on the deferred elements. 352053e752cSMatthew Dillon * 353053e752cSMatthew Dillon * NOTE: hammer2_chain_flush() may replace scan. 35432b800e6SMatthew Dillon */ 35532b800e6SMatthew Dillon if (hammer2_debug & 0x0040) 356053e752cSMatthew Dillon kprintf("deferred flush %p\n", scan); 3570dea3156SMatthew Dillon hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE); 358053e752cSMatthew Dillon hammer2_chain_drop(scan); /* ref from deferral */ 359a7720be7SMatthew Dillon hammer2_chain_flush(trans, &scan); 3600dea3156SMatthew Dillon hammer2_chain_unlock(scan); 36132b800e6SMatthew Dillon } 36232b800e6SMatthew Dillon 36332b800e6SMatthew Dillon /* 364925e4ad1SMatthew Dillon * [re]flush chain. 
36532b800e6SMatthew Dillon */ 3660dea3156SMatthew Dillon info.diddeferral = 0; 367a7720be7SMatthew Dillon hammer2_chain_flush_core(&info, &chain); 3689797e933SMatthew Dillon #if FLUSH_DEBUG 3699797e933SMatthew Dillon kprintf("flush_core_done parent=<base> chain=%p.%d %08x\n", 3709797e933SMatthew Dillon chain, chain->bref.type, chain->flags); 3719797e933SMatthew Dillon #endif 37232b800e6SMatthew Dillon 37332b800e6SMatthew Dillon /* 3740dea3156SMatthew Dillon * Only loop if deep recursions have been deferred. 37532b800e6SMatthew Dillon */ 3760dea3156SMatthew Dillon if (TAILQ_EMPTY(&info.flush_list)) 37732b800e6SMatthew Dillon break; 378925e4ad1SMatthew Dillon 379925e4ad1SMatthew Dillon if (++loops % 1000 == 0) { 380925e4ad1SMatthew Dillon kprintf("hammer2_chain_flush: excessive loops on %p\n", 381925e4ad1SMatthew Dillon chain); 382925e4ad1SMatthew Dillon if (hammer2_debug & 0x100000) 383925e4ad1SMatthew Dillon Debugger("hell4"); 384925e4ad1SMatthew Dillon } 38532b800e6SMatthew Dillon } 386a7720be7SMatthew Dillon hammer2_chain_drop(chain); 387a7720be7SMatthew Dillon *chainp = chain; 38832b800e6SMatthew Dillon } 38932b800e6SMatthew Dillon 390476d2aadSMatthew Dillon /* 391ea155208SMatthew Dillon * This is the core of the chain flushing code. The chain is locked by the 392a7720be7SMatthew Dillon * caller and must also have an extra ref on it by the caller, and remains 393a7720be7SMatthew Dillon * locked and will have an extra ref on return. 394a7720be7SMatthew Dillon * 3950dea3156SMatthew Dillon * If the flush accomplished any work chain will be flagged MOVED 3960dea3156SMatthew Dillon * indicating a copy-on-write propagation back up is required. 3970dea3156SMatthew Dillon * Deep sub-nodes may also have been entered onto the deferral list. 3980dea3156SMatthew Dillon * MOVED is never set on the volume root. 3990dea3156SMatthew Dillon * 4000dea3156SMatthew Dillon * NOTE: modify_tid is different from MODIFIED. 
modify_tid is updated 4010dea3156SMatthew Dillon * only when a chain is specifically modified, and not updated 4020dea3156SMatthew Dillon * for copy-on-write propagations. MODIFIED is set on any modification 4030dea3156SMatthew Dillon * including copy-on-write propagations. 404925e4ad1SMatthew Dillon * 405925e4ad1SMatthew Dillon * NOTE: We are responsible for updating chain->bref.mirror_tid and 406925e4ad1SMatthew Dillon * core->update_lo The caller is responsible for processing us into 407925e4ad1SMatthew Dillon * our parent (if any). 408925e4ad1SMatthew Dillon * 409925e4ad1SMatthew Dillon * We are also responsible for updating chain->core->update_lo to 410925e4ad1SMatthew Dillon * prevent repeated recursions due to deferrals. 4110924b3f8SMatthew Dillon * 4120924b3f8SMatthew Dillon * WARNING! bref.mirror_tid may only be updated if either the MODIFIED bit 4130924b3f8SMatthew Dillon * is already zero or if we clear it. 4140924b3f8SMatthew Dillon * 415476d2aadSMatthew Dillon */ 41632b800e6SMatthew Dillon static void 417a7720be7SMatthew Dillon hammer2_chain_flush_core(hammer2_flush_info_t *info, hammer2_chain_t **chainp) 41832b800e6SMatthew Dillon { 419a7720be7SMatthew Dillon hammer2_chain_t *chain = *chainp; 4200924b3f8SMatthew Dillon hammer2_chain_t *saved_parent; 4210dea3156SMatthew Dillon hammer2_mount_t *hmp; 422731b2a84SMatthew Dillon hammer2_chain_core_t *core; 42391caa51cSMatthew Dillon #if 0 42491caa51cSMatthew Dillon hammer2_blockref_t *bref; 42532b800e6SMatthew Dillon char *bdata; 426fdf62707SMatthew Dillon hammer2_io_t *dio; 42732b800e6SMatthew Dillon int error; 42891caa51cSMatthew Dillon #endif 429925e4ad1SMatthew Dillon int diddeferral; 43032b800e6SMatthew Dillon 431a5913bdfSMatthew Dillon hmp = chain->hmp; 432925e4ad1SMatthew Dillon core = chain->core; 433925e4ad1SMatthew Dillon diddeferral = info->diddeferral; 43432b800e6SMatthew Dillon 43532b800e6SMatthew Dillon /* 436925e4ad1SMatthew Dillon * Check if we even have any work to do. 
437925e4ad1SMatthew Dillon * 438925e4ad1SMatthew Dillon * We do not update core->update_lo because there might be other 439925e4ad1SMatthew Dillon * paths to the core and we haven't actually checked it. 440925e4ad1SMatthew Dillon * 441925e4ad1SMatthew Dillon * This bit of code is capable of short-cutting entire sub-trees 442925e4ad1SMatthew Dillon * if they have not been touched. 443925e4ad1SMatthew Dillon */ 444925e4ad1SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0 && 445925e4ad1SMatthew Dillon (core->update_lo >= info->sync_tid || 446925e4ad1SMatthew Dillon chain->bref.mirror_tid >= info->sync_tid || 447925e4ad1SMatthew Dillon chain->bref.mirror_tid >= core->update_hi)) { 448623d43d4SMatthew Dillon KKASSERT(chain->modify_tid <= info->sync_tid); 449623d43d4SMatthew Dillon /* don't update update_lo, there may be other paths to core */ 450623d43d4SMatthew Dillon /* don't update bref.mirror_tid, scan2 is not called */ 451925e4ad1SMatthew Dillon return; 452925e4ad1SMatthew Dillon } 453925e4ad1SMatthew Dillon 454925e4ad1SMatthew Dillon /* 4550924b3f8SMatthew Dillon * Ignore chains which have already been flushed through the current 4560924b3f8SMatthew Dillon * synchronization point. 4570924b3f8SMatthew Dillon */ 4580924b3f8SMatthew Dillon KKASSERT (chain->bref.mirror_tid <= info->sync_tid); 4590924b3f8SMatthew Dillon if (chain->bref.mirror_tid == info->sync_tid) { 4600924b3f8SMatthew Dillon /* do not update core->update_lo, there may be another path */ 4610924b3f8SMatthew Dillon return; 4620924b3f8SMatthew Dillon } 4630924b3f8SMatthew Dillon 4640924b3f8SMatthew Dillon /* 465731b2a84SMatthew Dillon * Ignore chains modified beyond the current flush point. These 466a4dc31e0SMatthew Dillon * will be treated as if they did not exist. Subchains with lower 467a4dc31e0SMatthew Dillon * modify_tid's will still be accessible via other parents. 
468a4dc31e0SMatthew Dillon * 469925e4ad1SMatthew Dillon * Do not update bref.mirror_tid here, it will interfere with 470925e4ad1SMatthew Dillon * synchronization. e.g. inode flush tid 1, concurrent D-D tid 2, 471925e4ad1SMatthew Dillon * then later on inode flush tid 2. If we were to set mirror_tid 472925e4ad1SMatthew Dillon * to 1 during inode flush tid 1 the blockrefs would only be partially 473925e4ad1SMatthew Dillon * updated (and likely panic). 474925e4ad1SMatthew Dillon * 475925e4ad1SMatthew Dillon * Do not update core->update_lo here, there might be other paths 476925e4ad1SMatthew Dillon * to the core and we haven't actually flushed it. 477925e4ad1SMatthew Dillon * 478a4dc31e0SMatthew Dillon * (vchain and fchain are exceptions since they cannot be duplicated) 479ea155208SMatthew Dillon */ 480a4dc31e0SMatthew Dillon if (chain->modify_tid > info->sync_tid && 481a4dc31e0SMatthew Dillon chain != &hmp->fchain && chain != &hmp->vchain) { 482623d43d4SMatthew Dillon /* do not update bref.mirror_tid, scan2 ignores chain */ 483925e4ad1SMatthew Dillon /* do not update core->update_lo, there may be another path */ 484ea155208SMatthew Dillon return; 485a4dc31e0SMatthew Dillon } 486731b2a84SMatthew Dillon 4870924b3f8SMatthew Dillon saved_parent = info->parent; 4880924b3f8SMatthew Dillon info->parent = chain; 489925e4ad1SMatthew Dillon retry: 4900924b3f8SMatthew Dillon 491925e4ad1SMatthew Dillon /* 4920924b3f8SMatthew Dillon * Chains deleted as-of the flush synchronization point require some 4930924b3f8SMatthew Dillon * special early handling to avoid double flushing because multiple 4940924b3f8SMatthew Dillon * deletions are sometimes forced within the same transaction. 4950924b3f8SMatthew Dillon * Allowing the flush to proceed through more than one path can wind 4960924b3f8SMatthew Dillon * up updating the chain's block table twice and cause an assertion. 
497925e4ad1SMatthew Dillon * 4980924b3f8SMatthew Dillon * We don't check the 'same transaction' part but simply punt in this 4990924b3f8SMatthew Dillon * situation. We must still check for multiple deletions, since any 5000924b3f8SMatthew Dillon * terminal (non-stale) deletion still requires processing to at 5010924b3f8SMatthew Dillon * least clean up the children, and also (for inodes) there might 5020924b3f8SMatthew Dillon * still be an open descriptor. 5030924b3f8SMatthew Dillon * 5040924b3f8SMatthew Dillon * Clear MODIFIED but set MOVED to ensure that the parent still 5050924b3f8SMatthew Dillon * deals with it. 506925e4ad1SMatthew Dillon */ 507925e4ad1SMatthew Dillon if (chain->delete_tid <= info->sync_tid && 508925e4ad1SMatthew Dillon (chain->flags & HAMMER2_CHAIN_DUPLICATED)) { 509925e4ad1SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 510623d43d4SMatthew Dillon #if 0 511fdf62707SMatthew Dillon /* 512623d43d4SMatthew Dillon * XXX should be able to invalidate the buffer here. 513623d43d4SMatthew Dillon * XXX problem if reused, snapshotted, or reactivated. 
514fdf62707SMatthew Dillon */ 515fdf62707SMatthew Dillon if (chain->dio) { 516fdf62707SMatthew Dillon hammer2_io_setinval(chain->dio, chain->bytes); 517925e4ad1SMatthew Dillon } 518fdf62707SMatthew Dillon #endif 519925e4ad1SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { 520925e4ad1SMatthew Dillon hammer2_chain_ref(chain); 521925e4ad1SMatthew Dillon atomic_set_int(&chain->flags, 522925e4ad1SMatthew Dillon HAMMER2_CHAIN_MOVED); 523925e4ad1SMatthew Dillon } 524925e4ad1SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 5250924b3f8SMatthew Dillon hammer2_chain_memory_wakeup(chain->pmp); 526925e4ad1SMatthew Dillon hammer2_chain_drop(chain); 527925e4ad1SMatthew Dillon } 52810136ab6SMatthew Dillon 52910136ab6SMatthew Dillon /* 53010136ab6SMatthew Dillon * Update mirror_tid, indicating that chain is synchronized 531623d43d4SMatthew Dillon * on its modification and block table. This probably isn't 532623d43d4SMatthew Dillon * needed since scan2 should ignore deleted chains anyway. 5330924b3f8SMatthew Dillon * 5340924b3f8SMatthew Dillon * NOTE: bref.mirror_tid cannot be updated 5350924b3f8SMatthew Dillon * unless MODIFIED is cleared or already 5360924b3f8SMatthew Dillon * clear. 53710136ab6SMatthew Dillon */ 538925e4ad1SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 539925e4ad1SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 540925e4ad1SMatthew Dillon /* do not update core->update_lo, there may be another path */ 5410924b3f8SMatthew Dillon info->parent = saved_parent; 542925e4ad1SMatthew Dillon return; 543925e4ad1SMatthew Dillon } 544ea155208SMatthew Dillon 545ea155208SMatthew Dillon /* 546925e4ad1SMatthew Dillon * Recurse if we are not up-to-date. Once we are done we will 547925e4ad1SMatthew Dillon * update update_lo if there were no deferrals. update_lo can become 548925e4ad1SMatthew Dillon * higher than update_hi and is used to prevent re-recursions during 549925e4ad1SMatthew Dillon * the same flush cycle. 
55032b800e6SMatthew Dillon * 551925e4ad1SMatthew Dillon * update_hi was already checked and prevents initial recursions on 552925e4ad1SMatthew Dillon * subtrees which have not been modified. 553a7720be7SMatthew Dillon * 554a7720be7SMatthew Dillon * NOTE: We must recurse whether chain is flagged DELETED or not. 555a7720be7SMatthew Dillon * However, if it is flagged DELETED we limit sync_tid to 556a7720be7SMatthew Dillon * delete_tid to ensure that the chain's bref.mirror_tid is 557a7720be7SMatthew Dillon * not fully updated and causes it to miss the non-DELETED 558a7720be7SMatthew Dillon * path. 559925e4ad1SMatthew Dillon * 560925e4ad1SMatthew Dillon * NOTE: If a deferral occurs hammer2_chain_flush() will flush the 561925e4ad1SMatthew Dillon * deferred chain independently which will update it's 562925e4ad1SMatthew Dillon * bref.mirror_tid and prevent it from deferred again. 56332b800e6SMatthew Dillon */ 564925e4ad1SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid && 565925e4ad1SMatthew Dillon chain->bref.mirror_tid < core->update_hi) { 5661897c66eSMatthew Dillon hammer2_chain_layer_t *layer; 567a4dc31e0SMatthew Dillon int saved_domodify; 568a4dc31e0SMatthew Dillon int save_gen; 56932b800e6SMatthew Dillon 57032b800e6SMatthew Dillon /* 5710924b3f8SMatthew Dillon * Races will bump update_hi above trans->sync_tid and should 5720924b3f8SMatthew Dillon * not affect this test. 57332b800e6SMatthew Dillon * 57432b800e6SMatthew Dillon * We don't want to set our chain to MODIFIED gratuitously. 57532b800e6SMatthew Dillon * 57632b800e6SMatthew Dillon * We need an extra ref on chain because we are going to 57732b800e6SMatthew Dillon * release its lock temporarily in our child loop. 57832b800e6SMatthew Dillon */ 57932b800e6SMatthew Dillon 58032b800e6SMatthew Dillon /* 5810dea3156SMatthew Dillon * Run two passes. 
The first pass handles MODIFIED and 582925e4ad1SMatthew Dillon * update_lo recursions while the second pass handles 583a7720be7SMatthew Dillon * MOVED chains on the way back up. 58432b800e6SMatthew Dillon * 585925e4ad1SMatthew Dillon * If the stack gets too deep we defer the chain. Since 586925e4ad1SMatthew Dillon * hammer2_chain_core's can be shared at multiple levels 587925e4ad1SMatthew Dillon * in the tree, we may encounter a chain that we had already 588925e4ad1SMatthew Dillon * deferred. We could undefer it but it will probably just 589925e4ad1SMatthew Dillon * defer again so it is best to leave it deferred. 59032b800e6SMatthew Dillon * 5910dea3156SMatthew Dillon * Scan1 is recursive. 59232b800e6SMatthew Dillon * 5930dea3156SMatthew Dillon * NOTE: The act of handling a modified/submodified chain can 5940dea3156SMatthew Dillon * cause the MOVED Flag to be set. It can also be set 5950dea3156SMatthew Dillon * via hammer2_chain_delete() and in other situations. 5960dea3156SMatthew Dillon * 5970dea3156SMatthew Dillon * NOTE: RB_SCAN() must be used instead of RB_FOREACH() 5980dea3156SMatthew Dillon * because children can be physically removed during 5990dea3156SMatthew Dillon * the scan. 600a4dc31e0SMatthew Dillon * 601a4dc31e0SMatthew Dillon * NOTE: We would normally not care about insertions except 602a4dc31e0SMatthew Dillon * that some insertions might occur from the flush 603a4dc31e0SMatthew Dillon * itself, so loop on generation number changes. 
60432b800e6SMatthew Dillon */ 605a4dc31e0SMatthew Dillon saved_domodify = info->domodify; 606a4dc31e0SMatthew Dillon info->domodify = 0; 60732b800e6SMatthew Dillon 608925e4ad1SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_DEFERRED) { 609925e4ad1SMatthew Dillon ++info->diddeferral; 610925e4ad1SMatthew Dillon } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 6110dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) { 6120dea3156SMatthew Dillon hammer2_chain_ref(chain); 6130dea3156SMatthew Dillon TAILQ_INSERT_TAIL(&info->flush_list, 6140dea3156SMatthew Dillon chain, flush_node); 6150dea3156SMatthew Dillon atomic_set_int(&chain->flags, 6160dea3156SMatthew Dillon HAMMER2_CHAIN_DEFERRED); 6170dea3156SMatthew Dillon } 618925e4ad1SMatthew Dillon ++info->diddeferral; 61932b800e6SMatthew Dillon } else { 620731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 62151a0d27cSMatthew Dillon KKASSERT(core->good == 0x1234 && core->sharecnt > 0); 622a4dc31e0SMatthew Dillon do { 623a4dc31e0SMatthew Dillon save_gen = core->generation; 6241897c66eSMatthew Dillon TAILQ_FOREACH_REVERSE(layer, &core->layerq, 6251897c66eSMatthew Dillon h2_layer_list, entry) { 6261897c66eSMatthew Dillon ++layer->refs; 62751a0d27cSMatthew Dillon KKASSERT(layer->good == 0xABCD); 628a4dc31e0SMatthew Dillon RB_SCAN(hammer2_chain_tree, 629a4dc31e0SMatthew Dillon &layer->rbtree, 630a4dc31e0SMatthew Dillon NULL, hammer2_chain_flush_scan1, 631a4dc31e0SMatthew Dillon info); 6321897c66eSMatthew Dillon --layer->refs; 63332b800e6SMatthew Dillon } 634a4dc31e0SMatthew Dillon } while (core->generation != save_gen); 6351897c66eSMatthew Dillon spin_unlock(&core->cst.spin); 6361897c66eSMatthew Dillon } 63732b800e6SMatthew Dillon 638925e4ad1SMatthew Dillon if (info->parent != chain) { 639925e4ad1SMatthew Dillon kprintf("ZZZ\n"); 640925e4ad1SMatthew Dillon hammer2_chain_drop(chain); 641925e4ad1SMatthew Dillon hammer2_chain_ref(info->parent); 642925e4ad1SMatthew Dillon } 643925e4ad1SMatthew Dillon chain 
= info->parent; 644925e4ad1SMatthew Dillon 645925e4ad1SMatthew Dillon /* 6460924b3f8SMatthew Dillon * chain was unlocked during the scan1 recursion and may 6470924b3f8SMatthew Dillon * have been deleted, destroyed, or even synchronously 6480924b3f8SMatthew Dillon * flushed due to aliasing. 649623d43d4SMatthew Dillon * 6500924b3f8SMatthew Dillon * The flush continues normally in the first two places as 6510924b3f8SMatthew Dillon * the deletion or destruction does NOT affect the flush 6520924b3f8SMatthew Dillon * as-of the flush synchronization point. 653623d43d4SMatthew Dillon * 6540924b3f8SMatthew Dillon * We must detect the last case and avoid flushing chain twice. 655925e4ad1SMatthew Dillon */ 6560924b3f8SMatthew Dillon #if 0 657925e4ad1SMatthew Dillon if (chain->delete_tid <= info->sync_tid && 658925e4ad1SMatthew Dillon (chain->flags & HAMMER2_CHAIN_DUPLICATED)) { 659925e4ad1SMatthew Dillon kprintf("xxx\n"); 660925e4ad1SMatthew Dillon goto retry; 661925e4ad1SMatthew Dillon } 6620924b3f8SMatthew Dillon #endif 663925e4ad1SMatthew Dillon if (chain->bref.mirror_tid >= info->sync_tid || 664925e4ad1SMatthew Dillon chain->bref.mirror_tid >= core->update_hi) { 665925e4ad1SMatthew Dillon kprintf("yyy\n"); 666925e4ad1SMatthew Dillon goto retry; 667925e4ad1SMatthew Dillon } 668925e4ad1SMatthew Dillon 669a4dc31e0SMatthew Dillon /* 670904de023SMatthew Dillon * If any deferral occurred we must set domodify to 0 to avoid 671904de023SMatthew Dillon * potentially modifying the parent twice (now and when we run 672904de023SMatthew Dillon * the deferral list), as doing so could cause the blockref 673904de023SMatthew Dillon * update to run on a block array which has already been 674904de023SMatthew Dillon * updated. 
675904de023SMatthew Dillon */ 676904de023SMatthew Dillon if (info->domodify && diddeferral != info->diddeferral) 677904de023SMatthew Dillon info->domodify = 0; 678904de023SMatthew Dillon 679904de023SMatthew Dillon /* 6800924b3f8SMatthew Dillon * THIS IS THE ONLY POINT IN THE FLUSH WHERE A PARENT IN THE 6810924b3f8SMatthew Dillon * NOMINAL TOPOLOGY, OTHER THAN FREEMAP ALLOCATIONS, IS 6820924b3f8SMatthew Dillon * MODIFIED. FREEMAP ALLOCATIONS WILL MODIFY THE FREEMAP 6830924b3f8SMatthew Dillon * TOPOLOGY WITH SYNC_TID+1 AND DO NOT AFFECT THE CURRENT 6840924b3f8SMatthew Dillon * FLUSH. 6850924b3f8SMatthew Dillon * 6860924b3f8SMatthew Dillon * Modifying the parent can create issues if the current 6870924b3f8SMatthew Dillon * parent is already in a modified state with an earlier 6880924b3f8SMatthew Dillon * transaction id. We want to avoid an endless flush loop 6890924b3f8SMatthew Dillon * on the original parent so we must clear its modified bit 6900924b3f8SMatthew Dillon * after creating the new parent, if they wind up being 6910924b3f8SMatthew Dillon * different. Care must also be taken to avoid flushing the 6920924b3f8SMatthew Dillon * same parent twice. 6930924b3f8SMatthew Dillon * 694904de023SMatthew Dillon * We are responsible for setting the parent into a modified 695904de023SMatthew Dillon * state before we scan the children to update the parent's 696904de023SMatthew Dillon * block table. This must essentially be done as an atomic 697904de023SMatthew Dillon * operation (the parent must remain locked throughout the 698623d43d4SMatthew Dillon * operation), otherwise other transactions can squeeze a 699623d43d4SMatthew Dillon * delete-duplicate in and create block table havoc. 700a4dc31e0SMatthew Dillon * 701904de023SMatthew Dillon * NOTE: Blockrefs are only updated on live chains. 
702904de023SMatthew Dillon * 703904de023SMatthew Dillon * NOTE: Modifying the parent generally causes a 704904de023SMatthew Dillon * delete-duplicate to occur from within the flush 705904de023SMatthew Dillon * itself, with an allocation from the freemap occurring 706904de023SMatthew Dillon * as an additional side-effect. 707904de023SMatthew Dillon * 708904de023SMatthew Dillon * NOTE: If the parent was deleted our modified chain will 709904de023SMatthew Dillon * also be marked deleted, but since it inherits the 710904de023SMatthew Dillon * parent's delete_tid it will still appear to be 711904de023SMatthew Dillon * 'live' for the purposes of the flush. 712a4dc31e0SMatthew Dillon */ 713623d43d4SMatthew Dillon if (info->domodify && !h2ignore_deleted(info, chain)) { 714623d43d4SMatthew Dillon KKASSERT(chain->modify_tid < info->sync_tid); 715623d43d4SMatthew Dillon 716623d43d4SMatthew Dillon /* 717623d43d4SMatthew Dillon * The scan1 loop and/or flush_core is reentrant, 718623d43d4SMatthew Dillon * particularly when core->generation changes. To 719623d43d4SMatthew Dillon * avoid havoc we have to prevent repetitive 720623d43d4SMatthew Dillon * delete-duplicates of the same chain. 721623d43d4SMatthew Dillon * 722623d43d4SMatthew Dillon * After executing the modify set the original chain's 723623d43d4SMatthew Dillon * bref.mirror_tid to prevent any reentrancy during 724623d43d4SMatthew Dillon * the current flush cycle. 725623d43d4SMatthew Dillon */ 726a4dc31e0SMatthew Dillon hammer2_chain_modify(info->trans, &info->parent, 727a4dc31e0SMatthew Dillon HAMMER2_MODIFY_NO_MODIFY_TID); 728a4dc31e0SMatthew Dillon if (info->parent != chain) { 7290924b3f8SMatthew Dillon /* 7300924b3f8SMatthew Dillon * NOTE: bref.mirror_tid cannot be updated 7310924b3f8SMatthew Dillon * unless MODIFIED is cleared or already 7320924b3f8SMatthew Dillon * clear. 
7330924b3f8SMatthew Dillon */ 7340924b3f8SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 7350924b3f8SMatthew Dillon atomic_clear_int(&chain->flags, 7360924b3f8SMatthew Dillon HAMMER2_CHAIN_MODIFIED); 7370924b3f8SMatthew Dillon hammer2_chain_memory_wakeup(chain->pmp); 7380924b3f8SMatthew Dillon hammer2_chain_drop(chain); 7390924b3f8SMatthew Dillon } 740623d43d4SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 741623d43d4SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 742a4dc31e0SMatthew Dillon hammer2_chain_drop(chain); 743a4dc31e0SMatthew Dillon hammer2_chain_ref(info->parent); 744a4dc31e0SMatthew Dillon } 745a4dc31e0SMatthew Dillon chain = info->parent; 746a4dc31e0SMatthew Dillon } 747a7720be7SMatthew Dillon 748925e4ad1SMatthew Dillon KKASSERT(chain == info->parent); 749925e4ad1SMatthew Dillon 75032b800e6SMatthew Dillon /* 7510dea3156SMatthew Dillon * Handle successfully flushed children who are in the MOVED 7520dea3156SMatthew Dillon * state on the way back up the recursion. This can have 7530dea3156SMatthew Dillon * the side-effect of clearing MOVED. 75432b800e6SMatthew Dillon * 755a7720be7SMatthew Dillon * Scan2 may replace info->parent. If it does it will also 756a7720be7SMatthew Dillon * replace the extra ref we made. 757a7720be7SMatthew Dillon * 7580dea3156SMatthew Dillon * Scan2 is non-recursive. 
75932b800e6SMatthew Dillon */ 760925e4ad1SMatthew Dillon if (diddeferral != info->diddeferral) { 761731b2a84SMatthew Dillon spin_lock(&core->cst.spin); 7621897c66eSMatthew Dillon } else { 763521b4014SMatthew Dillon KKASSERT(chain == info->parent); 764521b4014SMatthew Dillon KKASSERT(info->domodify == 0 || 765521b4014SMatthew Dillon (chain->flags & HAMMER2_CHAIN_FLUSHED) == 0); 766521b4014SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_FLUSHED); 7671897c66eSMatthew Dillon spin_lock(&core->cst.spin); 76851a0d27cSMatthew Dillon KKASSERT(core->good == 0x1234 && core->sharecnt > 0); 769925e4ad1SMatthew Dillon KKASSERT(info->parent->core == core); 7701897c66eSMatthew Dillon TAILQ_FOREACH_REVERSE(layer, &core->layerq, 7711897c66eSMatthew Dillon h2_layer_list, entry) { 7721897c66eSMatthew Dillon info->pass = 1; 7731897c66eSMatthew Dillon ++layer->refs; 77451a0d27cSMatthew Dillon KKASSERT(layer->good == 0xABCD); 7751897c66eSMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 7760dea3156SMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 7771897c66eSMatthew Dillon info->pass = 2; 7781897c66eSMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 7791897c66eSMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 7801897c66eSMatthew Dillon --layer->refs; 78132b800e6SMatthew Dillon } 782a7720be7SMatthew Dillon } 783a7720be7SMatthew Dillon 784a7720be7SMatthew Dillon /* 785a4dc31e0SMatthew Dillon * info->parent must not have been replaced again 786a7720be7SMatthew Dillon */ 787a4dc31e0SMatthew Dillon KKASSERT(info->parent == chain); 788a4dc31e0SMatthew Dillon 789a7720be7SMatthew Dillon *chainp = chain; 790a7720be7SMatthew Dillon 791a7720be7SMatthew Dillon hammer2_chain_layer_check_locked(chain->hmp, core); 792a7720be7SMatthew Dillon spin_unlock(&core->cst.spin); 793a7720be7SMatthew Dillon 7940924b3f8SMatthew Dillon /* 7950924b3f8SMatthew Dillon * Update the core only if no deferrals occurred. 
Otherwise 7960924b3f8SMatthew Dillon * we could end up clearing the MOVED bit in the children 7970924b3f8SMatthew Dillon * prematurely. 7980924b3f8SMatthew Dillon */ 7990924b3f8SMatthew Dillon if (diddeferral == info->diddeferral) 8000924b3f8SMatthew Dillon hammer2_flush_core_update(core, info); 8010924b3f8SMatthew Dillon 802a4dc31e0SMatthew Dillon info->domodify = saved_domodify; 803a7720be7SMatthew Dillon KKASSERT(chain->refs > 1); 804925e4ad1SMatthew Dillon } else { 805925e4ad1SMatthew Dillon /* 8060924b3f8SMatthew Dillon * Update the core, no deferrals occurred in this path. 807925e4ad1SMatthew Dillon */ 8080924b3f8SMatthew Dillon hammer2_flush_core_update(core, info); 809a7720be7SMatthew Dillon } 8100924b3f8SMatthew Dillon info->parent = saved_parent; 811a7720be7SMatthew Dillon 812a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 813925e4ad1SMatthew Dillon kprintf("POP %p.%d defer=%d\n", chain, chain->bref.type, diddeferral); 814a7720be7SMatthew Dillon #endif 815ea155208SMatthew Dillon 816ea155208SMatthew Dillon /* 8170924b3f8SMatthew Dillon * Do not flush the chain if there were any deferrals. It will be 8180dea3156SMatthew Dillon * retried later after the deferrals are independently handled. 819925e4ad1SMatthew Dillon * Do not update update_lo or bref.mirror_tid. 
8200dea3156SMatthew Dillon */ 821925e4ad1SMatthew Dillon if (diddeferral != info->diddeferral) { 8220dea3156SMatthew Dillon if (hammer2_debug & 0x0008) { 8230dea3156SMatthew Dillon kprintf("%*.*s} %p/%d %04x (deferred)", 8240dea3156SMatthew Dillon info->depth, info->depth, "", 8250dea3156SMatthew Dillon chain, chain->refs, chain->flags); 82632b800e6SMatthew Dillon } 827925e4ad1SMatthew Dillon /* do not update core->update_lo */ 828623d43d4SMatthew Dillon /* do not update bref.mirror_tid */ 82932b800e6SMatthew Dillon return; 83032b800e6SMatthew Dillon } 83132b800e6SMatthew Dillon 8320924b3f8SMatthew Dillon KKASSERT(chain->bref.mirror_tid < info->sync_tid); 8330924b3f8SMatthew Dillon 83432b800e6SMatthew Dillon /* 835623d43d4SMatthew Dillon * Non-deferral path, chain is now deterministically being flushed. 836623d43d4SMatthew Dillon * We've finished running the recursion and the blockref update. 83791abd410SMatthew Dillon * 838623d43d4SMatthew Dillon * update bref.mirror_tid. update_lo has already been updated. 8390924b3f8SMatthew Dillon * 8400924b3f8SMatthew Dillon * After this point we MUST dispose of the MODIFIED bit on chain. 841623d43d4SMatthew Dillon */ 842623d43d4SMatthew Dillon if (chain->bref.mirror_tid < info->sync_tid) 843623d43d4SMatthew Dillon chain->bref.mirror_tid = info->sync_tid; 844623d43d4SMatthew Dillon 845623d43d4SMatthew Dillon /* 846623d43d4SMatthew Dillon * Deal with deleted and destroyed chains on the way back up. 847623d43d4SMatthew Dillon * 8480924b3f8SMatthew Dillon * Otherwise a deleted chain can be optimized by clearing MODIFIED 849623d43d4SMatthew Dillon * without bothering to write it out. 85003faa7d5SMatthew Dillon * 85103faa7d5SMatthew Dillon * NOTE: We optimize this by noting that only 'inode' chains require 85203faa7d5SMatthew Dillon * this treatment. When a file with an open descriptor is 85303faa7d5SMatthew Dillon * deleted only its inode is marked deleted. 
Other deletions, 85403faa7d5SMatthew Dillon * such as indirect block deletions, will no longer be visible 85503faa7d5SMatthew Dillon * to the live filesystem and do not need to be updated. 85632b800e6SMatthew Dillon */ 857623d43d4SMatthew Dillon if (h2ignore_deleted(info, chain)) { 85803faa7d5SMatthew Dillon /* 85903faa7d5SMatthew Dillon * At the moment we unconditionally set the MOVED bit because 86003faa7d5SMatthew Dillon * there are situations where it might not have been set due 86103faa7d5SMatthew Dillon * to similar delete-destroyed optimizations, and the parent 86203faa7d5SMatthew Dillon * of the parent still may need to be notified of the deletion. 86303faa7d5SMatthew Dillon */ 86493f3933aSMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { 86593f3933aSMatthew Dillon hammer2_chain_ref(chain); 86693f3933aSMatthew Dillon atomic_set_int(&chain->flags, 86793f3933aSMatthew Dillon HAMMER2_CHAIN_MOVED); 86893f3933aSMatthew Dillon } 8690dea3156SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 870623d43d4SMatthew Dillon #if 0 87103faa7d5SMatthew Dillon /* 872623d43d4SMatthew Dillon * XXX should be able to invalidate the buffer here. 873623d43d4SMatthew Dillon * XXX problem if reused, snapshotted, or reactivated. 87403faa7d5SMatthew Dillon */ 875fdf62707SMatthew Dillon if (chain->dio) { 876fdf62707SMatthew Dillon hammer2_io_setinval(chain->dio, chain->bytes); 8771a7cfe5aSMatthew Dillon } 878fdf62707SMatthew Dillon #endif 8790dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 8800924b3f8SMatthew Dillon hammer2_chain_memory_wakeup(chain->pmp); 8810dea3156SMatthew Dillon hammer2_chain_drop(chain); 8820dea3156SMatthew Dillon } 8830dea3156SMatthew Dillon return; 8840dea3156SMatthew Dillon } 8850dea3156SMatthew Dillon 8860dea3156SMatthew Dillon /* 887ea155208SMatthew Dillon * A degenerate flush might not have flushed anything and thus not 888ea155208SMatthew Dillon * processed modified blocks on the way back up. 
Detect the case. 889623d43d4SMatthew Dillon * 8900924b3f8SMatthew Dillon * This case can occur when a create, modify, and rename (to a 8910924b3f8SMatthew Dillon * different part of the topology) occurs in the same flush, 8920924b3f8SMatthew Dillon * resulting in a parent which effectively needs no modification. 8930dea3156SMatthew Dillon */ 894a7720be7SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MODIFIED) == 0) { 895ba8a9be0SMatthew Dillon #if 0 89610136ab6SMatthew Dillon kprintf("chain %p.%d %08x recursed but wasn't " 89710136ab6SMatthew Dillon "modified mirr=%016jx " 89810136ab6SMatthew Dillon "update_lo=%016jx synctid=%016jx\n", 89910136ab6SMatthew Dillon chain, chain->bref.type, chain->flags, 90010136ab6SMatthew Dillon chain->bref.mirror_tid, 90110136ab6SMatthew Dillon core->update_lo, info->sync_tid); 902ba8a9be0SMatthew Dillon #endif 90310136ab6SMatthew Dillon #if 0 904925e4ad1SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) == 0) { 905925e4ad1SMatthew Dillon hammer2_chain_ref(chain); 906925e4ad1SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); 907925e4ad1SMatthew Dillon } 90810136ab6SMatthew Dillon #endif 9090dea3156SMatthew Dillon return; 910a7720be7SMatthew Dillon } 91110136ab6SMatthew Dillon 9120dea3156SMatthew Dillon /* 9130dea3156SMatthew Dillon * Issue flush. 9140dea3156SMatthew Dillon * 915044541cdSMatthew Dillon * A DELETED node that reaches this point must be flushed for 9160dea3156SMatthew Dillon * synchronization point consistency. 91710136ab6SMatthew Dillon * 91810136ab6SMatthew Dillon * Update bref.mirror_tid, clear MODIFIED, and set MOVED. 9190dea3156SMatthew Dillon * 9200dea3156SMatthew Dillon * The caller will update the parent's reference to this chain 9210dea3156SMatthew Dillon * by testing MOVED as long as the modification was in-bounds. 9220dea3156SMatthew Dillon * 9230dea3156SMatthew Dillon * MOVED is never set on the volume root as there is no parent 9240dea3156SMatthew Dillon * to adjust. 
9250dea3156SMatthew Dillon */ 926a7720be7SMatthew Dillon if (hammer2_debug & 0x1000) { 927623d43d4SMatthew Dillon kprintf("Flush %p.%d %016jx/%d sync_tid=%016jx data=%016jx\n", 928a7720be7SMatthew Dillon chain, chain->bref.type, 929a7720be7SMatthew Dillon chain->bref.key, chain->bref.keybits, 930623d43d4SMatthew Dillon info->sync_tid, chain->bref.data_off); 931a7720be7SMatthew Dillon } 932a7720be7SMatthew Dillon if (hammer2_debug & 0x2000) { 933a7720be7SMatthew Dillon Debugger("Flush hell"); 934a7720be7SMatthew Dillon } 93510136ab6SMatthew Dillon 9360dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 9370924b3f8SMatthew Dillon hammer2_chain_memory_wakeup(chain->pmp); 9380dea3156SMatthew Dillon 9390dea3156SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_MOVED) || 9401a7cfe5aSMatthew Dillon chain == &hmp->vchain || 9411a7cfe5aSMatthew Dillon chain == &hmp->fchain) { 94232b800e6SMatthew Dillon /* 94310136ab6SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared, 94410136ab6SMatthew Dillon * net -1 ref. 94532b800e6SMatthew Dillon */ 9460dea3156SMatthew Dillon hammer2_chain_drop(chain); 94732b800e6SMatthew Dillon } else { 94832b800e6SMatthew Dillon /* 949d7bfb2cbSMatthew Dillon * Drop the ref from the MODIFIED bit we cleared and 95010136ab6SMatthew Dillon * set a ref for the MOVED bit we are setting. Net 0 refs. 95132b800e6SMatthew Dillon */ 95232b800e6SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED); 95332b800e6SMatthew Dillon } 95432b800e6SMatthew Dillon 95532b800e6SMatthew Dillon /* 95632b800e6SMatthew Dillon * If this is part of a recursive flush we can go ahead and write 957ea155208SMatthew Dillon * out the buffer cache buffer and pass a new bref back up the chain 958ea155208SMatthew Dillon * via the MOVED bit. 95932b800e6SMatthew Dillon * 960ea155208SMatthew Dillon * Volume headers are NOT flushed here as they require special 961ea155208SMatthew Dillon * processing. 
96232b800e6SMatthew Dillon */ 96332b800e6SMatthew Dillon switch(chain->bref.type) { 9641a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 9651a7cfe5aSMatthew Dillon hammer2_modify_volume(hmp); 96610136ab6SMatthew Dillon hmp->voldata.freemap_tid = hmp->fchain.bref.mirror_tid; 9671a7cfe5aSMatthew Dillon break; 96832b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 96932b800e6SMatthew Dillon /* 97010136ab6SMatthew Dillon * The free block table is flushed by hammer2_vfs_sync() 97110136ab6SMatthew Dillon * before it flushes vchain. We must still hold fchain 97210136ab6SMatthew Dillon * locked while copying voldata to volsync, however. 9731a7cfe5aSMatthew Dillon */ 9741a7cfe5aSMatthew Dillon hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 97510136ab6SMatthew Dillon #if 0 976a7720be7SMatthew Dillon if ((hmp->fchain.flags & HAMMER2_CHAIN_MODIFIED) || 977925e4ad1SMatthew Dillon hmp->voldata.freemap_tid < info->trans->sync_tid) { 9781a7cfe5aSMatthew Dillon /* this will modify vchain as a side effect */ 979a7720be7SMatthew Dillon hammer2_chain_t *tmp = &hmp->fchain; 980a7720be7SMatthew Dillon hammer2_chain_flush(info->trans, &tmp); 981a7720be7SMatthew Dillon KKASSERT(tmp == &hmp->fchain); 9821a7cfe5aSMatthew Dillon } 98310136ab6SMatthew Dillon #endif 98410136ab6SMatthew Dillon 98510136ab6SMatthew Dillon /* 98610136ab6SMatthew Dillon * There is no parent to our root vchain and fchain to 98710136ab6SMatthew Dillon * synchronize the bref to, their updated mirror_tid's 98810136ab6SMatthew Dillon * must be synchronized to the volume header. 98910136ab6SMatthew Dillon */ 99010136ab6SMatthew Dillon hmp->voldata.mirror_tid = chain->bref.mirror_tid; 99110136ab6SMatthew Dillon /*hmp->voldata.freemap_tid = hmp->fchain.bref.mirror_tid;*/ 9921a7cfe5aSMatthew Dillon 9931a7cfe5aSMatthew Dillon /* 99432b800e6SMatthew Dillon * The volume header is flushed manually by the syncer, not 99510136ab6SMatthew Dillon * here. All we do here is adjust the crc's. 
99632b800e6SMatthew Dillon */ 99732b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 998fdf62707SMatthew Dillon KKASSERT(chain->dio == NULL); 99932b800e6SMatthew Dillon 100032b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 100132b800e6SMatthew Dillon hammer2_icrc32( 100232b800e6SMatthew Dillon (char *)&hmp->voldata + 100332b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_OFF, 100432b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_SIZE); 100532b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 100632b800e6SMatthew Dillon hammer2_icrc32( 100732b800e6SMatthew Dillon (char *)&hmp->voldata + 100832b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_OFF, 100932b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_SIZE); 101032b800e6SMatthew Dillon hmp->voldata.icrc_volheader = 101132b800e6SMatthew Dillon hammer2_icrc32( 101232b800e6SMatthew Dillon (char *)&hmp->voldata + 101332b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_OFF, 101432b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_SIZE); 101532b800e6SMatthew Dillon hmp->volsync = hmp->voldata; 10160dea3156SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 101793f3933aSMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 101832b800e6SMatthew Dillon break; 101932b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_DATA: 102032b800e6SMatthew Dillon /* 102132b800e6SMatthew Dillon * Data elements have already been flushed via the logical 102232b800e6SMatthew Dillon * file buffer cache. Their hash was set in the bref by 102332b800e6SMatthew Dillon * the vop_write code. 102432b800e6SMatthew Dillon * 1025ea155208SMatthew Dillon * Make sure any device buffer(s) have been flushed out here. 1026ea155208SMatthew Dillon * (there aren't usually any to flush). 
102732b800e6SMatthew Dillon */ 1028fdf62707SMatthew Dillon #if 0 1029fdf62707SMatthew Dillon /* XXX */ 1030fdf62707SMatthew Dillon /* chain and chain->bref, NOWAIT operation */ 1031fdf62707SMatthew Dillon #endif 103232b800e6SMatthew Dillon break; 1033512beabdSMatthew Dillon #if 0 103432b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 103532b800e6SMatthew Dillon /* 103632b800e6SMatthew Dillon * Indirect blocks may be in an INITIAL state. Use the 103732b800e6SMatthew Dillon * chain_lock() call to ensure that the buffer has been 103832b800e6SMatthew Dillon * instantiated (even though it is already locked the buffer 103932b800e6SMatthew Dillon * might not have been instantiated). 104032b800e6SMatthew Dillon * 104132b800e6SMatthew Dillon * Only write the buffer out if it is dirty, it is possible 104232b800e6SMatthew Dillon * the operating system had already written out the buffer. 104332b800e6SMatthew Dillon */ 10440dea3156SMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 1045fdf62707SMatthew Dillon KKASSERT(chain->dio != NULL); 104632b800e6SMatthew Dillon 104732b800e6SMatthew Dillon chain->data = NULL; 1048fdf62707SMatthew Dillon hammer2_io_bqrelse(&chain->dio); 10490dea3156SMatthew Dillon hammer2_chain_unlock(chain); 105032b800e6SMatthew Dillon break; 1051512beabdSMatthew Dillon #endif 1052512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 10531a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 105491caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 105591caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 10561a7cfe5aSMatthew Dillon /* 10571a7cfe5aSMatthew Dillon * Device-backed. Buffer will be flushed by the sync 10581a7cfe5aSMatthew Dillon * code XXX. 
10591a7cfe5aSMatthew Dillon */ 1060512beabdSMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 10611a7cfe5aSMatthew Dillon break; 106232b800e6SMatthew Dillon default: 106391caa51cSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 106491caa51cSMatthew Dillon panic("hammer2_chain_flush_core: unsupported embedded bref %d", 106591caa51cSMatthew Dillon chain->bref.type); 106691caa51cSMatthew Dillon /* NOT REACHED */ 106791caa51cSMatthew Dillon #if 0 106832b800e6SMatthew Dillon /* 106932b800e6SMatthew Dillon * Embedded elements have to be flushed out. 10701a7cfe5aSMatthew Dillon * (Basically just BREF_TYPE_INODE). 107132b800e6SMatthew Dillon */ 1072512beabdSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 107332b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 1074fdf62707SMatthew Dillon KKASSERT(chain->dio == NULL); 107532b800e6SMatthew Dillon bref = &chain->bref; 107632b800e6SMatthew Dillon 107732b800e6SMatthew Dillon KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0); 10789061bde5SMatthew Dillon KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) == 1079512beabdSMatthew Dillon HAMMER2_CHECK_ISCSI32 || 1080512beabdSMatthew Dillon HAMMER2_DEC_CHECK(chain->bref.methods) == 1081512beabdSMatthew Dillon HAMMER2_CHECK_FREEMAP); 108232b800e6SMatthew Dillon 108332b800e6SMatthew Dillon /* 108432b800e6SMatthew Dillon * The data is embedded, we have to acquire the 108532b800e6SMatthew Dillon * buffer cache buffer and copy the data into it. 108632b800e6SMatthew Dillon */ 1087fdf62707SMatthew Dillon error = hammer2_io_bread(hmp, bref->data_off, chain->bytes, 1088fdf62707SMatthew Dillon &dio); 108932b800e6SMatthew Dillon KKASSERT(error == 0); 1090fdf62707SMatthew Dillon bdata = hammer2_io_data(dio, bref->data_off); 109132b800e6SMatthew Dillon 109232b800e6SMatthew Dillon /* 109332b800e6SMatthew Dillon * Copy the data to the buffer, mark the buffer 109432b800e6SMatthew Dillon * dirty, and convert the chain to unmodified. 
109532b800e6SMatthew Dillon */ 109632b800e6SMatthew Dillon bcopy(chain->data, bdata, chain->bytes); 1097fdf62707SMatthew Dillon hammer2_io_bdwrite(&dio); 1098a98aa0b0SMatthew Dillon 1099512beabdSMatthew Dillon switch(HAMMER2_DEC_CHECK(chain->bref.methods)) { 1100512beabdSMatthew Dillon case HAMMER2_CHECK_FREEMAP: 1101512beabdSMatthew Dillon chain->bref.check.freemap.icrc32 = 1102512beabdSMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 1103512beabdSMatthew Dillon break; 1104512beabdSMatthew Dillon case HAMMER2_CHECK_ISCSI32: 110532b800e6SMatthew Dillon chain->bref.check.iscsi32.value = 110632b800e6SMatthew Dillon hammer2_icrc32(chain->data, chain->bytes); 1107512beabdSMatthew Dillon break; 1108512beabdSMatthew Dillon default: 1109512beabdSMatthew Dillon panic("hammer2_flush_core: bad crc type"); 1110512beabdSMatthew Dillon break; /* NOT REACHED */ 1111512beabdSMatthew Dillon } 111232b800e6SMatthew Dillon if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) 111332b800e6SMatthew Dillon ++hammer2_iod_meta_write; 111432b800e6SMatthew Dillon else 111532b800e6SMatthew Dillon ++hammer2_iod_indr_write; 111691caa51cSMatthew Dillon #endif 111732b800e6SMatthew Dillon } 111832b800e6SMatthew Dillon } 111932b800e6SMatthew Dillon 112032b800e6SMatthew Dillon /* 11210dea3156SMatthew Dillon * Flush helper scan1 (recursive) 11220dea3156SMatthew Dillon * 11230dea3156SMatthew Dillon * Flushes the children of the caller's chain (parent) and updates 1124ea155208SMatthew Dillon * the blockref, restricted by sync_tid. 11250dea3156SMatthew Dillon * 11260dea3156SMatthew Dillon * Ripouts during the loop should not cause any problems. Because we are 11270dea3156SMatthew Dillon * flushing to a synchronization point, modification races will occur after 11280dea3156SMatthew Dillon * sync_tid and do not have to be flushed anyway. 
1129ea155208SMatthew Dillon * 1130ea155208SMatthew Dillon * It is also ok if the parent is chain_duplicate()'d while unlocked because 1131ea155208SMatthew Dillon * the delete/duplication will install a delete_tid that is still larger than 1132ea155208SMatthew Dillon * our current sync_tid. 1133925e4ad1SMatthew Dillon * 1134925e4ad1SMatthew Dillon * WARNING! If we do not call chain_flush_core we must update bref.mirror_tid 1135925e4ad1SMatthew Dillon * ourselves. 113632b800e6SMatthew Dillon */ 11370dea3156SMatthew Dillon static int 11380dea3156SMatthew Dillon hammer2_chain_flush_scan1(hammer2_chain_t *child, void *data) 113932b800e6SMatthew Dillon { 11400dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 1141cd189b1eSMatthew Dillon hammer2_trans_t *trans = info->trans; 11420dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 11430dea3156SMatthew Dillon int diddeferral; 114432b800e6SMatthew Dillon 1145925e4ad1SMatthew Dillon if (hammer2_debug & 0x80000) 1146925e4ad1SMatthew Dillon Debugger("hell3"); 1147925e4ad1SMatthew Dillon diddeferral = info->diddeferral; 1148925e4ad1SMatthew Dillon 11490dea3156SMatthew Dillon /* 1150a4dc31e0SMatthew Dillon * Child is beyond the flush synchronization zone, don't pursue. 1151a4dc31e0SMatthew Dillon * Remember that modifications generally delete-duplicate so if the 1152a4dc31e0SMatthew Dillon * sub-tree is dirty another child will get us there. But not this 1153a4dc31e0SMatthew Dillon * one. 1154a4dc31e0SMatthew Dillon * 1155a4dc31e0SMatthew Dillon * Or MODIFIED is not set and child is already fully synchronized 1156a4dc31e0SMatthew Dillon * with its sub-tree. Don't pursue. 115710136ab6SMatthew Dillon * 115810136ab6SMatthew Dillon * (child can never be fchain or vchain so a special check isn't 115910136ab6SMatthew Dillon * needed). 
11600dea3156SMatthew Dillon */ 1161a4dc31e0SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 1162a4dc31e0SMatthew Dillon KKASSERT(child->delete_tid >= child->modify_tid); 1163925e4ad1SMatthew Dillon /* do not update child->core->update_lo, core not flushed */ 1164925e4ad1SMatthew Dillon /* do not update core->update_lo, there may be another path */ 1165623d43d4SMatthew Dillon /* do not update mirror_tid, scan2 will ignore chain */ 11660dea3156SMatthew Dillon return (0); 116732b800e6SMatthew Dillon } 1168cd189b1eSMatthew Dillon 1169a4dc31e0SMatthew Dillon /* 1170a4dc31e0SMatthew Dillon * We must ref the child before unlocking the spinlock. 1171a4dc31e0SMatthew Dillon * 1172a4dc31e0SMatthew Dillon * The caller has added a ref to the parent so we can temporarily 1173a4dc31e0SMatthew Dillon * unlock it in order to lock the child. 1174a4dc31e0SMatthew Dillon */ 1175ea155208SMatthew Dillon hammer2_chain_ref(child); 11760dea3156SMatthew Dillon spin_unlock(&parent->core->cst.spin); 11770dea3156SMatthew Dillon 11780dea3156SMatthew Dillon hammer2_chain_unlock(parent); 11790dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE); 11800dea3156SMatthew Dillon 118103faa7d5SMatthew Dillon /* 118203faa7d5SMatthew Dillon * No recursion needed if neither the child or anything under it 118303faa7d5SMatthew Dillon * was messed with. 118403faa7d5SMatthew Dillon */ 1185925e4ad1SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 && 1186925e4ad1SMatthew Dillon child->core->update_lo >= info->sync_tid) { 11870924b3f8SMatthew Dillon KKASSERT((child->flags & HAMMER2_CHAIN_MODIFIED) == 0); 1188623d43d4SMatthew Dillon if (child->bref.mirror_tid < info->sync_tid) 1189623d43d4SMatthew Dillon child->bref.mirror_tid = info->sync_tid; 1190925e4ad1SMatthew Dillon goto skip; 1191925e4ad1SMatthew Dillon } 1192925e4ad1SMatthew Dillon 1193925e4ad1SMatthew Dillon /* 1194044541cdSMatthew Dillon * XXX delete child if parent is deleted. 
Propagate deletion 1195044541cdSMatthew Dillon * downward. TODO 1196044541cdSMatthew Dillon */ 1197044541cdSMatthew Dillon 1198044541cdSMatthew Dillon 1199044541cdSMatthew Dillon /* 120003faa7d5SMatthew Dillon * Re-check original pre-lock conditions after locking. 1201925e4ad1SMatthew Dillon */ 1202925e4ad1SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 1203925e4ad1SMatthew Dillon hammer2_chain_unlock(child); 1204925e4ad1SMatthew Dillon hammer2_chain_drop(child); 1205925e4ad1SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 1206925e4ad1SMatthew Dillon spin_lock(&parent->core->cst.spin); 1207925e4ad1SMatthew Dillon return (0); 1208925e4ad1SMatthew Dillon } 1209925e4ad1SMatthew Dillon 1210925e4ad1SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MODIFIED) == 0 && 1211925e4ad1SMatthew Dillon child->core->update_lo >= info->sync_tid) { 12120924b3f8SMatthew Dillon KKASSERT((child->flags & HAMMER2_CHAIN_MODIFIED) == 0); 1213623d43d4SMatthew Dillon if (child->bref.mirror_tid < info->sync_tid) 1214623d43d4SMatthew Dillon child->bref.mirror_tid = info->sync_tid; 1215925e4ad1SMatthew Dillon goto skip; 1216925e4ad1SMatthew Dillon } 1217925e4ad1SMatthew Dillon 12180dea3156SMatthew Dillon /* 12190dea3156SMatthew Dillon * Recurse and collect deferral data. 12200dea3156SMatthew Dillon */ 12210dea3156SMatthew Dillon ++info->depth; 1222a7720be7SMatthew Dillon hammer2_chain_flush_core(info, &child); 12230dea3156SMatthew Dillon --info->depth; 12240dea3156SMatthew Dillon 1225a4dc31e0SMatthew Dillon skip: 1226a4dc31e0SMatthew Dillon /* 1227a4dc31e0SMatthew Dillon * Check the conditions that could cause SCAN2 to modify the parent. 1228a4dc31e0SMatthew Dillon * Modify the parent here instead of in SCAN2, which would cause 1229a4dc31e0SMatthew Dillon * rollup chicken-and-egg races. 
1230925e4ad1SMatthew Dillon * 1231925e4ad1SMatthew Dillon * Scan2 is expected to update bref.mirror_tid in the domodify case, 1232925e4ad1SMatthew Dillon * but will skip the child otherwise giving us the responsibility to 1233925e4ad1SMatthew Dillon * update bref.mirror_tid. 1234623d43d4SMatthew Dillon * 1235623d43d4SMatthew Dillon * WARNING! Do NOT update the child's bref.mirror_tid right here, 1236623d43d4SMatthew Dillon * even if there was no deferral. Doing so would cause 1237623d43d4SMatthew Dillon * confusion with the child's block array state in a 1238623d43d4SMatthew Dillon * future flush. 1239a4dc31e0SMatthew Dillon */ 1240623d43d4SMatthew Dillon if (h2ignore_deleted(info, parent)) { 124103faa7d5SMatthew Dillon /* 124203faa7d5SMatthew Dillon * Special optimization matching similar tests done in 1243fdf62707SMatthew Dillon * flush_core, scan1, and scan2. Avoid updating the block 1244fdf62707SMatthew Dillon * table in the parent if the parent is no longer visible. 124503faa7d5SMatthew Dillon */ 124603faa7d5SMatthew Dillon ; 124703faa7d5SMatthew Dillon } else if (child->delete_tid <= trans->sync_tid && 1248a4dc31e0SMatthew Dillon child->delete_tid > parent->bref.mirror_tid && 1249a4dc31e0SMatthew Dillon child->modify_tid <= parent->bref.mirror_tid) { 125003faa7d5SMatthew Dillon info->domodify = 1; 1251a4dc31e0SMatthew Dillon } else if (child->delete_tid > trans->sync_tid && 1252a4dc31e0SMatthew Dillon child->modify_tid > parent->bref.mirror_tid) { 1253925e4ad1SMatthew Dillon info->domodify = 1; /* base insertion */ 1254a4dc31e0SMatthew Dillon } 1255a4dc31e0SMatthew Dillon 1256a4dc31e0SMatthew Dillon /* 1257a4dc31e0SMatthew Dillon * Relock to continue the loop 1258a4dc31e0SMatthew Dillon */ 1259a4dc31e0SMatthew Dillon hammer2_chain_unlock(child); 1260ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 1261a4dc31e0SMatthew Dillon hammer2_chain_drop(child); 1262a4dc31e0SMatthew Dillon KKASSERT(info->parent == parent); 12630dea3156SMatthew 
Dillon 12640dea3156SMatthew Dillon spin_lock(&parent->core->cst.spin); 12650dea3156SMatthew Dillon return (0); 12660dea3156SMatthew Dillon } 12670dea3156SMatthew Dillon 12680dea3156SMatthew Dillon /* 12690dea3156SMatthew Dillon * Flush helper scan2 (non-recursive) 12700dea3156SMatthew Dillon * 12710dea3156SMatthew Dillon * This pass on a chain's children propagates any MOVED or DELETED 1272ea155208SMatthew Dillon * elements back up the chain towards the root after those elements have 1273ea155208SMatthew Dillon * been fully flushed. Unlike scan1, this function is NOT recursive and 1274ea155208SMatthew Dillon * the parent remains locked across the entire scan. 12750dea3156SMatthew Dillon * 12761897c66eSMatthew Dillon * SCAN2 is called twice, once with pass set to 1 and once with it set to 2. 12771897c66eSMatthew Dillon * We have to do this so base[] elements can be deleted in pass 1 to make 12781897c66eSMatthew Dillon * room for adding new elements in pass 2. 12791897c66eSMatthew Dillon * 128091abd410SMatthew Dillon * This function also rolls up storage statistics. 128191abd410SMatthew Dillon * 12821897c66eSMatthew Dillon * NOTE! A deletion is a visbility issue, there can still be references to 1283ea155208SMatthew Dillon * deleted elements (for example, to an unlinked file which is still 1284ea155208SMatthew Dillon * open), and there can also be multiple chains pointing to the same 1285ea155208SMatthew Dillon * bref where some are deleted and some are not (for example due to 1286ea155208SMatthew Dillon * a rename). So a chain marked for deletion is basically considered 1287a864c5d9SMatthew Dillon * to be live until it is explicitly destroyed or until its ref-count 1288a864c5d9SMatthew Dillon * reaches zero (also implying that MOVED and MODIFIED are clear). 1289a4dc31e0SMatthew Dillon * 1290a4dc31e0SMatthew Dillon * NOTE! 
Info->parent will be locked but will only be instantiated/modified 1291a4dc31e0SMatthew Dillon * if it is either MODIFIED or if scan1 determined that block table 1292a4dc31e0SMatthew Dillon * updates will occur. 1293925e4ad1SMatthew Dillon * 1294925e4ad1SMatthew Dillon * NOTE! SCAN2 is responsible for updating child->bref.mirror_tid only in 1295925e4ad1SMatthew Dillon * the case where it modifies the parent (does a base insertion 1296925e4ad1SMatthew Dillon * or deletion). SCAN1 handled all other cases. 12970dea3156SMatthew Dillon */ 12980dea3156SMatthew Dillon static int 12990dea3156SMatthew Dillon hammer2_chain_flush_scan2(hammer2_chain_t *child, void *data) 13000dea3156SMatthew Dillon { 13010dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 13020dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 1303731b2a84SMatthew Dillon hammer2_chain_core_t *above = child->above; 1304a5913bdfSMatthew Dillon hammer2_mount_t *hmp = child->hmp; 1305a864c5d9SMatthew Dillon hammer2_trans_t *trans = info->trans; 13060dea3156SMatthew Dillon hammer2_blockref_t *base; 13070dea3156SMatthew Dillon int count; 13081897c66eSMatthew Dillon int ok; 1309ea155208SMatthew Dillon 1310a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1311a4dc31e0SMatthew Dillon kprintf("SCAN2 %p.%d %08x mod=%016jx del=%016jx trans=%016jx\n", child, child->bref.type, child->flags, child->modify_tid, child->delete_tid, info->trans->sync_tid); 1312a4dc31e0SMatthew Dillon #endif 1313ea155208SMatthew Dillon /* 1314731b2a84SMatthew Dillon * Ignore children created after our flush point, treating them as 1315cd189b1eSMatthew Dillon * if they did not exist). These children will not cause the parent 1316cd189b1eSMatthew Dillon * to be updated. 1317731b2a84SMatthew Dillon * 1318a7720be7SMatthew Dillon * Children deleted after our flush point are treated as having been 1319925e4ad1SMatthew Dillon * created for the purposes of the flush. 
The parent's update_hi 1320925e4ad1SMatthew Dillon * will already be higher than our trans->sync_tid so the path for 1321925e4ad1SMatthew Dillon * the next flush is left intact. 1322a7720be7SMatthew Dillon * 1323cd189b1eSMatthew Dillon * When we encounter such children and the parent chain has not been 1324cd189b1eSMatthew Dillon * deleted, delete/duplicated, or delete/duplicated-for-move, then 1325cd189b1eSMatthew Dillon * the parent may be used to funnel through several flush points. 1326a7720be7SMatthew Dillon * These chains will still be visible to later flushes due to having 1327925e4ad1SMatthew Dillon * a higher update_hi than we can set in the current flush. 1328731b2a84SMatthew Dillon */ 1329731b2a84SMatthew Dillon if (child->modify_tid > trans->sync_tid) { 1330a4dc31e0SMatthew Dillon KKASSERT(child->delete_tid >= child->modify_tid); 1331731b2a84SMatthew Dillon goto finalize; 1332731b2a84SMatthew Dillon } 1333731b2a84SMatthew Dillon 1334925e4ad1SMatthew Dillon #if 0 1335731b2a84SMatthew Dillon /* 1336731b2a84SMatthew Dillon * Ignore children which have not changed. The parent's block table 1337731b2a84SMatthew Dillon * is already correct. 13381897c66eSMatthew Dillon * 13391897c66eSMatthew Dillon * XXX The MOVED bit is only cleared when all multi-homed parents 13401897c66eSMatthew Dillon * have flushed, creating a situation where a re-flush can occur 13411897c66eSMatthew Dillon * via a parent which has already flushed. The hammer2_base_*() 13421897c66eSMatthew Dillon * functions currently have a hack to deal with this case but 13431897c66eSMatthew Dillon * we need something better. 
1344ea155208SMatthew Dillon */ 1345ea155208SMatthew Dillon if ((child->flags & HAMMER2_CHAIN_MOVED) == 0) { 1346a4dc31e0SMatthew Dillon KKASSERT((child->flags & HAMMER2_CHAIN_MODIFIED) == 0); 13470dea3156SMatthew Dillon goto finalize; 13480dea3156SMatthew Dillon } 1349925e4ad1SMatthew Dillon #endif 1350ea155208SMatthew Dillon 13511897c66eSMatthew Dillon /* 13521897c66eSMatthew Dillon * Make sure child is referenced before we unlock. 13531897c66eSMatthew Dillon */ 1354ea155208SMatthew Dillon hammer2_chain_ref(child); 1355731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 13560dea3156SMatthew Dillon 13570dea3156SMatthew Dillon /* 13581897c66eSMatthew Dillon * Parent reflushed after the child has passed them by should skip 13591897c66eSMatthew Dillon * due to the modify_tid test. XXX 13600dea3156SMatthew Dillon */ 13610dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_NEVER); 13621897c66eSMatthew Dillon KKASSERT(child->above == above); 13631897c66eSMatthew Dillon KKASSERT(parent->core == above); 13640dea3156SMatthew Dillon 13650dea3156SMatthew Dillon /* 13660dea3156SMatthew Dillon * The parent's blockref to the child must be deleted or updated. 13670dea3156SMatthew Dillon * 1368044541cdSMatthew Dillon * This point is not reached on successful DELETED optimizations 1369a864c5d9SMatthew Dillon * but can be reached on recursive deletions and restricted flushes. 1370ea155208SMatthew Dillon * 1371a7720be7SMatthew Dillon * The chain_modify here may delete-duplicate the block. This can 1372a7720be7SMatthew Dillon * cause a multitude of issues if the block was already modified 1373a7720be7SMatthew Dillon * by a later (post-flush) transaction. 
Primarily blockrefs in 1374a7720be7SMatthew Dillon * the later block can be out-of-date, so if the situation occurs 1375a7720be7SMatthew Dillon * we can't throw away the MOVED bit on the current blocks until 1376a7720be7SMatthew Dillon * the later blocks are flushed (so as to be able to regenerate all 1377a7720be7SMatthew Dillon * the changes that were made). 1378a7720be7SMatthew Dillon * 13794a59bd3eSMatthew Dillon * Because flushes are ordered we do not have to make a 13804a59bd3eSMatthew Dillon * modify/duplicate of indirect blocks. That is, the flush 13814a59bd3eSMatthew Dillon * code does not have to kmalloc or duplicate anything. We 13824a59bd3eSMatthew Dillon * can adjust the indirect block table in-place and reuse the 13834a59bd3eSMatthew Dillon * chain. It IS possible that the chain has already been duplicated 13844a59bd3eSMatthew Dillon * or may wind up being duplicated on-the-fly by modifying code 13854a59bd3eSMatthew Dillon * on the frontend. We simply use the original and ignore such 13864a59bd3eSMatthew Dillon * chains. However, it does mean we can't clear the MOVED bit. 13874a59bd3eSMatthew Dillon * 1388ea155208SMatthew Dillon * XXX recursive deletions not optimized. 13890dea3156SMatthew Dillon */ 13900dea3156SMatthew Dillon 13910dea3156SMatthew Dillon switch(parent->bref.type) { 13920dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 1393ea155208SMatthew Dillon /* 13943f5b8b3bSMatthew Dillon * Access the inode's block array. However, there is no 13953f5b8b3bSMatthew Dillon * block array if the inode is flagged DIRECTDATA. The 13963f5b8b3bSMatthew Dillon * DIRECTDATA case typicaly only occurs when a hardlink has 13973f5b8b3bSMatthew Dillon * been shifted up the tree and the original inode gets 13983f5b8b3bSMatthew Dillon * replaced with an OBJTYPE_HARDLINK placeholding inode. 
1399ea155208SMatthew Dillon */ 14003f5b8b3bSMatthew Dillon if (parent->data && 14013f5b8b3bSMatthew Dillon (parent->data->ipdata.op_flags & 14023f5b8b3bSMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA) == 0) { 14030dea3156SMatthew Dillon base = &parent->data->ipdata.u.blockset.blockref[0]; 14043f5b8b3bSMatthew Dillon } else { 1405a4dc31e0SMatthew Dillon base = NULL; 14063f5b8b3bSMatthew Dillon } 14070dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 14080dea3156SMatthew Dillon break; 14090dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 14101a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1411a4dc31e0SMatthew Dillon if (parent->data) 141293f3933aSMatthew Dillon base = &parent->data->npdata[0]; 1413a4dc31e0SMatthew Dillon else 14140dea3156SMatthew Dillon base = NULL; 14150dea3156SMatthew Dillon count = parent->bytes / sizeof(hammer2_blockref_t); 14160dea3156SMatthew Dillon break; 14170dea3156SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 14180dea3156SMatthew Dillon base = &hmp->voldata.sroot_blockset.blockref[0]; 14190dea3156SMatthew Dillon count = HAMMER2_SET_COUNT; 14200dea3156SMatthew Dillon break; 14211a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 142293f3933aSMatthew Dillon base = &parent->data->npdata[0]; 14231a7cfe5aSMatthew Dillon count = HAMMER2_SET_COUNT; 14241a7cfe5aSMatthew Dillon break; 14250dea3156SMatthew Dillon default: 14260dea3156SMatthew Dillon base = NULL; 14270dea3156SMatthew Dillon count = 0; 14281897c66eSMatthew Dillon panic("hammer2_chain_flush_scan2: " 14290dea3156SMatthew Dillon "unrecognized blockref type: %d", 14300dea3156SMatthew Dillon parent->bref.type); 14310dea3156SMatthew Dillon } 14320dea3156SMatthew Dillon 14330dea3156SMatthew Dillon /* 1434044541cdSMatthew Dillon * Don't bother updating a deleted parent's blockrefs. 14351897c66eSMatthew Dillon * 14361897c66eSMatthew Dillon * Otherwise, we need to be COUNTEDBREFS synchronized for the 14371897c66eSMatthew Dillon * hammer2_base_*() functions. 
143803faa7d5SMatthew Dillon * 143903faa7d5SMatthew Dillon * This test must match the similar one in flush_core. 14401897c66eSMatthew Dillon */ 1441a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1442a4dc31e0SMatthew Dillon kprintf("SCAN2 base=%p pass=%d PARENT %p.%d DTID=%016jx SYNC=%016jx\n", 1443a4dc31e0SMatthew Dillon base, 1444a4dc31e0SMatthew Dillon info->pass, parent, parent->bref.type, parent->delete_tid, trans->sync_tid); 1445a4dc31e0SMatthew Dillon #endif 1446623d43d4SMatthew Dillon if (h2ignore_deleted(info, parent)) 14471897c66eSMatthew Dillon base = NULL; 14481897c66eSMatthew Dillon 14491897c66eSMatthew Dillon /* 14500dea3156SMatthew Dillon * Update the parent's blockref table and propagate mirror_tid. 1451d5fabb70SMatthew Dillon * 1452731b2a84SMatthew Dillon * NOTE! Children with modify_tid's beyond our flush point are 1453731b2a84SMatthew Dillon * considered to not exist for the purposes of updating the 1454731b2a84SMatthew Dillon * parent's blockref array. 1455d5fabb70SMatthew Dillon * 1456925e4ad1SMatthew Dillon * NOTE! SCAN1 has already put the parent in a modified state 1457a4dc31e0SMatthew Dillon * so if it isn't we panic. 1458a4dc31e0SMatthew Dillon * 1459a7720be7SMatthew Dillon * NOTE! chain->modify_tid vs chain->bref.modify_tid. The chain's 1460a7720be7SMatthew Dillon * internal modify_tid is always updated based on creation 1461a7720be7SMatthew Dillon * or delete-duplicate. However, the bref.modify_tid is NOT 1462a7720be7SMatthew Dillon * updated due to simple blockref updates. 
14630dea3156SMatthew Dillon */ 1464a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1465a7720be7SMatthew Dillon kprintf("chain %p->%p pass %d trans %016jx sync %p.%d %016jx/%d C=%016jx D=%016jx PMIRROR %016jx\n", 1466a7720be7SMatthew Dillon parent, child, 1467a7720be7SMatthew Dillon info->pass, trans->sync_tid, 1468a7720be7SMatthew Dillon child, child->bref.type, 1469a7720be7SMatthew Dillon child->bref.key, child->bref.keybits, 1470a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, parent->bref.mirror_tid); 1471a7720be7SMatthew Dillon #endif 1472a7720be7SMatthew Dillon 14731897c66eSMatthew Dillon if (info->pass == 1 && child->delete_tid <= trans->sync_tid) { 14741897c66eSMatthew Dillon /* 1475a7720be7SMatthew Dillon * Deleting. The block array is expected to contain the 1476a7720be7SMatthew Dillon * child's entry if: 1477a7720be7SMatthew Dillon * 1478a7720be7SMatthew Dillon * (1) The deletion occurred after the parent's block table 1479a7720be7SMatthew Dillon * was last synchronized (delete_tid), and 1480a7720be7SMatthew Dillon * 1481a7720be7SMatthew Dillon * (2) The creation occurred before or during the parent's 1482a7720be7SMatthew Dillon * last block table synchronization. 
14831897c66eSMatthew Dillon */ 1484a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1485623d43d4SMatthew Dillon kprintf("S2A %p.%d b=%p d/b=%016jx/%016jx m/b=%016jx/%016jx\n", 1486623d43d4SMatthew Dillon child, child->bref.type, 1487623d43d4SMatthew Dillon base, child->delete_tid, parent->bref.mirror_tid, 1488a4dc31e0SMatthew Dillon child->modify_tid, parent->bref.mirror_tid); 1489a4dc31e0SMatthew Dillon #endif 1490a7720be7SMatthew Dillon if (base && 1491a7720be7SMatthew Dillon child->delete_tid > parent->bref.mirror_tid && 1492a7720be7SMatthew Dillon child->modify_tid <= parent->bref.mirror_tid) { 1493925e4ad1SMatthew Dillon KKASSERT(child->flags & HAMMER2_CHAIN_MOVED); 149410136ab6SMatthew Dillon KKASSERT(parent->modify_tid == trans->sync_tid || 149510136ab6SMatthew Dillon (parent == &hmp->vchain || 149610136ab6SMatthew Dillon parent == &hmp->fchain)); 149791abd410SMatthew Dillon hammer2_rollup_stats(parent, child, -1); 14981897c66eSMatthew Dillon spin_lock(&above->cst.spin); 1499a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1500a7720be7SMatthew Dillon kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " 1501a7720be7SMatthew Dillon "flg=%08x %016jx/%d delete\n", 1502a7720be7SMatthew Dillon trans->sync_tid, 1503a7720be7SMatthew Dillon parent, parent->bref.type, 1504a7720be7SMatthew Dillon child, child->bref.type, 1505a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, 1506a7720be7SMatthew Dillon child->flags, 1507a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1508a7720be7SMatthew Dillon #endif 1509623d43d4SMatthew Dillon hammer2_base_delete(trans, parent, base, count, 1510a7720be7SMatthew Dillon &info->cache_index, child); 15111897c66eSMatthew Dillon spin_unlock(&above->cst.spin); 151293f3933aSMatthew Dillon } 15131897c66eSMatthew Dillon } else if (info->pass == 2 && child->delete_tid > trans->sync_tid) { 15141897c66eSMatthew Dillon /* 1515a7720be7SMatthew Dillon * Inserting. 
The block array is expected to NOT contain 1516a7720be7SMatthew Dillon * the child's entry if: 1517a7720be7SMatthew Dillon * 1518a7720be7SMatthew Dillon * (1) The creation occurred after the parent's block table 1519a7720be7SMatthew Dillon * was last synchronized (modify_tid), and 1520a7720be7SMatthew Dillon * 1521a7720be7SMatthew Dillon * (2) The child is not being deleted in the same 1522a7720be7SMatthew Dillon * transaction. 15231897c66eSMatthew Dillon */ 1524623d43d4SMatthew Dillon #if FLUSH_DEBUG 1525623d43d4SMatthew Dillon kprintf("S2B %p.%d b=%p d/b=%016jx/%016jx m/b=%016jx/%016jx\n", 1526623d43d4SMatthew Dillon child, child->bref.type, 1527623d43d4SMatthew Dillon base, child->delete_tid, parent->bref.mirror_tid, 1528623d43d4SMatthew Dillon child->modify_tid, parent->bref.mirror_tid); 1529623d43d4SMatthew Dillon #endif 1530a7720be7SMatthew Dillon if (base && 1531a4dc31e0SMatthew Dillon child->modify_tid > parent->bref.mirror_tid) { 1532925e4ad1SMatthew Dillon KKASSERT(child->flags & HAMMER2_CHAIN_MOVED); 153310136ab6SMatthew Dillon KKASSERT(parent->modify_tid == trans->sync_tid || 153410136ab6SMatthew Dillon (parent == &hmp->vchain || 153510136ab6SMatthew Dillon parent == &hmp->fchain)); 15361897c66eSMatthew Dillon hammer2_rollup_stats(parent, child, 1); 15371897c66eSMatthew Dillon spin_lock(&above->cst.spin); 1538a4dc31e0SMatthew Dillon #if FLUSH_DEBUG 1539a7720be7SMatthew Dillon kprintf("trans %jx parent %p.%d child %p.%d m/d %016jx/%016jx " 1540a7720be7SMatthew Dillon "flg=%08x %016jx/%d insert\n", 1541a7720be7SMatthew Dillon trans->sync_tid, 1542a7720be7SMatthew Dillon parent, parent->bref.type, 1543a7720be7SMatthew Dillon child, child->bref.type, 1544a7720be7SMatthew Dillon child->modify_tid, child->delete_tid, 1545a7720be7SMatthew Dillon child->flags, 1546a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1547a7720be7SMatthew Dillon #endif 1548623d43d4SMatthew Dillon hammer2_base_insert(trans, parent, base, count, 1549a7720be7SMatthew 
Dillon &info->cache_index, child); 15501897c66eSMatthew Dillon spin_unlock(&above->cst.spin); 155193f3933aSMatthew Dillon } 1552623d43d4SMatthew Dillon } else if (info->pass == 3 && 1553623d43d4SMatthew Dillon (child->delete_tid == HAMMER2_MAX_TID || 1554623d43d4SMatthew Dillon child->delete_tid <= trans->sync_tid) && 1555623d43d4SMatthew Dillon (child->flags & HAMMER2_CHAIN_MOVED)) { 15560dea3156SMatthew Dillon /* 1557623d43d4SMatthew Dillon * We can't clear the MOVED bit on children whos modify_tid 1558623d43d4SMatthew Dillon * is beyond our current trans (was tested at top of scan2), 1559623d43d4SMatthew Dillon * or on deleted children which have not yet been flushed 1560623d43d4SMatthew Dillon * (handled above). 1561623d43d4SMatthew Dillon * 1562623d43d4SMatthew Dillon * Scan all parents of this child and determine if any of 1563623d43d4SMatthew Dillon * them still need the child's MOVED bit. 15640dea3156SMatthew Dillon */ 1565cd189b1eSMatthew Dillon hammer2_chain_t *scan; 1566a7720be7SMatthew Dillon 1567a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1568a7720be7SMatthew Dillon kprintf("CHECKMOVED %p (parent=%p)", child, parent); 1569731b2a84SMatthew Dillon 1570925e4ad1SMatthew Dillon ok = 1; 1571731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 15721897c66eSMatthew Dillon TAILQ_FOREACH(scan, &above->ownerq, core_entry) { 157309dd2dfeSMatthew Dillon /* 1574a4dc31e0SMatthew Dillon * Can't clear child's MOVED until all parent's have 1575a4dc31e0SMatthew Dillon * synchronized with it. 1576a7720be7SMatthew Dillon * 1577623d43d4SMatthew Dillon * Ignore deleted parents as-of this flush TID. 1578623d43d4SMatthew Dillon * Ignore the current parent being flushed. 
157909dd2dfeSMatthew Dillon */ 1580623d43d4SMatthew Dillon if (h2ignore_deleted(info, scan)) 1581623d43d4SMatthew Dillon continue; 1582623d43d4SMatthew Dillon if (scan == parent) 158309dd2dfeSMatthew Dillon continue; 1584a4dc31e0SMatthew Dillon 1585a4dc31e0SMatthew Dillon /* 1586623d43d4SMatthew Dillon * For parents not already synchronized check to see 1587623d43d4SMatthew Dillon * if the flush has gotten past them yet or not. 15880924b3f8SMatthew Dillon * 15890924b3f8SMatthew Dillon * This must roughly mimic the tests that 15900924b3f8SMatthew Dillon * hammer2_chain_flush_core() runs or we could leave 15910924b3f8SMatthew Dillon * children hanging around with MOVED set and cause 15920924b3f8SMatthew Dillon * a memory leak. 1593a4dc31e0SMatthew Dillon */ 1594623d43d4SMatthew Dillon if (scan->bref.mirror_tid >= trans->sync_tid) 1595623d43d4SMatthew Dillon continue; 15960924b3f8SMatthew Dillon if (scan->bref.mirror_tid >= above->update_hi) 15970924b3f8SMatthew Dillon continue; 1598623d43d4SMatthew Dillon 1599623d43d4SMatthew Dillon if (hammer2_debug & 0x4000) { 1600a7720be7SMatthew Dillon kprintf("(fail scan %p %016jx/%016jx)", 1601a7720be7SMatthew Dillon scan, scan->bref.mirror_tid, 1602a7720be7SMatthew Dillon child->modify_tid); 1603731b2a84SMatthew Dillon } 1604623d43d4SMatthew Dillon ok = 0; 1605623d43d4SMatthew Dillon break; 1606731b2a84SMatthew Dillon } 1607a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1608a7720be7SMatthew Dillon kprintf("\n"); 1609731b2a84SMatthew Dillon spin_unlock(&above->cst.spin); 1610a4dc31e0SMatthew Dillon 1611a4dc31e0SMatthew Dillon /* 1612a4dc31e0SMatthew Dillon * Can we finally clear MOVED? 
1613a4dc31e0SMatthew Dillon */ 1614731b2a84SMatthew Dillon if (ok) { 1615a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1616a7720be7SMatthew Dillon kprintf("clear moved %p.%d %016jx/%d\n", 1617a7720be7SMatthew Dillon child, child->bref.type, 1618a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1619623d43d4SMatthew Dillon atomic_clear_int(&child->flags, HAMMER2_CHAIN_MOVED); 16200924b3f8SMatthew Dillon if (child->flags & HAMMER2_CHAIN_MODIFIED) { 16210924b3f8SMatthew Dillon kprintf("modified child %p all parents updated\n", 16220924b3f8SMatthew Dillon child); 16230924b3f8SMatthew Dillon atomic_clear_int(&child->flags, 16240924b3f8SMatthew Dillon HAMMER2_CHAIN_MODIFIED); 16250924b3f8SMatthew Dillon hammer2_chain_memory_wakeup(child->pmp); 16260924b3f8SMatthew Dillon hammer2_chain_drop(child);/* cleared MODIFIED */ 16270924b3f8SMatthew Dillon } 16280924b3f8SMatthew Dillon hammer2_chain_drop(child); /* cleared MOVED */ 1629a7720be7SMatthew Dillon } else { 1630a7720be7SMatthew Dillon if (hammer2_debug & 0x4000) 1631a7720be7SMatthew Dillon kprintf("keep moved %p.%d %016jx/%d\n", 1632a7720be7SMatthew Dillon child, child->bref.type, 1633a7720be7SMatthew Dillon child->bref.key, child->bref.keybits); 1634ea155208SMatthew Dillon } 16350dea3156SMatthew Dillon } 16360dea3156SMatthew Dillon 16370dea3156SMatthew Dillon /* 16380dea3156SMatthew Dillon * Unlock the child. This can wind up dropping the child's 16390dea3156SMatthew Dillon * last ref, removing it from the parent's RB tree, and deallocating 16400dea3156SMatthew Dillon * the structure. The RB_SCAN() our caller is doing handles the 16410dea3156SMatthew Dillon * situation. 16420dea3156SMatthew Dillon */ 16430dea3156SMatthew Dillon hammer2_chain_unlock(child); 1644ea155208SMatthew Dillon hammer2_chain_drop(child); 1645731b2a84SMatthew Dillon spin_lock(&above->cst.spin); 16460dea3156SMatthew Dillon 16470dea3156SMatthew Dillon /* 1648a7720be7SMatthew Dillon * The parent may have been delete-duplicated. 
16490dea3156SMatthew Dillon */ 1650a7720be7SMatthew Dillon info->parent = parent; 16510dea3156SMatthew Dillon finalize: 16520dea3156SMatthew Dillon return (0); 165332b800e6SMatthew Dillon } 165491abd410SMatthew Dillon 16550924b3f8SMatthew Dillon /* 16560924b3f8SMatthew Dillon * Update core->update_lo and attempt to clear the MOVED bit 16570924b3f8SMatthew Dillon * for its children. 16580924b3f8SMatthew Dillon * 16590924b3f8SMatthew Dillon * This routine is only called after a sub-tree has been fully flushed 16600924b3f8SMatthew Dillon * up to the current flush synchronization point. Calling it under any 16610924b3f8SMatthew Dillon * other condition will blow up flush tracking. 16620924b3f8SMatthew Dillon */ 16630924b3f8SMatthew Dillon static 16640924b3f8SMatthew Dillon void 16650924b3f8SMatthew Dillon hammer2_flush_core_update(hammer2_chain_core_t *core, 16660924b3f8SMatthew Dillon hammer2_flush_info_t *info) 16670924b3f8SMatthew Dillon { 16680924b3f8SMatthew Dillon hammer2_chain_layer_t *layer; 16690924b3f8SMatthew Dillon 16700924b3f8SMatthew Dillon spin_lock(&core->cst.spin); 16710924b3f8SMatthew Dillon if (core->update_lo < info->sync_tid) 16720924b3f8SMatthew Dillon core->update_lo = info->sync_tid; 16730924b3f8SMatthew Dillon TAILQ_FOREACH_REVERSE(layer, &core->layerq, 16740924b3f8SMatthew Dillon h2_layer_list, entry) { 16750924b3f8SMatthew Dillon info->pass = 3; 16760924b3f8SMatthew Dillon ++layer->refs; 16770924b3f8SMatthew Dillon KKASSERT(layer->good == 0xABCD); 16780924b3f8SMatthew Dillon RB_SCAN(hammer2_chain_tree, &layer->rbtree, 16790924b3f8SMatthew Dillon NULL, hammer2_chain_flush_scan2, info); 16800924b3f8SMatthew Dillon --layer->refs; 16810924b3f8SMatthew Dillon KKASSERT(info->parent->core == core); 16820924b3f8SMatthew Dillon } 16830924b3f8SMatthew Dillon spin_unlock(&core->cst.spin); 16840924b3f8SMatthew Dillon } 16850924b3f8SMatthew Dillon 168691abd410SMatthew Dillon static 168791abd410SMatthew Dillon void 168891abd410SMatthew Dillon 
hammer2_rollup_stats(hammer2_chain_t *parent, hammer2_chain_t *child, int how)
{
	/*
	 * Roll the child's pending data/inode counters up into the parent.
	 *
	 * how < 0: the child is being removed from the parent's block
	 *	    table, its own size (and inode, if it is one) is
	 *	    subtracted.
	 * how > 0: the child is being inserted, its own size (and inode)
	 *	    is added.
	 * how == 0: only the pending counters are transferred.
	 *
	 * When the parent is an inode the totals are folded into its
	 * on-media ipdata counters and the in-memory counters are reset.
	 */
	int child_is_inode = (child->bref.type == HAMMER2_BREF_TYPE_INODE);

	/* transfer whatever the child accumulated, then zero the child */
	parent->data_count += child->data_count;
	parent->inode_count += child->inode_count;
	child->data_count = 0;
	child->inode_count = 0;

	/*
	 * Account for the child itself.
	 *
	 * XXX the child's own ipdata.data_count/inode_count are not applied
	 *     here because child->data may be NULL at this point.
	 */
	if (how < 0) {
		parent->data_count -= child->bytes;
		if (child_is_inode)
			parent->inode_count -= 1;
	} else if (how > 0) {
		parent->data_count += child->bytes;
		if (child_is_inode)
			parent->inode_count += 1;
	}

	/* non-inode parents just keep accumulating */
	if (parent->bref.type != HAMMER2_BREF_TYPE_INODE)
		return;

	/*
	 * Inode parent: commit to the inode data.  Propagation to the
	 * parent's own parents is not performed here.
	 */
	parent->data->ipdata.data_count += parent->data_count;
	parent->data->ipdata.inode_count += parent->inode_count;
	parent->data_count = 0;
	parent->inode_count = 0;
}