132b800e6SMatthew Dillon /* 268b321c1SMatthew Dillon * Copyright (c) 2011-2018 The DragonFly Project. All rights reserved. 332b800e6SMatthew Dillon * 432b800e6SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 532b800e6SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 632b800e6SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 732b800e6SMatthew Dillon * 832b800e6SMatthew Dillon * Redistribution and use in source and binary forms, with or without 932b800e6SMatthew Dillon * modification, are permitted provided that the following conditions 1032b800e6SMatthew Dillon * are met: 1132b800e6SMatthew Dillon * 1232b800e6SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 1332b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer. 1432b800e6SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 1532b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer in 1632b800e6SMatthew Dillon * the documentation and/or other materials provided with the 1732b800e6SMatthew Dillon * distribution. 1832b800e6SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 1932b800e6SMatthew Dillon * contributors may be used to endorse or promote products derived 2032b800e6SMatthew Dillon * from this software without specific, prior written permission. 2132b800e6SMatthew Dillon * 2232b800e6SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2332b800e6SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2432b800e6SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 2532b800e6SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 2632b800e6SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2732b800e6SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 2832b800e6SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2932b800e6SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 3032b800e6SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3132b800e6SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 3232b800e6SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3332b800e6SMatthew Dillon * SUCH DAMAGE. 3432b800e6SMatthew Dillon */ 3550456506SMatthew Dillon /* 3650456506SMatthew Dillon * TRANSACTION AND FLUSH HANDLING 3750456506SMatthew Dillon * 3850456506SMatthew Dillon * Deceptively simple but actually fairly difficult to implement properly is 3950456506SMatthew Dillon * how I would describe it. 4050456506SMatthew Dillon * 41da6f36f4SMatthew Dillon * Flushing generally occurs bottom-up but requires a top-down scan to 42da6f36f4SMatthew Dillon * locate chains with MODIFIED and/or UPDATE bits set. The ONFLUSH flag 43da6f36f4SMatthew Dillon * tells how to recurse downward to find these chains. 
4450456506SMatthew Dillon */ 4550456506SMatthew Dillon 4632b800e6SMatthew Dillon #include <sys/cdefs.h> 4732b800e6SMatthew Dillon #include <sys/param.h> 4832b800e6SMatthew Dillon #include <sys/systm.h> 4932b800e6SMatthew Dillon #include <sys/types.h> 5032b800e6SMatthew Dillon #include <sys/lock.h> 5132b800e6SMatthew Dillon #include <sys/uuid.h> 5232b800e6SMatthew Dillon 5332b800e6SMatthew Dillon #include "hammer2.h" 5432b800e6SMatthew Dillon 55925e4ad1SMatthew Dillon #define FLUSH_DEBUG 0 56925e4ad1SMatthew Dillon 576aaf5cb0SMatthew Dillon #define HAMMER2_FLUSH_DEPTH_LIMIT 60 /* stack recursion limit */ 58a71db85dSMatthew Dillon 59a71db85dSMatthew Dillon 6032b800e6SMatthew Dillon /* 6132b800e6SMatthew Dillon * Recursively flush the specified chain. The chain is locked and 6232b800e6SMatthew Dillon * referenced by the caller and will remain so on return. The chain 6332b800e6SMatthew Dillon * will remain referenced throughout but can temporarily lose its 6432b800e6SMatthew Dillon * lock during the recursion to avoid unnecessarily stalling user 6532b800e6SMatthew Dillon * processes. 
6632b800e6SMatthew Dillon */ 6732b800e6SMatthew Dillon struct hammer2_flush_info { 680dea3156SMatthew Dillon hammer2_chain_t *parent; 6932b800e6SMatthew Dillon int depth; 7065cacacfSMatthew Dillon int error; /* cumulative error */ 7153f84d31SMatthew Dillon int flags; 726aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG 736aaf5cb0SMatthew Dillon long scan_count; 746aaf5cb0SMatthew Dillon long scan_mod_count; 756aaf5cb0SMatthew Dillon long scan_upd_count; 766aaf5cb0SMatthew Dillon long scan_onf_count; 776aaf5cb0SMatthew Dillon long scan_del_count; 786aaf5cb0SMatthew Dillon long scan_btype[7]; 796aaf5cb0SMatthew Dillon #endif 80850687d2SMatthew Dillon hammer2_chain_t *debug; 8132b800e6SMatthew Dillon }; 8232b800e6SMatthew Dillon 8332b800e6SMatthew Dillon typedef struct hammer2_flush_info hammer2_flush_info_t; 8432b800e6SMatthew Dillon 85ecfe89b8SMatthew Dillon static int hammer2_flush_core(hammer2_flush_info_t *info, 8653f84d31SMatthew Dillon hammer2_chain_t *chain, int flags); 87da6f36f4SMatthew Dillon static int hammer2_flush_recurse(hammer2_chain_t *child, void *data); 8893f3933aSMatthew Dillon 8932b800e6SMatthew Dillon /* 90c603b86bSMatthew Dillon * Any per-pfs transaction initialization goes here. 9150456506SMatthew Dillon */ 9250456506SMatthew Dillon void 93c603b86bSMatthew Dillon hammer2_trans_manage_init(hammer2_pfs_t *pmp) 9450456506SMatthew Dillon { 9550456506SMatthew Dillon } 9650456506SMatthew Dillon 9750456506SMatthew Dillon /* 98d34788efSMatthew Dillon * Transaction support for any modifying operation. Transactions are used 99d34788efSMatthew Dillon * in the pmp layer by the frontend and in the spmp layer by the backend. 100c603b86bSMatthew Dillon * 1013e8408dbSMatthew Dillon * 0 - Normal transaction. Interlocks against just the 1023e8408dbSMatthew Dillon * COPYQ portion of an ISFLUSH transaction. 103c603b86bSMatthew Dillon * 104ecfe89b8SMatthew Dillon * TRANS_ISFLUSH - Flush transaction. 
Interlocks against other flush 105ecfe89b8SMatthew Dillon * transactions. 106c603b86bSMatthew Dillon * 1073e8408dbSMatthew Dillon * When COPYQ is also specified, waits for the count 1083e8408dbSMatthew Dillon * to drop to 1. 1093e8408dbSMatthew Dillon * 110ecfe89b8SMatthew Dillon * TRANS_BUFCACHE - Buffer cache transaction. No interlock. 111ecfe89b8SMatthew Dillon * 112ecfe89b8SMatthew Dillon * TRANS_SIDEQ - Run the sideq (only tested in trans_done()) 1130dea3156SMatthew Dillon * 11410136ab6SMatthew Dillon * Initializing a new transaction allocates a transaction ID. Typically 11510136ab6SMatthew Dillon * passed a pmp (hmp passed as NULL), indicating a cluster transaction. Can 11610136ab6SMatthew Dillon * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single 11710136ab6SMatthew Dillon * media target. The latter mode is used by the recovery code. 1180dea3156SMatthew Dillon */ 1190dea3156SMatthew Dillon void 120c603b86bSMatthew Dillon hammer2_trans_init(hammer2_pfs_t *pmp, uint32_t flags) 1210dea3156SMatthew Dillon { 122c603b86bSMatthew Dillon uint32_t oflags; 123c603b86bSMatthew Dillon uint32_t nflags; 124c603b86bSMatthew Dillon int dowait; 125d001f460SMatthew Dillon 126c603b86bSMatthew Dillon for (;;) { 127c603b86bSMatthew Dillon oflags = pmp->trans.flags; 128c603b86bSMatthew Dillon cpu_ccfence(); 129c603b86bSMatthew Dillon dowait = 0; 130d001f460SMatthew Dillon 131d001f460SMatthew Dillon if (flags & HAMMER2_TRANS_ISFLUSH) { 132d001f460SMatthew Dillon /* 133ecfe89b8SMatthew Dillon * Interlock against other flush transactions. 
134355d67fcSMatthew Dillon */ 1355afbe9d8SMatthew Dillon if (oflags & HAMMER2_TRANS_ISFLUSH) { 1365afbe9d8SMatthew Dillon nflags = oflags | HAMMER2_TRANS_WAITING; 1375afbe9d8SMatthew Dillon dowait = 1; 1385afbe9d8SMatthew Dillon } else { 1395afbe9d8SMatthew Dillon nflags = (oflags | flags) + 1; 1405afbe9d8SMatthew Dillon } 141c603b86bSMatthew Dillon } else if (flags & HAMMER2_TRANS_BUFCACHE) { 142a7720be7SMatthew Dillon /* 14320852157SMatthew Dillon * Requesting strategy transaction from buffer-cache, 14420852157SMatthew Dillon * or a VM getpages/putpages through the buffer cache. 14520852157SMatthew Dillon * We must allow such transactions in all situations 14620852157SMatthew Dillon * to avoid deadlocks. 14720852157SMatthew Dillon */ 14820852157SMatthew Dillon nflags = (oflags | flags) + 1; 149a4dc31e0SMatthew Dillon } else { 150a4dc31e0SMatthew Dillon /* 151*d0755e6dSMatthew Dillon * Normal transaction. We do not interlock against 152*d0755e6dSMatthew Dillon * BUFCACHE or ISFLUSH. 15368b321c1SMatthew Dillon * 154ecfe89b8SMatthew Dillon * Note that vnode locks may be held going into 155ecfe89b8SMatthew Dillon * this call. 15668b321c1SMatthew Dillon * 15768b321c1SMatthew Dillon * NOTE: Remember that non-modifying operations 15868b321c1SMatthew Dillon * such as read, stat, readdir, etc, do 15968b321c1SMatthew Dillon * not use transactions. 
160a4dc31e0SMatthew Dillon */ 161c603b86bSMatthew Dillon nflags = (oflags | flags) + 1; 162c603b86bSMatthew Dillon } 163c603b86bSMatthew Dillon if (dowait) 164c603b86bSMatthew Dillon tsleep_interlock(&pmp->trans.sync_wait, 0); 165c603b86bSMatthew Dillon if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 166c603b86bSMatthew Dillon if (dowait == 0) 167a4dc31e0SMatthew Dillon break; 168c603b86bSMatthew Dillon tsleep(&pmp->trans.sync_wait, PINTERLOCKED, 169c603b86bSMatthew Dillon "h2trans", hz); 170ecfe89b8SMatthew Dillon /* retry */ 171c603b86bSMatthew Dillon } else { 172c603b86bSMatthew Dillon cpu_pause(); 173ecfe89b8SMatthew Dillon /* retry */ 174a7720be7SMatthew Dillon } 175c603b86bSMatthew Dillon /* retry */ 176c603b86bSMatthew Dillon } 1773e8408dbSMatthew Dillon 178*d0755e6dSMatthew Dillon #if 0 1793e8408dbSMatthew Dillon /* 1803e8408dbSMatthew Dillon * When entering a FLUSH transaction with COPYQ set, wait for the 1813e8408dbSMatthew Dillon * transaction count to drop to 1 (our flush transaction only) 1823e8408dbSMatthew Dillon * before proceeding. 1833e8408dbSMatthew Dillon * 1843e8408dbSMatthew Dillon * This waits for all non-flush transactions to complete and blocks 1853e8408dbSMatthew Dillon * new non-flush transactions from starting until COPYQ is cleared. 1863e8408dbSMatthew Dillon * (the flush will then proceed after clearing COPYQ). This should 1873e8408dbSMatthew Dillon * be a very short stall on modifying operations. 
1883e8408dbSMatthew Dillon */ 1893e8408dbSMatthew Dillon while ((flags & HAMMER2_TRANS_ISFLUSH) && 1903e8408dbSMatthew Dillon (flags & HAMMER2_TRANS_COPYQ)) { 1913e8408dbSMatthew Dillon oflags = pmp->trans.flags; 1923e8408dbSMatthew Dillon cpu_ccfence(); 1933e8408dbSMatthew Dillon if ((oflags & HAMMER2_TRANS_MASK) == 1) 1943e8408dbSMatthew Dillon break; 1953e8408dbSMatthew Dillon nflags = oflags | HAMMER2_TRANS_WAITING; 1963e8408dbSMatthew Dillon tsleep_interlock(&pmp->trans.sync_wait, 0); 1973e8408dbSMatthew Dillon if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 1983e8408dbSMatthew Dillon tsleep(&pmp->trans.sync_wait, PINTERLOCKED, 1993e8408dbSMatthew Dillon "h2trans2", hz); 2003e8408dbSMatthew Dillon } 2013e8408dbSMatthew Dillon } 202*d0755e6dSMatthew Dillon #endif 203c603b86bSMatthew Dillon } 204a4dc31e0SMatthew Dillon 205e2163f5bSMatthew Dillon /* 206e2163f5bSMatthew Dillon * Start a sub-transaction, there is no 'subdone' function. This will 20753f84d31SMatthew Dillon * issue a new modify_tid (mtid) for the current transaction, which is a 20853f84d31SMatthew Dillon * CLC (cluster level change) id and not a per-node id. 20953f84d31SMatthew Dillon * 21053f84d31SMatthew Dillon * This function must be called for each XOP when multiple XOPs are run in 21153f84d31SMatthew Dillon * sequence within a transaction. 21253f84d31SMatthew Dillon * 21353f84d31SMatthew Dillon * Callers typically update the inode with the transaction mtid manually 21453f84d31SMatthew Dillon * to enforce sequencing. 
215e2163f5bSMatthew Dillon */ 216e2163f5bSMatthew Dillon hammer2_tid_t 217e2163f5bSMatthew Dillon hammer2_trans_sub(hammer2_pfs_t *pmp) 218e2163f5bSMatthew Dillon { 219e2163f5bSMatthew Dillon hammer2_tid_t mtid; 220e2163f5bSMatthew Dillon 221e2163f5bSMatthew Dillon mtid = atomic_fetchadd_64(&pmp->modify_tid, 1); 222e2163f5bSMatthew Dillon 223e2163f5bSMatthew Dillon return (mtid); 224e2163f5bSMatthew Dillon } 225e2163f5bSMatthew Dillon 226c603b86bSMatthew Dillon void 227ecfe89b8SMatthew Dillon hammer2_trans_setflags(hammer2_pfs_t *pmp, uint32_t flags) 228ecfe89b8SMatthew Dillon { 229ecfe89b8SMatthew Dillon atomic_set_int(&pmp->trans.flags, flags); 230ecfe89b8SMatthew Dillon } 231ecfe89b8SMatthew Dillon 2323e8408dbSMatthew Dillon /* 2333e8408dbSMatthew Dillon * Typically used to clear trans flags asynchronously. If TRANS_WAITING 2343e8408dbSMatthew Dillon * is in the mask, and was previously set, this function will wake up 2353e8408dbSMatthew Dillon * any waiters. 2363e8408dbSMatthew Dillon */ 237ecfe89b8SMatthew Dillon void 238ecfe89b8SMatthew Dillon hammer2_trans_clearflags(hammer2_pfs_t *pmp, uint32_t flags) 239ecfe89b8SMatthew Dillon { 240ecfe89b8SMatthew Dillon uint32_t oflags; 241ecfe89b8SMatthew Dillon uint32_t nflags; 242ecfe89b8SMatthew Dillon 243ecfe89b8SMatthew Dillon for (;;) { 244ecfe89b8SMatthew Dillon oflags = pmp->trans.flags; 245ecfe89b8SMatthew Dillon cpu_ccfence(); 246ecfe89b8SMatthew Dillon nflags = oflags & ~flags; 247ecfe89b8SMatthew Dillon if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 248ecfe89b8SMatthew Dillon if ((oflags ^ nflags) & HAMMER2_TRANS_WAITING) 249ecfe89b8SMatthew Dillon wakeup(&pmp->trans.sync_wait); 250ecfe89b8SMatthew Dillon break; 251ecfe89b8SMatthew Dillon } 252ecfe89b8SMatthew Dillon cpu_pause(); 253ecfe89b8SMatthew Dillon /* retry */ 254ecfe89b8SMatthew Dillon } 255ecfe89b8SMatthew Dillon } 256ecfe89b8SMatthew Dillon 257ecfe89b8SMatthew Dillon void 258ecfe89b8SMatthew Dillon hammer2_trans_done(hammer2_pfs_t 
*pmp, uint32_t flags) 259c603b86bSMatthew Dillon { 260c603b86bSMatthew Dillon uint32_t oflags; 261c603b86bSMatthew Dillon uint32_t nflags; 262c603b86bSMatthew Dillon 263257c2728SMatthew Dillon /* 264257c2728SMatthew Dillon * Modifying ops on the front-end can cause dirty inodes to 265257c2728SMatthew Dillon * build up in the sideq. We don't flush these on inactive/reclaim 266257c2728SMatthew Dillon * due to potential deadlocks, so we have to deal with them from 267257c2728SMatthew Dillon * inside other nominal modifying front-end transactions. 268257c2728SMatthew Dillon */ 269ecfe89b8SMatthew Dillon if ((flags & HAMMER2_TRANS_SIDEQ) && 270*d0755e6dSMatthew Dillon pmp->sideq_count > hammer2_limit_dirty_inodes / 2 && 271ecfe89b8SMatthew Dillon pmp->sideq_count > (pmp->inum_count >> 3) && 272ecfe89b8SMatthew Dillon pmp->mp) { 2735afbe9d8SMatthew Dillon speedup_syncer(pmp->mp); 274ecfe89b8SMatthew Dillon } 275257c2728SMatthew Dillon 276257c2728SMatthew Dillon /* 2773e8408dbSMatthew Dillon * Clean-up the transaction. Wakeup any waiters when finishing 2783e8408dbSMatthew Dillon * a flush transaction or transitioning the non-flush transaction 2793e8408dbSMatthew Dillon * count from 2->1 while a flush transaction is pending. 
280257c2728SMatthew Dillon */ 281c603b86bSMatthew Dillon for (;;) { 282c603b86bSMatthew Dillon oflags = pmp->trans.flags; 283c603b86bSMatthew Dillon cpu_ccfence(); 284c603b86bSMatthew Dillon KKASSERT(oflags & HAMMER2_TRANS_MASK); 285ecfe89b8SMatthew Dillon 286ecfe89b8SMatthew Dillon nflags = (oflags - 1) & ~flags; 287ecfe89b8SMatthew Dillon if (flags & HAMMER2_TRANS_ISFLUSH) { 288ecfe89b8SMatthew Dillon nflags &= ~HAMMER2_TRANS_WAITING; 289c603b86bSMatthew Dillon } 2903e8408dbSMatthew Dillon if ((oflags & (HAMMER2_TRANS_ISFLUSH|HAMMER2_TRANS_MASK)) == 2913e8408dbSMatthew Dillon (HAMMER2_TRANS_ISFLUSH|2)) { 2923e8408dbSMatthew Dillon nflags &= ~HAMMER2_TRANS_WAITING; 2933e8408dbSMatthew Dillon } 294c603b86bSMatthew Dillon if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 295ecfe89b8SMatthew Dillon if ((oflags ^ nflags) & HAMMER2_TRANS_WAITING) 296c603b86bSMatthew Dillon wakeup(&pmp->trans.sync_wait); 297c603b86bSMatthew Dillon break; 298c603b86bSMatthew Dillon } 299ecfe89b8SMatthew Dillon cpu_pause(); 300c603b86bSMatthew Dillon /* retry */ 301044541cdSMatthew Dillon } 30250456506SMatthew Dillon } 30350456506SMatthew Dillon 304c603b86bSMatthew Dillon /* 305c603b86bSMatthew Dillon * Obtain new, unique inode number (not serialized by caller). 306c603b86bSMatthew Dillon */ 307c603b86bSMatthew Dillon hammer2_tid_t 308c603b86bSMatthew Dillon hammer2_trans_newinum(hammer2_pfs_t *pmp) 309c603b86bSMatthew Dillon { 310c603b86bSMatthew Dillon hammer2_tid_t tid; 311c603b86bSMatthew Dillon 312e2163f5bSMatthew Dillon tid = atomic_fetchadd_64(&pmp->inode_tid, 1); 313c603b86bSMatthew Dillon 314c603b86bSMatthew Dillon return tid; 315a7720be7SMatthew Dillon } 316a7720be7SMatthew Dillon 317c603b86bSMatthew Dillon /* 31820852157SMatthew Dillon * Assert that a strategy call is ok here. Currently we allow strategy 31920852157SMatthew Dillon * calls in all situations, including during flushes. Previously: 32020852157SMatthew Dillon * (old) (1) In a normal transaction. 
32120852157SMatthew Dillon * (old) (2) In a flush transaction only if PREFLUSH is also set. 322c603b86bSMatthew Dillon */ 3230dea3156SMatthew Dillon void 3249450e866SMatthew Dillon hammer2_trans_assert_strategy(hammer2_pfs_t *pmp) 325c7916d0bSMatthew Dillon { 32620852157SMatthew Dillon #if 0 327c603b86bSMatthew Dillon KKASSERT((pmp->trans.flags & HAMMER2_TRANS_ISFLUSH) == 0 || 328c603b86bSMatthew Dillon (pmp->trans.flags & HAMMER2_TRANS_PREFLUSH)); 32920852157SMatthew Dillon #endif 330c7916d0bSMatthew Dillon } 331c7916d0bSMatthew Dillon 332eedd52a3SMatthew Dillon /* 3330dea3156SMatthew Dillon * Flush the chain and all modified sub-chains through the specified 33453f84d31SMatthew Dillon * synchronization point, propagating blockref updates back up. As 33553f84d31SMatthew Dillon * part of this propagation, mirror_tid and inode/data usage statistics 33653f84d31SMatthew Dillon * propagates back upward. 3370dea3156SMatthew Dillon * 33865cacacfSMatthew Dillon * Returns a HAMMER2 error code, 0 if no error. Note that I/O errors from 33965cacacfSMatthew Dillon * buffers dirtied during the flush operation can occur later. 34065cacacfSMatthew Dillon * 34153f84d31SMatthew Dillon * modify_tid (clc - cluster level change) is not propagated. 34253f84d31SMatthew Dillon * 34353f84d31SMatthew Dillon * update_tid (clc) is used for validation and is not propagated by this 34453f84d31SMatthew Dillon * function. 3450dea3156SMatthew Dillon * 34632b800e6SMatthew Dillon * This routine can be called from several places but the most important 347c4421f07SMatthew Dillon * is from VFS_SYNC (frontend) via hammer2_xop_inode_flush (backend). 34832b800e6SMatthew Dillon * 349da6f36f4SMatthew Dillon * chain is locked on call and will remain locked on return. The chain's 350da6f36f4SMatthew Dillon * UPDATE flag indicates that its parent's block table (which is not yet 3515c51ecaeSMatthew Dillon * part of the flush) should be updated. 
35240498d1cSMatthew Dillon * 35340498d1cSMatthew Dillon * flags: 35440498d1cSMatthew Dillon * HAMMER2_FLUSH_TOP Indicates that this is the top of the flush. 35540498d1cSMatthew Dillon * Is cleared for the recursion. 35640498d1cSMatthew Dillon * 35740498d1cSMatthew Dillon * HAMMER2_FLUSH_ALL Recurse everything 35840498d1cSMatthew Dillon * 35965c894ffSMatthew Dillon * HAMMER2_FLUSH_INODE_STOP 36065c894ffSMatthew Dillon * Stop at PFS inode or normal inode boundary 36132b800e6SMatthew Dillon */ 36265cacacfSMatthew Dillon int 36353f84d31SMatthew Dillon hammer2_flush(hammer2_chain_t *chain, int flags) 36432b800e6SMatthew Dillon { 36532b800e6SMatthew Dillon hammer2_flush_info_t info; 366eedd52a3SMatthew Dillon hammer2_dev_t *hmp; 367925e4ad1SMatthew Dillon int loops; 36832b800e6SMatthew Dillon 36932b800e6SMatthew Dillon /* 37032b800e6SMatthew Dillon * Execute the recursive flush and handle deferrals. 37132b800e6SMatthew Dillon * 37232b800e6SMatthew Dillon * Chains can be ridiculously long (thousands deep), so to 37332b800e6SMatthew Dillon * avoid blowing out the kernel stack the recursive flush has a 37432b800e6SMatthew Dillon * depth limit. Elements at the limit are placed on a list 37532b800e6SMatthew Dillon * for re-execution after the stack has been popped. 37632b800e6SMatthew Dillon */ 37732b800e6SMatthew Dillon bzero(&info, sizeof(info)); 37853f84d31SMatthew Dillon info.flags = flags & ~HAMMER2_FLUSH_TOP; 37932b800e6SMatthew Dillon 380da6f36f4SMatthew Dillon /* 381da6f36f4SMatthew Dillon * Calculate parent (can be NULL), if not NULL the flush core 382da6f36f4SMatthew Dillon * expects the parent to be referenced so it can easily lock/unlock 383da6f36f4SMatthew Dillon * it without it getting ripped up. 
384da6f36f4SMatthew Dillon */ 385da6f36f4SMatthew Dillon if ((info.parent = chain->parent) != NULL) 386da6f36f4SMatthew Dillon hammer2_chain_ref(info.parent); 387731b2a84SMatthew Dillon 388a7720be7SMatthew Dillon /* 389a7720be7SMatthew Dillon * Extra ref needed because flush_core expects it when replacing 390a7720be7SMatthew Dillon * chain. 391a7720be7SMatthew Dillon */ 392a7720be7SMatthew Dillon hammer2_chain_ref(chain); 393eedd52a3SMatthew Dillon hmp = chain->hmp; 394925e4ad1SMatthew Dillon loops = 0; 395a7720be7SMatthew Dillon 3960dea3156SMatthew Dillon for (;;) { 39732b800e6SMatthew Dillon /* 39840498d1cSMatthew Dillon * [re]flush chain as the deep recursion may have generated 39940498d1cSMatthew Dillon * additional modifications. 40032b800e6SMatthew Dillon */ 40140498d1cSMatthew Dillon if (info.parent != chain->parent) { 40268b321c1SMatthew Dillon if (hammer2_debug & 0x0040) { 40368b321c1SMatthew Dillon kprintf("LOST CHILD4 %p->%p " 40468b321c1SMatthew Dillon "(actual parent %p)\n", 40540498d1cSMatthew Dillon info.parent, chain, chain->parent); 40668b321c1SMatthew Dillon } 40740498d1cSMatthew Dillon hammer2_chain_drop(info.parent); 40840498d1cSMatthew Dillon info.parent = chain->parent; 40940498d1cSMatthew Dillon hammer2_chain_ref(info.parent); 41040498d1cSMatthew Dillon } 411ecfe89b8SMatthew Dillon if (hammer2_flush_core(&info, chain, flags) == 0) 41232b800e6SMatthew Dillon break; 413925e4ad1SMatthew Dillon 414925e4ad1SMatthew Dillon if (++loops % 1000 == 0) { 4158138a154SMatthew Dillon kprintf("hammer2_flush: excessive loops on %p\n", 416925e4ad1SMatthew Dillon chain); 417925e4ad1SMatthew Dillon if (hammer2_debug & 0x100000) 418925e4ad1SMatthew Dillon Debugger("hell4"); 419925e4ad1SMatthew Dillon } 42032b800e6SMatthew Dillon } 4216aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG 4226aaf5cb0SMatthew Dillon if (info.scan_count >= 10) 4236aaf5cb0SMatthew Dillon kprintf("hammer2_flush: scan_count %ld (%ld,%ld,%ld,%ld) " 424ecfe89b8SMatthew Dillon 
"bt(%ld,%ld,%ld,%ld,%ld,%ld)\n", 4256aaf5cb0SMatthew Dillon info.scan_count, 4266aaf5cb0SMatthew Dillon info.scan_mod_count, 4276aaf5cb0SMatthew Dillon info.scan_upd_count, 4286aaf5cb0SMatthew Dillon info.scan_onf_count, 4296aaf5cb0SMatthew Dillon info.scan_del_count, 4306aaf5cb0SMatthew Dillon info.scan_btype[1], 4316aaf5cb0SMatthew Dillon info.scan_btype[2], 4326aaf5cb0SMatthew Dillon info.scan_btype[3], 4336aaf5cb0SMatthew Dillon info.scan_btype[4], 4346aaf5cb0SMatthew Dillon info.scan_btype[5], 435ecfe89b8SMatthew Dillon info.scan_btype[6]); 4366aaf5cb0SMatthew Dillon #endif 437a7720be7SMatthew Dillon hammer2_chain_drop(chain); 438da6f36f4SMatthew Dillon if (info.parent) 439da6f36f4SMatthew Dillon hammer2_chain_drop(info.parent); 44065cacacfSMatthew Dillon return (info.error); 44132b800e6SMatthew Dillon } 44232b800e6SMatthew Dillon 443476d2aadSMatthew Dillon /* 444ea155208SMatthew Dillon * This is the core of the chain flushing code. The chain is locked by the 445a7720be7SMatthew Dillon * caller and must also have an extra ref on it by the caller, and remains 446fae225dcSMatthew Dillon * locked and will have an extra ref on return. info.parent is referenced 447fae225dcSMatthew Dillon * but not locked. 448fae225dcSMatthew Dillon * 449fae225dcSMatthew Dillon * Upon return, the caller can test the UPDATE bit on the chain to determine 450fae225dcSMatthew Dillon * if the parent needs updating. 451a7720be7SMatthew Dillon * 452ecfe89b8SMatthew Dillon * If non-zero is returned, the chain's parent changed during the flush and 453ecfe89b8SMatthew Dillon * the caller must retry the operation. 454ecfe89b8SMatthew Dillon * 4558138a154SMatthew Dillon * (1) Determine if this node is a candidate for the flush, return if it is 4568138a154SMatthew Dillon * not. fchain and vchain are always candidates for the flush. 
 *
 * (2) If we recurse too deep the chain is entered onto the deferral list and
 *     the current flush stack is aborted until after the deferral list is
 *     run.
 *
 * (3) Recursively flush live children (rbtree).  This can create deferrals.
 *     A successful flush clears the MODIFIED and UPDATE bits on the children
 *     and typically causes the parent to be marked MODIFIED as the children
 *     update the parent's block table.  A parent might already be marked
 *     MODIFIED due to a deletion (whose block table update in the parent is
 *     handled by the frontend), or if the parent itself is modified by the
 *     frontend for other reasons.
 *
 * (4) Permanently disconnected sub-trees are cleaned up by the front-end.
 *     Deleted-but-open inodes can still be individually flushed via the
 *     filesystem syncer.
 *
 * (5) Delete parents on the way back up if they are normal indirect blocks
 *     and have no children.
 *
 * (6) Note that an unmodified child may still need the block table in its
 *     parent updated (e.g. rename/move).  The child will have UPDATE set
 *     in this case.
 *
 *			WARNING ON BREF MODIFY_TID/MIRROR_TID
 *
 * blockref.modify_tid is consistent only within a PFS, and will not be
 * consistent during synchronization.
mirror_tid is consistent across the 485e513e77eSMatthew Dillon * block device regardless of the PFS. 486476d2aadSMatthew Dillon */ 487ecfe89b8SMatthew Dillon static int 488da6f36f4SMatthew Dillon hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain, 48953f84d31SMatthew Dillon int flags) 49032b800e6SMatthew Dillon { 491da6f36f4SMatthew Dillon hammer2_chain_t *parent; 492506bd6d1SMatthew Dillon hammer2_dev_t *hmp; 49365cacacfSMatthew Dillon int save_error; 494ecfe89b8SMatthew Dillon int retry; 495ecfe89b8SMatthew Dillon 496ecfe89b8SMatthew Dillon retry = 0; 497da6f36f4SMatthew Dillon 498da6f36f4SMatthew Dillon /* 499da6f36f4SMatthew Dillon * (1) Optimize downward recursion to locate nodes needing action. 500da6f36f4SMatthew Dillon * Nothing to do if none of these flags are set. 501da6f36f4SMatthew Dillon */ 502850687d2SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_FLUSH_MASK) == 0) { 503850687d2SMatthew Dillon if (hammer2_debug & 0x200) { 504850687d2SMatthew Dillon if (info->debug == NULL) 505850687d2SMatthew Dillon info->debug = chain; 506850687d2SMatthew Dillon } else { 507ecfe89b8SMatthew Dillon return 0; 508850687d2SMatthew Dillon } 509850687d2SMatthew Dillon } 51032b800e6SMatthew Dillon 511a5913bdfSMatthew Dillon hmp = chain->hmp; 51240498d1cSMatthew Dillon 51340498d1cSMatthew Dillon /* 51440498d1cSMatthew Dillon * NOTE: parent can be NULL, usually due to destroy races. 51540498d1cSMatthew Dillon */ 51640498d1cSMatthew Dillon parent = info->parent; 517fae225dcSMatthew Dillon KKASSERT(chain->parent == parent); 518925e4ad1SMatthew Dillon 5190924b3f8SMatthew Dillon /* 520da6f36f4SMatthew Dillon * Downward search recursion 52140498d1cSMatthew Dillon * 522*d0755e6dSMatthew Dillon * We must be careful on cold stops, which often occur on inode 523*d0755e6dSMatthew Dillon * boundaries due to the way hammer2_vfs_sync() sequences the flush. 
524*d0755e6dSMatthew Dillon * Be sure to issue an appropriate chain_setflush() 525ea155208SMatthew Dillon */ 526ecfe89b8SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) && 52753f84d31SMatthew Dillon (flags & HAMMER2_FLUSH_ALL) == 0 && 528c42feed6SMatthew Dillon (flags & HAMMER2_FLUSH_TOP) == 0 && 529c42feed6SMatthew Dillon chain->pmp && chain->pmp->mp) { 5309450e866SMatthew Dillon /* 531fae225dcSMatthew Dillon * If FLUSH_ALL is not specified the caller does not want 532c42feed6SMatthew Dillon * to recurse through PFS roots that have been mounted. 533c42feed6SMatthew Dillon * 534c42feed6SMatthew Dillon * (If the PFS has not been mounted there may not be 535c42feed6SMatthew Dillon * anything monitoring its chains and its up to us 536c42feed6SMatthew Dillon * to flush it). 537c42feed6SMatthew Dillon * 538c42feed6SMatthew Dillon * The typical sequence is to flush dirty PFS's starting at 539c42feed6SMatthew Dillon * their root downward, then flush the device root (vchain). 540c42feed6SMatthew Dillon * It is this second flush that typically leaves out the 541c42feed6SMatthew Dillon * ALL flag. 5429450e866SMatthew Dillon * 543fae225dcSMatthew Dillon * However we must still process the PFSROOT chains for block 5449450e866SMatthew Dillon * table updates in their parent (which IS part of our flush). 5459450e866SMatthew Dillon * 546fae225dcSMatthew Dillon * NOTE: The volume root, vchain, does not set PFSBOUNDARY. 547fae225dcSMatthew Dillon * 548fae225dcSMatthew Dillon * NOTE: We must re-set ONFLUSH in the parent to retain if 549fae225dcSMatthew Dillon * this chain (that we are skipping) requires work. 
5509450e866SMatthew Dillon */ 551fae225dcSMatthew Dillon if (chain->flags & (HAMMER2_CHAIN_ONFLUSH | 552fae225dcSMatthew Dillon HAMMER2_CHAIN_DESTROY | 553fae225dcSMatthew Dillon HAMMER2_CHAIN_MODIFIED)) { 554fae225dcSMatthew Dillon hammer2_chain_setflush(parent); 555fae225dcSMatthew Dillon } 556ecfe89b8SMatthew Dillon goto done; 55740498d1cSMatthew Dillon } else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 55840498d1cSMatthew Dillon (flags & HAMMER2_FLUSH_INODE_STOP) && 55940498d1cSMatthew Dillon (flags & HAMMER2_FLUSH_ALL) == 0 && 56040498d1cSMatthew Dillon (flags & HAMMER2_FLUSH_TOP) == 0 && 56140498d1cSMatthew Dillon chain->pmp && chain->pmp->mp) { 56240498d1cSMatthew Dillon /* 563ecfe89b8SMatthew Dillon * When FLUSH_INODE_STOP is specified we are being asked not 564ecfe89b8SMatthew Dillon * to include any inode changes for inodes we encounter, 565ecfe89b8SMatthew Dillon * with the exception of the inode that the flush began with. 566ecfe89b8SMatthew Dillon * So: INODE, INODE_STOP, and TOP==0 basically. 567*d0755e6dSMatthew Dillon * 568*d0755e6dSMatthew Dillon * Dirty inodes are flushed based on the hammer2_inode 569*d0755e6dSMatthew Dillon * in-memory structure, issuing a chain_setflush() here 570*d0755e6dSMatthew Dillon * will only cause unnecessary traversals of the topology. 571ecfe89b8SMatthew Dillon */ 572ecfe89b8SMatthew Dillon goto done; 573ecfe89b8SMatthew Dillon #if 0 574ecfe89b8SMatthew Dillon /* 57540498d1cSMatthew Dillon * If FLUSH_INODE_STOP is specified and both ALL and TOP 57640498d1cSMatthew Dillon * are clear, we must not flush the chain. The chain should 57740498d1cSMatthew Dillon * have already been flushed and any further ONFLUSH/UPDATE 57840498d1cSMatthew Dillon * setting will be related to the next flush. 57940498d1cSMatthew Dillon * 58040498d1cSMatthew Dillon * This features allows us to flush inodes independently of 58140498d1cSMatthew Dillon * each other and meta-data above the inodes separately. 
58240498d1cSMatthew Dillon */ 58340498d1cSMatthew Dillon if (chain->flags & (HAMMER2_CHAIN_ONFLUSH | 58440498d1cSMatthew Dillon HAMMER2_CHAIN_DESTROY | 58540498d1cSMatthew Dillon HAMMER2_CHAIN_MODIFIED)) { 58640498d1cSMatthew Dillon if (parent) 58740498d1cSMatthew Dillon hammer2_chain_setflush(parent); 58840498d1cSMatthew Dillon } 589ecfe89b8SMatthew Dillon #endif 59053f84d31SMatthew Dillon } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 59153f84d31SMatthew Dillon /* 59253f84d31SMatthew Dillon * Recursion depth reached. 59353f84d31SMatthew Dillon */ 594ecfe89b8SMatthew Dillon panic("hammer2: flush depth limit"); 5958bbe5025SMatthew Dillon } else if (chain->flags & (HAMMER2_CHAIN_ONFLUSH | 5968bbe5025SMatthew Dillon HAMMER2_CHAIN_DESTROY)) { 5978138a154SMatthew Dillon /* 598da6f36f4SMatthew Dillon * Downward recursion search (actual flush occurs bottom-up). 59965cacacfSMatthew Dillon * pre-clear ONFLUSH. It can get set again due to races or 60065cacacfSMatthew Dillon * flush errors, which we want so the scan finds us again in 60165cacacfSMatthew Dillon * the next flush. 6028bbe5025SMatthew Dillon * 6038bbe5025SMatthew Dillon * We must also recurse if DESTROY is set so we can finally 6048bbe5025SMatthew Dillon * get rid of the related children, otherwise the node will 6058bbe5025SMatthew Dillon * just get re-flushed on lastdrop. 606fae225dcSMatthew Dillon * 607fae225dcSMatthew Dillon * WARNING! The recursion will unlock/relock info->parent 608fae225dcSMatthew Dillon * (which is 'chain'), potentially allowing it 609fae225dcSMatthew Dillon * to be ripped up. 
6108138a154SMatthew Dillon */ 611a964af6fSMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 61265cacacfSMatthew Dillon save_error = info->error; 61365cacacfSMatthew Dillon info->error = 0; 6148138a154SMatthew Dillon info->parent = chain; 615a964af6fSMatthew Dillon 616a964af6fSMatthew Dillon /* 617a964af6fSMatthew Dillon * We may have to do this twice to catch any indirect 618ecfe89b8SMatthew Dillon * block maintenance that occurs. 619a964af6fSMatthew Dillon */ 62094491fa0SMatthew Dillon hammer2_spin_ex(&chain->core.spin); 621da6f36f4SMatthew Dillon RB_SCAN(hammer2_chain_tree, &chain->core.rbtree, 622da6f36f4SMatthew Dillon NULL, hammer2_flush_recurse, info); 623a964af6fSMatthew Dillon if (chain->flags & HAMMER2_CHAIN_ONFLUSH) { 624a964af6fSMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 625a964af6fSMatthew Dillon RB_SCAN(hammer2_chain_tree, &chain->core.rbtree, 626a964af6fSMatthew Dillon NULL, hammer2_flush_recurse, info); 62719808ac9SMatthew Dillon } 628a964af6fSMatthew Dillon hammer2_spin_unex(&chain->core.spin); 629da6f36f4SMatthew Dillon info->parent = parent; 63065cacacfSMatthew Dillon 63165cacacfSMatthew Dillon /* 63265cacacfSMatthew Dillon * Re-set the flush bits if the flush was incomplete or 63365cacacfSMatthew Dillon * an error occurred. If an error occurs it is typically 63465cacacfSMatthew Dillon * an allocation error. Errors do not cause deferrals. 63565cacacfSMatthew Dillon */ 63665cacacfSMatthew Dillon if (info->error) 63765cacacfSMatthew Dillon hammer2_chain_setflush(chain); 63865cacacfSMatthew Dillon info->error |= save_error; 639fae225dcSMatthew Dillon 640fae225dcSMatthew Dillon /* 641fae225dcSMatthew Dillon * If we lost the parent->chain association we have to 642fae225dcSMatthew Dillon * stop processing this chain because it is no longer 643fae225dcSMatthew Dillon * in this recursion. If it moved, it will be handled 644fae225dcSMatthew Dillon * by the ONFLUSH flag elsewhere. 
645fae225dcSMatthew Dillon */ 646fae225dcSMatthew Dillon if (chain->parent != parent) { 647fae225dcSMatthew Dillon kprintf("LOST CHILD2 %p->%p (actual parent %p)\n", 648fae225dcSMatthew Dillon parent, chain, chain->parent); 649fae225dcSMatthew Dillon goto done; 650fae225dcSMatthew Dillon } 6518138a154SMatthew Dillon } 6520924b3f8SMatthew Dillon 65332b800e6SMatthew Dillon /* 654da6f36f4SMatthew Dillon * Now we are in the bottom-up part of the recursion. 655da6f36f4SMatthew Dillon * 656ecfe89b8SMatthew Dillon * We continue to try to update the chain on lower-level errors, but 657ecfe89b8SMatthew Dillon * the flush code may decide not to flush the volume root. 65865cacacfSMatthew Dillon * 65965cacacfSMatthew Dillon * XXX should we continue to try to update the chain if an error 66065cacacfSMatthew Dillon * occurred? 6618138a154SMatthew Dillon */ 6628138a154SMatthew Dillon 6638138a154SMatthew Dillon /* 664fae225dcSMatthew Dillon * Both parent and chain must be locked in order to flush chain, 665fae225dcSMatthew Dillon * in order to properly update the parent under certain conditions. 666fae225dcSMatthew Dillon * 667fae225dcSMatthew Dillon * In addition, we can't safely unlock/relock the chain once we 668fae225dcSMatthew Dillon * start flushing the chain itself, which we would have to do later 669fae225dcSMatthew Dillon * on in order to lock the parent if we didn't do that now. 670fae225dcSMatthew Dillon */ 6716aaf5cb0SMatthew Dillon hammer2_chain_ref_hold(chain); 672fae225dcSMatthew Dillon hammer2_chain_unlock(chain); 673fae225dcSMatthew Dillon if (parent) 674fae225dcSMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 675fae225dcSMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE); 6766aaf5cb0SMatthew Dillon hammer2_chain_drop_unhold(chain); 67765cacacfSMatthew Dillon 67865cacacfSMatthew Dillon /* 67965cacacfSMatthew Dillon * Can't process if we can't access their content. 
68065cacacfSMatthew Dillon */ 68165cacacfSMatthew Dillon if ((parent && parent->error) || chain->error) { 68265cacacfSMatthew Dillon kprintf("hammer2: chain error during flush\n"); 68365cacacfSMatthew Dillon info->error |= chain->error; 68465cacacfSMatthew Dillon if (parent) { 68565cacacfSMatthew Dillon info->error |= parent->error; 68665cacacfSMatthew Dillon hammer2_chain_unlock(parent); 68765cacacfSMatthew Dillon } 68865cacacfSMatthew Dillon goto done; 68965cacacfSMatthew Dillon } 69065cacacfSMatthew Dillon 691fae225dcSMatthew Dillon if (chain->parent != parent) { 69268b321c1SMatthew Dillon if (hammer2_debug & 0x0040) { 693fae225dcSMatthew Dillon kprintf("LOST CHILD3 %p->%p (actual parent %p)\n", 694fae225dcSMatthew Dillon parent, chain, chain->parent); 69568b321c1SMatthew Dillon } 696fae225dcSMatthew Dillon KKASSERT(parent != NULL); 697fae225dcSMatthew Dillon hammer2_chain_unlock(parent); 698ecfe89b8SMatthew Dillon retry = 1; 699fae225dcSMatthew Dillon goto done; 700fae225dcSMatthew Dillon } 701fae225dcSMatthew Dillon 702fae225dcSMatthew Dillon /* 703da6f36f4SMatthew Dillon * Propagate the DESTROY flag downwards. This dummies up the flush 704da6f36f4SMatthew Dillon * code and tries to invalidate related buffer cache buffers to 705da6f36f4SMatthew Dillon * avoid the disk write. 706623d43d4SMatthew Dillon */ 707da6f36f4SMatthew Dillon if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY)) 708da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 709623d43d4SMatthew Dillon 710623d43d4SMatthew Dillon /* 711e513e77eSMatthew Dillon * Dispose of the modified bit. 712e513e77eSMatthew Dillon * 7133f4ec3cfSMatthew Dillon * If parent is present, the UPDATE bit should already be set. 714e513e77eSMatthew Dillon * UPDATE should already be set. 715e513e77eSMatthew Dillon * bref.mirror_tid should already be set. 
71632b800e6SMatthew Dillon */ 71765cacacfSMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 718da6f36f4SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_UPDATE) || 7193f4ec3cfSMatthew Dillon chain->parent == NULL); 7200dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 721f9f4459eSMatthew Dillon atomic_add_long(&hammer2_count_modified_chains, -1); 7228db69c9fSMatthew Dillon 7238db69c9fSMatthew Dillon /* 724e513e77eSMatthew Dillon * Manage threads waiting for excessive dirty memory to 725e513e77eSMatthew Dillon * be retired. 7268db69c9fSMatthew Dillon */ 727e513e77eSMatthew Dillon if (chain->pmp) 728e513e77eSMatthew Dillon hammer2_pfs_memory_wakeup(chain->pmp); 7298138a154SMatthew Dillon 7303f4ec3cfSMatthew Dillon #if 0 7313f4ec3cfSMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0 && 7323f4ec3cfSMatthew Dillon chain != &hmp->vchain && 7333f4ec3cfSMatthew Dillon chain != &hmp->fchain) { 7348138a154SMatthew Dillon /* 7353f4ec3cfSMatthew Dillon * Set UPDATE bit indicating that the parent block 7363f4ec3cfSMatthew Dillon * table requires updating. 7378138a154SMatthew Dillon */ 738da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 7390dea3156SMatthew Dillon } 7403f4ec3cfSMatthew Dillon #endif 7410dea3156SMatthew Dillon 7420dea3156SMatthew Dillon /* 743a71db85dSMatthew Dillon * Issue the flush. This is indirect via the DIO. 7440dea3156SMatthew Dillon * 745a71db85dSMatthew Dillon * NOTE: A DELETED node that reaches this point must be 746a71db85dSMatthew Dillon * flushed for synchronization point consistency. 747a71db85dSMatthew Dillon * 748a71db85dSMatthew Dillon * NOTE: Even though MODIFIED was already set, the related DIO 749a71db85dSMatthew Dillon * might not be dirty due to a system buffer cache 750a71db85dSMatthew Dillon * flush and must be set dirty if we are going to make 751a71db85dSMatthew Dillon * further modifications to the buffer. 
Chains with 752a71db85dSMatthew Dillon * embedded data don't need this. 7530dea3156SMatthew Dillon */ 754a7720be7SMatthew Dillon if (hammer2_debug & 0x1000) { 7557fece146SMatthew Dillon kprintf("Flush %p.%d %016jx/%d data=%016jx\n", 756a7720be7SMatthew Dillon chain, chain->bref.type, 757c603b86bSMatthew Dillon (uintmax_t)chain->bref.key, 758c603b86bSMatthew Dillon chain->bref.keybits, 759c603b86bSMatthew Dillon (uintmax_t)chain->bref.data_off); 760a7720be7SMatthew Dillon } 761a7720be7SMatthew Dillon if (hammer2_debug & 0x2000) { 762a7720be7SMatthew Dillon Debugger("Flush hell"); 763a7720be7SMatthew Dillon } 76410136ab6SMatthew Dillon 76532b800e6SMatthew Dillon /* 766da6f36f4SMatthew Dillon * Update chain CRCs for flush. 76732b800e6SMatthew Dillon * 768da6f36f4SMatthew Dillon * NOTE: Volume headers are NOT flushed here as they require 769da6f36f4SMatthew Dillon * special processing. 77032b800e6SMatthew Dillon */ 77132b800e6SMatthew Dillon switch(chain->bref.type) { 7721a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 773a71db85dSMatthew Dillon /* 774e513e77eSMatthew Dillon * Update the volume header's freemap_tid to the 775e513e77eSMatthew Dillon * freemap's flushing mirror_tid. 
776e513e77eSMatthew Dillon * 777a71db85dSMatthew Dillon * (note: embedded data, do not call setdirty) 778a71db85dSMatthew Dillon */ 77950456506SMatthew Dillon KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED); 780e513e77eSMatthew Dillon KKASSERT(chain == &hmp->fchain); 781e513e77eSMatthew Dillon hmp->voldata.freemap_tid = chain->bref.mirror_tid; 7825d37f96dSMatthew Dillon if (hammer2_debug & 0x8000) { 7835d37f96dSMatthew Dillon /* debug only, avoid syslogd loop */ 784e513e77eSMatthew Dillon kprintf("sync freemap mirror_tid %08jx\n", 785e513e77eSMatthew Dillon (intmax_t)chain->bref.mirror_tid); 7865d37f96dSMatthew Dillon } 787e513e77eSMatthew Dillon 788e513e77eSMatthew Dillon /* 789e513e77eSMatthew Dillon * The freemap can be flushed independently of the 790e513e77eSMatthew Dillon * main topology, but for the case where it is 791e513e77eSMatthew Dillon * flushed in the same transaction, and flushed 792e513e77eSMatthew Dillon * before vchain (a case we want to allow for 793e513e77eSMatthew Dillon * performance reasons), make sure modifications 794e513e77eSMatthew Dillon * made during the flush under vchain use a new 795e513e77eSMatthew Dillon * transaction id. 796e513e77eSMatthew Dillon * 797e513e77eSMatthew Dillon * Otherwise the mount recovery code will get confused. 798e513e77eSMatthew Dillon */ 799e513e77eSMatthew Dillon ++hmp->voldata.mirror_tid; 8001a7cfe5aSMatthew Dillon break; 80132b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 80232b800e6SMatthew Dillon /* 803e513e77eSMatthew Dillon * The free block table is flushed by 804e513e77eSMatthew Dillon * hammer2_vfs_sync() before it flushes vchain. 805e513e77eSMatthew Dillon * We must still hold fchain locked while copying 806e513e77eSMatthew Dillon * voldata to volsync, however. 807a71db85dSMatthew Dillon * 80865cacacfSMatthew Dillon * These do not error per-say since their data does 80965cacacfSMatthew Dillon * not need to be re-read from media on lock. 
81065cacacfSMatthew Dillon * 811a71db85dSMatthew Dillon * (note: embedded data, do not call setdirty) 8121a7cfe5aSMatthew Dillon */ 813da6f36f4SMatthew Dillon hammer2_chain_lock(&hmp->fchain, 814da6f36f4SMatthew Dillon HAMMER2_RESOLVE_ALWAYS); 815a6cf1052SMatthew Dillon hammer2_voldata_lock(hmp); 8165d37f96dSMatthew Dillon if (hammer2_debug & 0x8000) { 8175d37f96dSMatthew Dillon /* debug only, avoid syslogd loop */ 818e513e77eSMatthew Dillon kprintf("sync volume mirror_tid %08jx\n", 819da6f36f4SMatthew Dillon (intmax_t)chain->bref.mirror_tid); 8205d37f96dSMatthew Dillon } 8211a7cfe5aSMatthew Dillon 8221a7cfe5aSMatthew Dillon /* 823e513e77eSMatthew Dillon * Update the volume header's mirror_tid to the 824e513e77eSMatthew Dillon * main topology's flushing mirror_tid. It is 825e513e77eSMatthew Dillon * possible that voldata.mirror_tid is already 826e513e77eSMatthew Dillon * beyond bref.mirror_tid due to the bump we made 827e513e77eSMatthew Dillon * above in BREF_TYPE_FREEMAP. 828e513e77eSMatthew Dillon */ 829e513e77eSMatthew Dillon if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) { 830e513e77eSMatthew Dillon hmp->voldata.mirror_tid = 831e513e77eSMatthew Dillon chain->bref.mirror_tid; 832e513e77eSMatthew Dillon } 833e513e77eSMatthew Dillon 834e513e77eSMatthew Dillon /* 835da6f36f4SMatthew Dillon * The volume header is flushed manually by the 836da6f36f4SMatthew Dillon * syncer, not here. All we do here is adjust the 837da6f36f4SMatthew Dillon * crc's. 
83832b800e6SMatthew Dillon */ 83932b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 840fdf62707SMatthew Dillon KKASSERT(chain->dio == NULL); 84132b800e6SMatthew Dillon 84232b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 84332b800e6SMatthew Dillon hammer2_icrc32( 84432b800e6SMatthew Dillon (char *)&hmp->voldata + 84532b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_OFF, 84632b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_SIZE); 84732b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 84832b800e6SMatthew Dillon hammer2_icrc32( 84932b800e6SMatthew Dillon (char *)&hmp->voldata + 85032b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_OFF, 85132b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_SIZE); 85232b800e6SMatthew Dillon hmp->voldata.icrc_volheader = 85332b800e6SMatthew Dillon hammer2_icrc32( 85432b800e6SMatthew Dillon (char *)&hmp->voldata + 85532b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_OFF, 85632b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_SIZE); 857e513e77eSMatthew Dillon 8585d37f96dSMatthew Dillon if (hammer2_debug & 0x8000) { 8595d37f96dSMatthew Dillon /* debug only, avoid syslogd loop */ 860e513e77eSMatthew Dillon kprintf("syncvolhdr %016jx %016jx\n", 861e513e77eSMatthew Dillon hmp->voldata.mirror_tid, 862e513e77eSMatthew Dillon hmp->vchain.bref.mirror_tid); 8635d37f96dSMatthew Dillon } 86432b800e6SMatthew Dillon hmp->volsync = hmp->voldata; 8650dea3156SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 86650456506SMatthew Dillon hammer2_voldata_unlock(hmp); 867a6cf1052SMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 86832b800e6SMatthew Dillon break; 86932b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_DATA: 87032b800e6SMatthew Dillon /* 871da6f36f4SMatthew Dillon * Data elements have already been flushed via the 872da6f36f4SMatthew Dillon * logical file buffer cache. Their hash was set in 873a71db85dSMatthew Dillon * the bref by the vop_write code. Do not re-dirty. 
87432b800e6SMatthew Dillon * 875da6f36f4SMatthew Dillon * Make sure any device buffer(s) have been flushed 876da6f36f4SMatthew Dillon * out here (there aren't usually any to flush) XXX. 87732b800e6SMatthew Dillon */ 87832b800e6SMatthew Dillon break; 879512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 8801a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 88191caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 882da6f36f4SMatthew Dillon /* 883da6f36f4SMatthew Dillon * Buffer I/O will be cleaned up when the volume is 884da6f36f4SMatthew Dillon * flushed (but the kernel is free to flush it before 885da6f36f4SMatthew Dillon * then, as well). 886da6f36f4SMatthew Dillon */ 88750456506SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 888a71db85dSMatthew Dillon hammer2_chain_setcheck(chain, chain->data); 88950456506SMatthew Dillon break; 890da0cdd33SMatthew Dillon case HAMMER2_BREF_TYPE_DIRENT: 891da0cdd33SMatthew Dillon /* 892da0cdd33SMatthew Dillon * A directory entry can use the check area to store 893da0cdd33SMatthew Dillon * the filename for filenames <= 64 bytes, don't blow 894da0cdd33SMatthew Dillon * it up! 895da0cdd33SMatthew Dillon */ 896da0cdd33SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 897da0cdd33SMatthew Dillon if (chain->bytes) 898da0cdd33SMatthew Dillon hammer2_chain_setcheck(chain, chain->data); 899da0cdd33SMatthew Dillon break; 90091caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 901a71db85dSMatthew Dillon /* 902a71db85dSMatthew Dillon * NOTE: We must call io_setdirty() to make any late 903a71db85dSMatthew Dillon * changes to the inode data, the system might 904a71db85dSMatthew Dillon * have already flushed the buffer. 905a71db85dSMatthew Dillon */ 906b0f58de8SMatthew Dillon if (chain->data->ipdata.meta.op_flags & 907da6f36f4SMatthew Dillon HAMMER2_OPFLAG_PFSROOT) { 908837bd39bSMatthew Dillon /* 909da6f36f4SMatthew Dillon * non-NULL pmp if mounted as a PFS. 
We must 91018e8ab5fSMatthew Dillon * sync fields cached in the pmp? XXX 911837bd39bSMatthew Dillon */ 912837bd39bSMatthew Dillon hammer2_inode_data_t *ipdata; 913837bd39bSMatthew Dillon 914a71db85dSMatthew Dillon hammer2_io_setdirty(chain->dio); 915837bd39bSMatthew Dillon ipdata = &chain->data->ipdata; 916e513e77eSMatthew Dillon if (chain->pmp) { 917b0f58de8SMatthew Dillon ipdata->meta.pfs_inum = 918e513e77eSMatthew Dillon chain->pmp->inode_tid; 919e513e77eSMatthew Dillon } 92050456506SMatthew Dillon } else { 92150456506SMatthew Dillon /* can't be mounted as a PFS */ 92250456506SMatthew Dillon } 923b3659de2SMatthew Dillon 924512beabdSMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 925a71db85dSMatthew Dillon hammer2_chain_setcheck(chain, chain->data); 9261a7cfe5aSMatthew Dillon break; 92732b800e6SMatthew Dillon default: 92891caa51cSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 929da6f36f4SMatthew Dillon panic("hammer2_flush_core: unsupported " 930da6f36f4SMatthew Dillon "embedded bref %d", 93191caa51cSMatthew Dillon chain->bref.type); 93291caa51cSMatthew Dillon /* NOT REACHED */ 93332b800e6SMatthew Dillon } 93432b800e6SMatthew Dillon 93532b800e6SMatthew Dillon /* 9363d4f397aSMatthew Dillon * If the chain was destroyed try to avoid unnecessary I/O 9373d4f397aSMatthew Dillon * that might not have yet occurred. Remove the data range 9383d4f397aSMatthew Dillon * from dedup candidacy and attempt to invalidation that 9393d4f397aSMatthew Dillon * potentially dirty portion of the I/O buffer. 
940da6f36f4SMatthew Dillon */ 9417767d389SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_DESTROY) { 9420b8efeb7SMatthew Dillon hammer2_io_dedup_delete(hmp, 9430b8efeb7SMatthew Dillon chain->bref.type, 9443d4f397aSMatthew Dillon chain->bref.data_off, 9453d4f397aSMatthew Dillon chain->bytes); 9463d4f397aSMatthew Dillon #if 0 9477767d389SMatthew Dillon hammer2_io_t *dio; 9487767d389SMatthew Dillon if (chain->dio) { 9493d4f397aSMatthew Dillon hammer2_io_inval(chain->dio, 9507d565a4fSMatthew Dillon chain->bref.data_off, 9517d565a4fSMatthew Dillon chain->bytes); 9527767d389SMatthew Dillon } else if ((dio = hammer2_io_getquick(hmp, 9537767d389SMatthew Dillon chain->bref.data_off, 9543d4f397aSMatthew Dillon chain->bytes, 9553d4f397aSMatthew Dillon 1)) != NULL) { 9563d4f397aSMatthew Dillon hammer2_io_inval(dio, 9577767d389SMatthew Dillon chain->bref.data_off, 9587767d389SMatthew Dillon chain->bytes); 9597767d389SMatthew Dillon hammer2_io_putblk(&dio); 9607767d389SMatthew Dillon } 9613d4f397aSMatthew Dillon #endif 962da6f36f4SMatthew Dillon } 963da6f36f4SMatthew Dillon } 964da6f36f4SMatthew Dillon 965da6f36f4SMatthew Dillon /* 966da6f36f4SMatthew Dillon * If UPDATE is set the parent block table may need to be updated. 96765cacacfSMatthew Dillon * This can fail if the hammer2_chain_modify() fails. 968da6f36f4SMatthew Dillon * 969da6f36f4SMatthew Dillon * NOTE: UPDATE may be set on vchain or fchain in which case 970ecfe89b8SMatthew Dillon * parent could be NULL, or on an inode that has not yet 971ecfe89b8SMatthew Dillon * been inserted into the radix tree. It's easiest to allow 972ecfe89b8SMatthew Dillon * the case and test for NULL. parent can also wind up being 973ecfe89b8SMatthew Dillon * NULL due to a deletion so we need to handle the case anyway. 
974ecfe89b8SMatthew Dillon * 975ecfe89b8SMatthew Dillon * NOTE: UPDATE can be set when chains are renamed into or out of 976ecfe89b8SMatthew Dillon * an indirect block, without the chain itself being flagged 977ecfe89b8SMatthew Dillon * MODIFIED. 978da6f36f4SMatthew Dillon * 979da6f36f4SMatthew Dillon * If no parent exists we can just clear the UPDATE bit. If the 980da6f36f4SMatthew Dillon * chain gets reattached later on the bit will simply get set 981da6f36f4SMatthew Dillon * again. 982da6f36f4SMatthew Dillon */ 9833f4ec3cfSMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_UPDATE) && parent == NULL) 984da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 985da6f36f4SMatthew Dillon 986da6f36f4SMatthew Dillon /* 987ecfe89b8SMatthew Dillon * When flushing an inode outside of a FLUSH_FSSYNC we must NOT 988ecfe89b8SMatthew Dillon * update the parent block table to point at the flushed inode. 989ecfe89b8SMatthew Dillon * The block table should only ever be updated by the filesystem 990ecfe89b8SMatthew Dillon * sync code. If we do, inode<->inode dependencies (such as 991ecfe89b8SMatthew Dillon * directory entries vs inode nlink count) can wind up not being 992ecfe89b8SMatthew Dillon * flushed together and result in a broken topology if a crash/reboot 993ecfe89b8SMatthew Dillon * occurs at the wrong time. 
994ecfe89b8SMatthew Dillon */ 995ecfe89b8SMatthew Dillon if (chain->bref.type == HAMMER2_BREF_TYPE_INODE && 9966f445d15SMatthew Dillon (flags & HAMMER2_FLUSH_INODE_STOP) && 997ecfe89b8SMatthew Dillon (flags & HAMMER2_FLUSH_FSSYNC) == 0 && 998ecfe89b8SMatthew Dillon (flags & HAMMER2_FLUSH_ALL) == 0 && 999ecfe89b8SMatthew Dillon chain->pmp && chain->pmp->mp) { 10006f445d15SMatthew Dillon #ifdef HAMMER2_DEBUG_SYNC 10016f445d15SMatthew Dillon kprintf("inum %ld do not update parent, non-fssync\n", 10026f445d15SMatthew Dillon (long)chain->bref.key); 10036f445d15SMatthew Dillon #endif 1004ecfe89b8SMatthew Dillon goto skipupdate; 1005ecfe89b8SMatthew Dillon } 10066f445d15SMatthew Dillon #ifdef HAMMER2_DEBUG_SYNC 10076f445d15SMatthew Dillon if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) 10086f445d15SMatthew Dillon kprintf("inum %ld update parent\n", (long)chain->bref.key); 10096f445d15SMatthew Dillon #endif 1010ecfe89b8SMatthew Dillon 1011ecfe89b8SMatthew Dillon /* 1012ecfe89b8SMatthew Dillon * The chain may need its blockrefs updated in the parent, normal 1013ecfe89b8SMatthew Dillon * path. 1014da6f36f4SMatthew Dillon */ 1015da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_UPDATE) { 1016da6f36f4SMatthew Dillon hammer2_blockref_t *base; 1017da6f36f4SMatthew Dillon int count; 1018da6f36f4SMatthew Dillon 1019da6f36f4SMatthew Dillon /* 1020a6cf1052SMatthew Dillon * Clear UPDATE flag, mark parent modified, update its 1021a6cf1052SMatthew Dillon * modify_tid if necessary, and adjust the parent blockmap. 1022da6f36f4SMatthew Dillon */ 1023da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 1024a6cf1052SMatthew Dillon 1025eedd52a3SMatthew Dillon /* 1026eedd52a3SMatthew Dillon * (optional code) 1027eedd52a3SMatthew Dillon * 1028eedd52a3SMatthew Dillon * Avoid actually modifying and updating the parent if it 1029eedd52a3SMatthew Dillon * was flagged for destruction. 
This can greatly reduce 1030eedd52a3SMatthew Dillon * disk I/O in large tree removals because the 1031eedd52a3SMatthew Dillon * hammer2_io_setinval() call in the upward recursion 1032eedd52a3SMatthew Dillon * (see MODIFIED code above) can only handle a few cases. 1033eedd52a3SMatthew Dillon */ 1034eedd52a3SMatthew Dillon if (parent->flags & HAMMER2_CHAIN_DESTROY) { 1035eedd52a3SMatthew Dillon if (parent->bref.modify_tid < chain->bref.modify_tid) { 1036eedd52a3SMatthew Dillon parent->bref.modify_tid = 1037eedd52a3SMatthew Dillon chain->bref.modify_tid; 1038eedd52a3SMatthew Dillon } 1039eedd52a3SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 1040eedd52a3SMatthew Dillon HAMMER2_CHAIN_BMAPUPD); 1041eedd52a3SMatthew Dillon goto skipupdate; 1042eedd52a3SMatthew Dillon } 1043eedd52a3SMatthew Dillon 1044eedd52a3SMatthew Dillon /* 1045470dad14SMatthew Dillon * The flusher is responsible for deleting empty indirect 1046470dad14SMatthew Dillon * blocks at this point. If we don't do this, no major harm 1047470dad14SMatthew Dillon * will be done but the empty indirect blocks will stay in 1048850d3f60SMatthew Dillon * the topology and make it a messy and inefficient. 104930b0abf3SMatthew Dillon * 1050850d3f60SMatthew Dillon * The flusher is also responsible for collapsing the 1051850d3f60SMatthew Dillon * content of an indirect block into its parent whenever 1052850d3f60SMatthew Dillon * possible (with some hysteresis). Not doing this will also 1053850d3f60SMatthew Dillon * not harm the topology, but would make it messy and 1054850d3f60SMatthew Dillon * inefficient. 
1055470dad14SMatthew Dillon */ 1056850d3f60SMatthew Dillon if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) { 1057850d3f60SMatthew Dillon if (hammer2_chain_indirect_maintenance(parent, chain)) 1058470dad14SMatthew Dillon goto skipupdate; 1059470dad14SMatthew Dillon } 1060470dad14SMatthew Dillon 1061470dad14SMatthew Dillon /* 1062eedd52a3SMatthew Dillon * We are updating the parent's blockmap, the parent must 106365cacacfSMatthew Dillon * be set modified. If this fails we re-set the UPDATE flag 106465cacacfSMatthew Dillon * in the child. 106565cacacfSMatthew Dillon * 106665cacacfSMatthew Dillon * NOTE! A modification error can be ENOSPC. We still want 106765cacacfSMatthew Dillon * to flush modified chains recursively, not break out, 106865cacacfSMatthew Dillon * so we just skip the update in this situation and 106965cacacfSMatthew Dillon * continue. That is, we still need to try to clean 107065cacacfSMatthew Dillon * out dirty chains and buffers. 107165cacacfSMatthew Dillon * 107265cacacfSMatthew Dillon * This may not help bulkfree though. 
XXX 1073eedd52a3SMatthew Dillon */ 107465cacacfSMatthew Dillon save_error = hammer2_chain_modify(parent, 0, 0, 0); 107565cacacfSMatthew Dillon if (save_error) { 107665cacacfSMatthew Dillon info->error |= save_error; 107765cacacfSMatthew Dillon kprintf("hammer2_flush: %016jx.%02x error=%08x\n", 107865cacacfSMatthew Dillon parent->bref.data_off, parent->bref.type, 107965cacacfSMatthew Dillon save_error); 108065cacacfSMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 108165cacacfSMatthew Dillon goto skipupdate; 108265cacacfSMatthew Dillon } 1083a6cf1052SMatthew Dillon if (parent->bref.modify_tid < chain->bref.modify_tid) 1084a6cf1052SMatthew Dillon parent->bref.modify_tid = chain->bref.modify_tid; 1085da6f36f4SMatthew Dillon 1086da6f36f4SMatthew Dillon /* 1087da6f36f4SMatthew Dillon * Calculate blockmap pointer 1088da6f36f4SMatthew Dillon */ 1089da6f36f4SMatthew Dillon switch(parent->bref.type) { 1090da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 1091da6f36f4SMatthew Dillon /* 1092da6f36f4SMatthew Dillon * Access the inode's block array. However, there is 1093da6f36f4SMatthew Dillon * no block array if the inode is flagged DIRECTDATA. 
1094da6f36f4SMatthew Dillon */ 1095da6f36f4SMatthew Dillon if (parent->data && 1096b0f58de8SMatthew Dillon (parent->data->ipdata.meta.op_flags & 1097da6f36f4SMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA) == 0) { 1098da6f36f4SMatthew Dillon base = &parent->data-> 1099da6f36f4SMatthew Dillon ipdata.u.blockset.blockref[0]; 1100da6f36f4SMatthew Dillon } else { 1101da6f36f4SMatthew Dillon base = NULL; 1102da6f36f4SMatthew Dillon } 1103da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 1104da6f36f4SMatthew Dillon break; 1105da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 1106da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 1107da6f36f4SMatthew Dillon if (parent->data) 1108da6f36f4SMatthew Dillon base = &parent->data->npdata[0]; 1109da6f36f4SMatthew Dillon else 1110da6f36f4SMatthew Dillon base = NULL; 1111da6f36f4SMatthew Dillon count = parent->bytes / sizeof(hammer2_blockref_t); 1112da6f36f4SMatthew Dillon break; 1113da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 1114da6f36f4SMatthew Dillon base = &chain->hmp->voldata.sroot_blockset.blockref[0]; 1115da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 1116da6f36f4SMatthew Dillon break; 1117da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 1118da6f36f4SMatthew Dillon base = &parent->data->npdata[0]; 1119da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 1120da6f36f4SMatthew Dillon break; 1121da6f36f4SMatthew Dillon default: 1122da6f36f4SMatthew Dillon base = NULL; 1123da6f36f4SMatthew Dillon count = 0; 1124da6f36f4SMatthew Dillon panic("hammer2_flush_core: " 1125da6f36f4SMatthew Dillon "unrecognized blockref type: %d", 1126da6f36f4SMatthew Dillon parent->bref.type); 1127da6f36f4SMatthew Dillon } 1128da6f36f4SMatthew Dillon 1129da6f36f4SMatthew Dillon /* 1130da6f36f4SMatthew Dillon * Blocktable updates 1131b3659de2SMatthew Dillon * 1132b3659de2SMatthew Dillon * We synchronize pending statistics at this time. 
Delta 1133b3659de2SMatthew Dillon * adjustments designated for the current and upper level 1134b3659de2SMatthew Dillon * are synchronized. 1135da6f36f4SMatthew Dillon */ 1136da6f36f4SMatthew Dillon if (base && (chain->flags & HAMMER2_CHAIN_BMAPUPD)) { 1137da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 11380cc33e20SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 1139ecfe89b8SMatthew Dillon hammer2_base_delete(parent, base, count, chain, 1140ecfe89b8SMatthew Dillon NULL); 11410cc33e20SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 1142b3659de2SMatthew Dillon /* base_delete clears both bits */ 1143b3659de2SMatthew Dillon } else { 1144b3659de2SMatthew Dillon atomic_clear_int(&chain->flags, 1145b3659de2SMatthew Dillon HAMMER2_CHAIN_BMAPUPD); 1146da6f36f4SMatthew Dillon } 1147da6f36f4SMatthew Dillon } 1148da6f36f4SMatthew Dillon if (base && (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 11490cc33e20SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 1150850d3f60SMatthew Dillon hammer2_base_insert(parent, base, count, 1151850d3f60SMatthew Dillon chain, &chain->bref); 11520cc33e20SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 1153b3659de2SMatthew Dillon /* base_insert sets BMAPPED */ 1154da6f36f4SMatthew Dillon } 1155da6f36f4SMatthew Dillon } 1156eedd52a3SMatthew Dillon skipupdate: 1157fae225dcSMatthew Dillon if (parent) 1158fae225dcSMatthew Dillon hammer2_chain_unlock(parent); 1159da6f36f4SMatthew Dillon 1160da6f36f4SMatthew Dillon /* 11618138a154SMatthew Dillon * Final cleanup after flush 11628138a154SMatthew Dillon */ 11638138a154SMatthew Dillon done: 1164e513e77eSMatthew Dillon KKASSERT(chain->refs > 0); 1165850687d2SMatthew Dillon if (hammer2_debug & 0x200) { 1166850687d2SMatthew Dillon if (info->debug == chain) 1167850687d2SMatthew Dillon info->debug = NULL; 1168850687d2SMatthew Dillon } 1169ecfe89b8SMatthew Dillon return retry; 11708138a154SMatthew Dillon } 11718138a154SMatthew Dillon 11728138a154SMatthew Dillon /* 
1173da6f36f4SMatthew Dillon * Flush recursion helper, called from flush_core, calls flush_core. 11740dea3156SMatthew Dillon * 11758138a154SMatthew Dillon * Flushes the children of the caller's chain (info->parent), restricted 11768138a154SMatthew Dillon * by sync_tid. Set info->domodify if the child's blockref must propagate 11778138a154SMatthew Dillon * back up to the parent. 11780dea3156SMatthew Dillon * 117965cacacfSMatthew Dillon * This function may set info->error as a side effect. 118065cacacfSMatthew Dillon * 11818138a154SMatthew Dillon * Ripouts can move child from rbtree to dbtree or dbq but the caller's 11828138a154SMatthew Dillon * flush scan order prevents any chains from being lost. A child can be 1183da6f36f4SMatthew Dillon * executes more than once. 1184ea155208SMatthew Dillon * 11858138a154SMatthew Dillon * WARNING! If we do not call hammer2_flush_core() we must update 11868138a154SMatthew Dillon * bref.mirror_tid ourselves to indicate that the flush has 11878138a154SMatthew Dillon * processed the child. 1188925e4ad1SMatthew Dillon * 11898138a154SMatthew Dillon * WARNING! parent->core spinlock is held on entry and return. 
119032b800e6SMatthew Dillon */ 11910dea3156SMatthew Dillon static int 1192da6f36f4SMatthew Dillon hammer2_flush_recurse(hammer2_chain_t *child, void *data) 119332b800e6SMatthew Dillon { 11940dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 11950dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 1196925e4ad1SMatthew Dillon 11976aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG 11986aaf5cb0SMatthew Dillon ++info->scan_count; 11996aaf5cb0SMatthew Dillon if (child->flags & HAMMER2_CHAIN_MODIFIED) 12006aaf5cb0SMatthew Dillon ++info->scan_mod_count; 12016aaf5cb0SMatthew Dillon if (child->flags & HAMMER2_CHAIN_UPDATE) 12026aaf5cb0SMatthew Dillon ++info->scan_upd_count; 12036aaf5cb0SMatthew Dillon if (child->flags & HAMMER2_CHAIN_ONFLUSH) 12046aaf5cb0SMatthew Dillon ++info->scan_onf_count; 12056aaf5cb0SMatthew Dillon #endif 12066aaf5cb0SMatthew Dillon 12070dea3156SMatthew Dillon /* 120810136ab6SMatthew Dillon * (child can never be fchain or vchain so a special check isn't 120910136ab6SMatthew Dillon * needed). 1210da6f36f4SMatthew Dillon * 1211a4dc31e0SMatthew Dillon * We must ref the child before unlocking the spinlock. 1212a4dc31e0SMatthew Dillon * 1213a4dc31e0SMatthew Dillon * The caller has added a ref to the parent so we can temporarily 1214fae225dcSMatthew Dillon * unlock it in order to lock the child. However, if it no longer 1215fae225dcSMatthew Dillon * winds up being the child of the parent we must skip this child. 121665cacacfSMatthew Dillon * 121765cacacfSMatthew Dillon * NOTE! chain locking errors are fatal. They are never out-of-space 121865cacacfSMatthew Dillon * errors. 
1219a4dc31e0SMatthew Dillon */ 1220ea155208SMatthew Dillon hammer2_chain_ref(child); 122194491fa0SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 12220dea3156SMatthew Dillon 12236aaf5cb0SMatthew Dillon hammer2_chain_ref_hold(parent); 12240dea3156SMatthew Dillon hammer2_chain_unlock(parent); 12250dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE); 1226fae225dcSMatthew Dillon if (child->parent != parent) { 1227fae225dcSMatthew Dillon kprintf("LOST CHILD1 %p->%p (actual parent %p)\n", 1228fae225dcSMatthew Dillon parent, child, child->parent); 1229fae225dcSMatthew Dillon goto done; 1230fae225dcSMatthew Dillon } 123165cacacfSMatthew Dillon if (child->error) { 123265cacacfSMatthew Dillon kprintf("CHILD ERROR DURING FLUSH LOCK %p->%p\n", 123365cacacfSMatthew Dillon parent, child); 123465cacacfSMatthew Dillon info->error |= child->error; 123565cacacfSMatthew Dillon goto done; 123665cacacfSMatthew Dillon } 12370dea3156SMatthew Dillon 123803faa7d5SMatthew Dillon /* 12398bbe5025SMatthew Dillon * Must propagate the DESTROY flag downwards, otherwise the 12408bbe5025SMatthew Dillon * parent could end up never being removed because it will 12418bbe5025SMatthew Dillon * be requeued to the flusher if it survives this run due to 12428bbe5025SMatthew Dillon * the flag. 12438bbe5025SMatthew Dillon */ 12448bbe5025SMatthew Dillon if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY)) 12458bbe5025SMatthew Dillon atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROY); 12466aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG 12476aaf5cb0SMatthew Dillon if (child->flags & HAMMER2_CHAIN_DESTROY) 12486aaf5cb0SMatthew Dillon ++info->scan_del_count; 12496aaf5cb0SMatthew Dillon #endif 1250ecfe89b8SMatthew Dillon /* 1251ecfe89b8SMatthew Dillon * Special handling of the root inode. 
Because the root inode 1252ecfe89b8SMatthew Dillon * contains an index of all the inodes in the PFS in addition to 1253ecfe89b8SMatthew Dillon * its normal directory entries, any flush that is not part of a 1254ecfe89b8SMatthew Dillon * filesystem sync must only flush the directory entries, and not 1255ecfe89b8SMatthew Dillon * anything else. 1256ecfe89b8SMatthew Dillon * 1257ecfe89b8SMatthew Dillon * The child might be an indirect block, but H2 guarantees that 1258ecfe89b8SMatthew Dillon * the key-range will fully partition the inode index from the 1259ecfe89b8SMatthew Dillon * directory entries so the case just works naturally. 1260ecfe89b8SMatthew Dillon */ 1261ecfe89b8SMatthew Dillon if ((parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) && 1262ecfe89b8SMatthew Dillon (child->flags & HAMMER2_CHAIN_DESTROY) == 0 && 1263ecfe89b8SMatthew Dillon parent->bref.type == HAMMER2_BREF_TYPE_INODE && 1264ecfe89b8SMatthew Dillon (info->flags & HAMMER2_FLUSH_FSSYNC) == 0) { 1265ecfe89b8SMatthew Dillon if ((child->bref.key & HAMMER2_DIRHASH_VISIBLE) == 0) { 1266ecfe89b8SMatthew Dillon if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) { 1267ecfe89b8SMatthew Dillon hammer2_chain_setflush(parent); 1268ecfe89b8SMatthew Dillon } 12696f445d15SMatthew Dillon kprintf("inum %ld do not dive root inode\n", 12706f445d15SMatthew Dillon (long)parent->bref.key); 1271ecfe89b8SMatthew Dillon goto done; 1272ecfe89b8SMatthew Dillon } 1273ecfe89b8SMatthew Dillon } 12748bbe5025SMatthew Dillon 12758bbe5025SMatthew Dillon /* 1276e513e77eSMatthew Dillon * Recurse and collect deferral data. We're in the media flush, 1277e513e77eSMatthew Dillon * this can cross PFS boundaries. 
127803faa7d5SMatthew Dillon */ 1279da6f36f4SMatthew Dillon if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) { 12806aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG 12816aaf5cb0SMatthew Dillon if (child->bref.type < 7) 12826aaf5cb0SMatthew Dillon ++info->scan_btype[child->bref.type]; 12836aaf5cb0SMatthew Dillon #endif 12840dea3156SMatthew Dillon ++info->depth; 128553f84d31SMatthew Dillon hammer2_flush_core(info, child, info->flags); 12860dea3156SMatthew Dillon --info->depth; 1287850687d2SMatthew Dillon } else if (hammer2_debug & 0x200) { 1288850687d2SMatthew Dillon if (info->debug == NULL) 1289850687d2SMatthew Dillon info->debug = child; 1290850687d2SMatthew Dillon ++info->depth; 129153f84d31SMatthew Dillon hammer2_flush_core(info, child, info->flags); 1292850687d2SMatthew Dillon --info->depth; 1293850687d2SMatthew Dillon if (info->debug == child) 1294850687d2SMatthew Dillon info->debug = NULL; 12958138a154SMatthew Dillon } 12960dea3156SMatthew Dillon 1297fae225dcSMatthew Dillon done: 1298a4dc31e0SMatthew Dillon /* 129965cacacfSMatthew Dillon * Relock to continue the loop. 
1300a4dc31e0SMatthew Dillon */ 1301a4dc31e0SMatthew Dillon hammer2_chain_unlock(child); 1302ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 13036aaf5cb0SMatthew Dillon hammer2_chain_drop_unhold(parent); 130465cacacfSMatthew Dillon if (parent->error) { 130565cacacfSMatthew Dillon kprintf("PARENT ERROR DURING FLUSH LOCK %p->%p\n", 130665cacacfSMatthew Dillon parent, child); 130765cacacfSMatthew Dillon info->error |= parent->error; 130865cacacfSMatthew Dillon } 1309a4dc31e0SMatthew Dillon hammer2_chain_drop(child); 1310a4dc31e0SMatthew Dillon KKASSERT(info->parent == parent); 131194491fa0SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 13120dea3156SMatthew Dillon 13130dea3156SMatthew Dillon return (0); 13140dea3156SMatthew Dillon } 131512ff971cSMatthew Dillon 131653f84d31SMatthew Dillon /* 131712ff971cSMatthew Dillon * flush helper (backend threaded) 131812ff971cSMatthew Dillon * 131940498d1cSMatthew Dillon * Flushes chain topology for the specified inode. 132040498d1cSMatthew Dillon * 1321ecfe89b8SMatthew Dillon * HAMMER2_XOP_INODE_STOP The flush recursion stops at inode boundaries. 1322ecfe89b8SMatthew Dillon * Inodes belonging to the same flush are flushed 1323ecfe89b8SMatthew Dillon * separately. 132440498d1cSMatthew Dillon * 1325ecfe89b8SMatthew Dillon * chain->parent can be NULL, usually due to destroy races or detached inodes. 132612ff971cSMatthew Dillon * 132712ff971cSMatthew Dillon * Primarily called from vfs_sync(). 
132812ff971cSMatthew Dillon */ 132912ff971cSMatthew Dillon void 1330c4421f07SMatthew Dillon hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex) 133112ff971cSMatthew Dillon { 133212ff971cSMatthew Dillon hammer2_xop_flush_t *xop = &arg->xop_flush; 133312ff971cSMatthew Dillon hammer2_chain_t *chain; 13346f445d15SMatthew Dillon hammer2_inode_t *ip; 133512ff971cSMatthew Dillon hammer2_dev_t *hmp; 13366f445d15SMatthew Dillon hammer2_pfs_t *pmp; 133765cacacfSMatthew Dillon int flush_error = 0; 133865cacacfSMatthew Dillon int fsync_error = 0; 133912ff971cSMatthew Dillon int total_error = 0; 134012ff971cSMatthew Dillon int j; 134140498d1cSMatthew Dillon int xflags; 134240498d1cSMatthew Dillon int ispfsroot = 0; 134340498d1cSMatthew Dillon 134440498d1cSMatthew Dillon xflags = HAMMER2_FLUSH_TOP; 134540498d1cSMatthew Dillon if (xop->head.flags & HAMMER2_XOP_INODE_STOP) 134640498d1cSMatthew Dillon xflags |= HAMMER2_FLUSH_INODE_STOP; 1347ecfe89b8SMatthew Dillon if (xop->head.flags & HAMMER2_XOP_FSSYNC) 1348ecfe89b8SMatthew Dillon xflags |= HAMMER2_FLUSH_FSSYNC; 134912ff971cSMatthew Dillon 135012ff971cSMatthew Dillon /* 135112ff971cSMatthew Dillon * Flush core chains 135212ff971cSMatthew Dillon */ 13536f445d15SMatthew Dillon ip = xop->head.ip1; 13546f445d15SMatthew Dillon pmp = ip->pmp; 13556f445d15SMatthew Dillon chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS); 135612ff971cSMatthew Dillon if (chain) { 135712ff971cSMatthew Dillon hmp = chain->hmp; 1358ecfe89b8SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_FLUSH_MASK) { 1359ecfe89b8SMatthew Dillon /* 1360ecfe89b8SMatthew Dillon * Due to flush partitioning the chain topology 1361ecfe89b8SMatthew Dillon * above the inode's chain may no longer be flagged. 1362ecfe89b8SMatthew Dillon * When asked to flush an inode, remark the topology 1363ecfe89b8SMatthew Dillon * leading to that inode. 
1364ecfe89b8SMatthew Dillon */ 1365ecfe89b8SMatthew Dillon if (chain->parent) 1366ecfe89b8SMatthew Dillon hammer2_chain_setflush(chain->parent); 136740498d1cSMatthew Dillon hammer2_flush(chain, xflags); 1368ecfe89b8SMatthew Dillon 13696f445d15SMatthew Dillon /* XXX cluster */ 13706f445d15SMatthew Dillon if (ip == pmp->iroot && pmp != hmp->spmp) { 13716f445d15SMatthew Dillon hammer2_spin_ex(&pmp->inum_spin); 13726f445d15SMatthew Dillon pmp->pfs_iroot_blocksets[clindex] = 13736f445d15SMatthew Dillon chain->data->ipdata.u.blockset; 13746f445d15SMatthew Dillon hammer2_spin_unex(&pmp->inum_spin); 13756f445d15SMatthew Dillon } 13766f445d15SMatthew Dillon 1377ecfe89b8SMatthew Dillon #if 0 1378ecfe89b8SMatthew Dillon /* 1379ecfe89b8SMatthew Dillon * Propogate upwards but only cross an inode boundary 1380ecfe89b8SMatthew Dillon * for inodes associated with the current filesystem 1381ecfe89b8SMatthew Dillon * sync. 1382ecfe89b8SMatthew Dillon */ 1383ecfe89b8SMatthew Dillon if ((xop->head.flags & HAMMER2_XOP_PARENTONFLUSH) || 1384ecfe89b8SMatthew Dillon chain->bref.type != HAMMER2_BREF_TYPE_INODE) { 138512ff971cSMatthew Dillon parent = chain->parent; 138640498d1cSMatthew Dillon if (parent) 138712ff971cSMatthew Dillon hammer2_chain_setflush(parent); 138812ff971cSMatthew Dillon } 1389ecfe89b8SMatthew Dillon #endif 1390ecfe89b8SMatthew Dillon } 139140498d1cSMatthew Dillon if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) 139240498d1cSMatthew Dillon ispfsroot = 1; 139312ff971cSMatthew Dillon hammer2_chain_unlock(chain); 139412ff971cSMatthew Dillon hammer2_chain_drop(chain); 139512ff971cSMatthew Dillon chain = NULL; 139612ff971cSMatthew Dillon } else { 139712ff971cSMatthew Dillon hmp = NULL; 139812ff971cSMatthew Dillon } 139912ff971cSMatthew Dillon 140012ff971cSMatthew Dillon /* 140165c894ffSMatthew Dillon * Only flush the volume header if asked to, plus the inode must also 140265c894ffSMatthew Dillon * be the PFS root. 
140340498d1cSMatthew Dillon */ 140465c894ffSMatthew Dillon if ((xop->head.flags & HAMMER2_XOP_VOLHDR) == 0) 140565c894ffSMatthew Dillon goto skip; 140640498d1cSMatthew Dillon if (ispfsroot == 0) 140740498d1cSMatthew Dillon goto skip; 140840498d1cSMatthew Dillon 140940498d1cSMatthew Dillon /* 141012ff971cSMatthew Dillon * Flush volume roots. Avoid replication, we only want to 141112ff971cSMatthew Dillon * flush each hammer2_dev (hmp) once. 141212ff971cSMatthew Dillon */ 1413c4421f07SMatthew Dillon for (j = clindex - 1; j >= 0; --j) { 14146f445d15SMatthew Dillon if ((chain = ip->cluster.array[j].chain) != NULL) { 141512ff971cSMatthew Dillon if (chain->hmp == hmp) { 141612ff971cSMatthew Dillon chain = NULL; /* safety */ 141712ff971cSMatthew Dillon goto skip; 141812ff971cSMatthew Dillon } 141912ff971cSMatthew Dillon } 142012ff971cSMatthew Dillon } 142112ff971cSMatthew Dillon chain = NULL; /* safety */ 142212ff971cSMatthew Dillon 142312ff971cSMatthew Dillon /* 142412ff971cSMatthew Dillon * spmp transaction. The super-root is never directly mounted so 142512ff971cSMatthew Dillon * there shouldn't be any vnodes, let alone any dirty vnodes 142653f84d31SMatthew Dillon * associated with it, so we shouldn't have to mess around with any 142753f84d31SMatthew Dillon * vnode flushes here. 142812ff971cSMatthew Dillon */ 142912ff971cSMatthew Dillon hammer2_trans_init(hmp->spmp, HAMMER2_TRANS_ISFLUSH); 143012ff971cSMatthew Dillon 143112ff971cSMatthew Dillon /* 14326f445d15SMatthew Dillon * We must flush the superroot down to the PFS iroot. Remember 14336f445d15SMatthew Dillon * that hammer2_chain_setflush() stops at inode boundaries, so 14346f445d15SMatthew Dillon * the pmp->iroot has been flushed and flagged down to the superroot, 14356f445d15SMatthew Dillon * but the volume root (vchain) probably has not yet been flagged. 
14366f445d15SMatthew Dillon */ 14376f445d15SMatthew Dillon if (hmp->spmp->iroot) { 14386f445d15SMatthew Dillon chain = hmp->spmp->iroot->cluster.array[0].chain; 14396f445d15SMatthew Dillon if (chain) { 14406f445d15SMatthew Dillon hammer2_chain_ref(chain); 14416f445d15SMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS); 14426f445d15SMatthew Dillon flush_error |= 14436f445d15SMatthew Dillon hammer2_flush(chain, 14446f445d15SMatthew Dillon HAMMER2_FLUSH_TOP | 14456f445d15SMatthew Dillon HAMMER2_FLUSH_INODE_STOP | 14466f445d15SMatthew Dillon HAMMER2_FLUSH_FSSYNC); 14476f445d15SMatthew Dillon hammer2_chain_unlock(chain); 14486f445d15SMatthew Dillon hammer2_chain_drop(chain); 14496f445d15SMatthew Dillon } 14506f445d15SMatthew Dillon } 14516f445d15SMatthew Dillon 14526f445d15SMatthew Dillon /* 145312ff971cSMatthew Dillon * Media mounts have two 'roots', vchain for the topology 145412ff971cSMatthew Dillon * and fchain for the free block table. Flush both. 145512ff971cSMatthew Dillon * 145612ff971cSMatthew Dillon * Note that the topology and free block table are handled 145712ff971cSMatthew Dillon * independently, so the free block table can wind up being 145812ff971cSMatthew Dillon * ahead of the topology. We depend on the bulk free scan 145912ff971cSMatthew Dillon * code to deal with any loose ends. 146065cacacfSMatthew Dillon * 146165cacacfSMatthew Dillon * vchain and fchain do not error on-lock since their data does 146265cacacfSMatthew Dillon * not have to be re-read from media. 
146312ff971cSMatthew Dillon */ 146412ff971cSMatthew Dillon hammer2_chain_ref(&hmp->vchain); 146512ff971cSMatthew Dillon hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 146612ff971cSMatthew Dillon hammer2_chain_ref(&hmp->fchain); 146712ff971cSMatthew Dillon hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 146812ff971cSMatthew Dillon if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) { 146912ff971cSMatthew Dillon /* 147012ff971cSMatthew Dillon * This will also modify vchain as a side effect, 147112ff971cSMatthew Dillon * mark vchain as modified now. 147212ff971cSMatthew Dillon */ 147312ff971cSMatthew Dillon hammer2_voldata_modify(hmp); 147412ff971cSMatthew Dillon chain = &hmp->fchain; 147565cacacfSMatthew Dillon flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP); 147612ff971cSMatthew Dillon KKASSERT(chain == &hmp->fchain); 147712ff971cSMatthew Dillon } 147812ff971cSMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 147912ff971cSMatthew Dillon hammer2_chain_unlock(&hmp->vchain); 148012ff971cSMatthew Dillon hammer2_chain_drop(&hmp->fchain); 148112ff971cSMatthew Dillon /* vchain dropped down below */ 148212ff971cSMatthew Dillon 148312ff971cSMatthew Dillon hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 148412ff971cSMatthew Dillon if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) { 148512ff971cSMatthew Dillon chain = &hmp->vchain; 148665cacacfSMatthew Dillon flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP); 148712ff971cSMatthew Dillon KKASSERT(chain == &hmp->vchain); 148812ff971cSMatthew Dillon } 148912ff971cSMatthew Dillon hammer2_chain_unlock(&hmp->vchain); 149012ff971cSMatthew Dillon hammer2_chain_drop(&hmp->vchain); 149112ff971cSMatthew Dillon 149212ff971cSMatthew Dillon /* 149312ff971cSMatthew Dillon * We can't safely flush the volume header until we have 149412ff971cSMatthew Dillon * flushed any device buffers which have built up. 
149512ff971cSMatthew Dillon * 149612ff971cSMatthew Dillon * XXX this isn't being incremental 149712ff971cSMatthew Dillon */ 149812ff971cSMatthew Dillon vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 149965cacacfSMatthew Dillon fsync_error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0); 150012ff971cSMatthew Dillon vn_unlock(hmp->devvp); 150165cacacfSMatthew Dillon if (fsync_error || flush_error) { 150265cacacfSMatthew Dillon kprintf("hammer2: sync error fsync=%d h2flush=0x%04x dev=%s\n", 150365cacacfSMatthew Dillon fsync_error, flush_error, hmp->devrepname); 150465cacacfSMatthew Dillon } 150512ff971cSMatthew Dillon 150612ff971cSMatthew Dillon /* 150712ff971cSMatthew Dillon * The flush code sets CHAIN_VOLUMESYNC to indicate that the 150812ff971cSMatthew Dillon * volume header needs synchronization via hmp->volsync. 150912ff971cSMatthew Dillon * 151012ff971cSMatthew Dillon * XXX synchronize the flag & data with only this flush XXX 151112ff971cSMatthew Dillon */ 151265cacacfSMatthew Dillon if (fsync_error == 0 && flush_error == 0 && 151312ff971cSMatthew Dillon (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) { 151412ff971cSMatthew Dillon struct buf *bp; 151565cacacfSMatthew Dillon int vol_error = 0; 151612ff971cSMatthew Dillon 151712ff971cSMatthew Dillon /* 151812ff971cSMatthew Dillon * Synchronize the disk before flushing the volume 151912ff971cSMatthew Dillon * header. 
152012ff971cSMatthew Dillon */ 152112ff971cSMatthew Dillon bp = getpbuf(NULL); 152212ff971cSMatthew Dillon bp->b_bio1.bio_offset = 0; 152312ff971cSMatthew Dillon bp->b_bufsize = 0; 152412ff971cSMatthew Dillon bp->b_bcount = 0; 152512ff971cSMatthew Dillon bp->b_cmd = BUF_CMD_FLUSH; 152612ff971cSMatthew Dillon bp->b_bio1.bio_done = biodone_sync; 152712ff971cSMatthew Dillon bp->b_bio1.bio_flags |= BIO_SYNC; 152812ff971cSMatthew Dillon vn_strategy(hmp->devvp, &bp->b_bio1); 152965cacacfSMatthew Dillon fsync_error = biowait(&bp->b_bio1, "h2vol"); 153012ff971cSMatthew Dillon relpbuf(bp, NULL); 153112ff971cSMatthew Dillon 153212ff971cSMatthew Dillon /* 153312ff971cSMatthew Dillon * Then we can safely flush the version of the 153412ff971cSMatthew Dillon * volume header synchronized by the flush code. 153512ff971cSMatthew Dillon */ 153612ff971cSMatthew Dillon j = hmp->volhdrno + 1; 153719808ac9SMatthew Dillon if (j < 0) 153819808ac9SMatthew Dillon j = 0; 153912ff971cSMatthew Dillon if (j >= HAMMER2_NUM_VOLHDRS) 154012ff971cSMatthew Dillon j = 0; 154112ff971cSMatthew Dillon if (j * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE > 154212ff971cSMatthew Dillon hmp->volsync.volu_size) { 154312ff971cSMatthew Dillon j = 0; 154412ff971cSMatthew Dillon } 15455d37f96dSMatthew Dillon if (hammer2_debug & 0x8000) { 15465d37f96dSMatthew Dillon /* debug only, avoid syslogd loop */ 154712ff971cSMatthew Dillon kprintf("sync volhdr %d %jd\n", 154812ff971cSMatthew Dillon j, (intmax_t)hmp->volsync.volu_size); 15495d37f96dSMatthew Dillon } 155012ff971cSMatthew Dillon bp = getblk(hmp->devvp, j * HAMMER2_ZONE_BYTES64, 155104b8e839SMatthew Dillon HAMMER2_PBUFSIZE, GETBLK_KVABIO, 0); 155212ff971cSMatthew Dillon atomic_clear_int(&hmp->vchain.flags, 155312ff971cSMatthew Dillon HAMMER2_CHAIN_VOLUMESYNC); 155404b8e839SMatthew Dillon bkvasync(bp); 155512ff971cSMatthew Dillon bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE); 155665cacacfSMatthew Dillon vol_error = bwrite(bp); 155712ff971cSMatthew Dillon 
hmp->volhdrno = j; 155865cacacfSMatthew Dillon if (vol_error) 155965cacacfSMatthew Dillon fsync_error = vol_error; 156012ff971cSMatthew Dillon } 156165cacacfSMatthew Dillon if (flush_error) 156265cacacfSMatthew Dillon total_error = flush_error; 156365cacacfSMatthew Dillon if (fsync_error) 156465cacacfSMatthew Dillon total_error = hammer2_errno_to_error(fsync_error); 156512ff971cSMatthew Dillon 1566ecfe89b8SMatthew Dillon /* spmp trans */ 1567ecfe89b8SMatthew Dillon hammer2_trans_done(hmp->spmp, HAMMER2_TRANS_ISFLUSH); 156812ff971cSMatthew Dillon skip: 1569c4421f07SMatthew Dillon hammer2_xop_feed(&xop->head, NULL, clindex, total_error); 157012ff971cSMatthew Dillon } 1571