132b800e6SMatthew Dillon /* 28138a154SMatthew Dillon * Copyright (c) 2011-2014 The DragonFly Project. All rights reserved. 332b800e6SMatthew Dillon * 432b800e6SMatthew Dillon * This code is derived from software contributed to The DragonFly Project 532b800e6SMatthew Dillon * by Matthew Dillon <dillon@dragonflybsd.org> 632b800e6SMatthew Dillon * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org> 732b800e6SMatthew Dillon * 832b800e6SMatthew Dillon * Redistribution and use in source and binary forms, with or without 932b800e6SMatthew Dillon * modification, are permitted provided that the following conditions 1032b800e6SMatthew Dillon * are met: 1132b800e6SMatthew Dillon * 1232b800e6SMatthew Dillon * 1. Redistributions of source code must retain the above copyright 1332b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer. 1432b800e6SMatthew Dillon * 2. Redistributions in binary form must reproduce the above copyright 1532b800e6SMatthew Dillon * notice, this list of conditions and the following disclaimer in 1632b800e6SMatthew Dillon * the documentation and/or other materials provided with the 1732b800e6SMatthew Dillon * distribution. 1832b800e6SMatthew Dillon * 3. Neither the name of The DragonFly Project nor the names of its 1932b800e6SMatthew Dillon * contributors may be used to endorse or promote products derived 2032b800e6SMatthew Dillon * from this software without specific, prior written permission. 2132b800e6SMatthew Dillon * 2232b800e6SMatthew Dillon * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 2332b800e6SMatthew Dillon * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 2432b800e6SMatthew Dillon * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 2532b800e6SMatthew Dillon * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE 2632b800e6SMatthew Dillon * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 2732b800e6SMatthew Dillon * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 2832b800e6SMatthew Dillon * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 2932b800e6SMatthew Dillon * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 3032b800e6SMatthew Dillon * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 3132b800e6SMatthew Dillon * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 3232b800e6SMatthew Dillon * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 3332b800e6SMatthew Dillon * SUCH DAMAGE. 3432b800e6SMatthew Dillon */ 3550456506SMatthew Dillon /* 3650456506SMatthew Dillon * TRANSACTION AND FLUSH HANDLING 3750456506SMatthew Dillon * 3850456506SMatthew Dillon * Deceptively simple but actually fairly difficult to implement properly is 3950456506SMatthew Dillon * how I would describe it. 4050456506SMatthew Dillon * 41da6f36f4SMatthew Dillon * Flushing generally occurs bottom-up but requires a top-down scan to 42da6f36f4SMatthew Dillon * locate chains with MODIFIED and/or UPDATE bits set. The ONFLUSH flag 43da6f36f4SMatthew Dillon * tells how to recurse downward to find these chains. 
4450456506SMatthew Dillon */ 4550456506SMatthew Dillon 4632b800e6SMatthew Dillon #include <sys/cdefs.h> 4732b800e6SMatthew Dillon #include <sys/param.h> 4832b800e6SMatthew Dillon #include <sys/systm.h> 4932b800e6SMatthew Dillon #include <sys/types.h> 5032b800e6SMatthew Dillon #include <sys/lock.h> 5132b800e6SMatthew Dillon #include <sys/uuid.h> 5232b800e6SMatthew Dillon 5332b800e6SMatthew Dillon #include "hammer2.h" 5432b800e6SMatthew Dillon 55925e4ad1SMatthew Dillon #define FLUSH_DEBUG 0 56925e4ad1SMatthew Dillon 57a71db85dSMatthew Dillon #define HAMMER2_FLUSH_DEPTH_LIMIT 10 /* stack recursion limit */ 58a71db85dSMatthew Dillon 59a71db85dSMatthew Dillon 6032b800e6SMatthew Dillon /* 6132b800e6SMatthew Dillon * Recursively flush the specified chain. The chain is locked and 6232b800e6SMatthew Dillon * referenced by the caller and will remain so on return. The chain 6332b800e6SMatthew Dillon * will remain referenced throughout but can temporarily lose its 6432b800e6SMatthew Dillon * lock during the recursion to avoid unnecessarily stalling user 6532b800e6SMatthew Dillon * processes. 
 */
struct hammer2_flush_info {
	hammer2_chain_t *parent;	/* current parent in the recursion (can be NULL) */
	hammer2_trans_t *trans;		/* transaction this flush runs under */
	int		depth;		/* current stack recursion depth */
	int		diddeferral;	/* non-zero if any chain was deferred this pass */
	int		cache_index;	/* chain lookup cache hint */
	struct h2_flush_list flushq;	/* chains deferred past the depth limit */
	hammer2_xid_t	sync_xid;	/* memory synchronization point */
	hammer2_tid_t	mirror_tid;	/* avoid digging through hmp */
	hammer2_tid_t	modify_tid;
	hammer2_chain_t *debug;		/* first skipped chain, for debug tracing */
};

typedef struct hammer2_flush_info hammer2_flush_info_t;

static void hammer2_flush_core(hammer2_flush_info_t *info,
				hammer2_chain_t *chain, int deleting);
static int hammer2_flush_recurse(hammer2_chain_t *child, void *data);

/*
 * For now use a global transaction manager.  What we ultimately want to do
 * is give each non-overlapping hmp/pmp group its own transaction manager.
 *
 * Transactions govern XID tracking on the physical media (the hmp), but they
 * also govern TID tracking which is per-PFS and thus might cross multiple
 * hmp's.  So we can't just stuff tmanage into hammer2_dev or
 * hammer2_pfs.
 */
static hammer2_trans_manage_t tmanage;

/*
 * One-time initialization of the global transaction manager: the
 * serialization lock, the queue of in-flight transactions, and the
 * XID space (flush_xid starts at 1, alloc_xid just past it).
 */
void
hammer2_trans_manage_init(void)
{
	lockinit(&tmanage.translk, "h2trans", 0, 0);
	TAILQ_INIT(&tmanage.transq);
	tmanage.flush_xid = 1;
	tmanage.alloc_xid = tmanage.flush_xid + 1;
}

/*
 * Allocate the next transaction XID from the global manager.  XID 0 is
 * reserved as an invalid value, so loop past it if the 32-bit counter
 * wraps.  The pmp argument is currently unused (global XID space).
 */
hammer2_xid_t
hammer2_trans_newxid(hammer2_pfs_t *pmp __unused)
{
	hammer2_xid_t xid;

	for (;;) {
		xid = atomic_fetchadd_int(&tmanage.alloc_xid, 1);
		if (xid)
			break;
	}
	return xid;
}

/*
 * Transaction support functions for writing to the filesystem.
 *
 * Initializing a new transaction allocates a transaction ID.  Typically
 * passed a pmp (hmp passed as NULL), indicating a cluster transaction.  Can
 * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single
 * media target.  The latter mode is used by the recovery code.
 *
 * TWO TRANSACTION IDs can run concurrently, where one is a flush and the
 * other is a set of any number of concurrent filesystem operations.
 * We can either have <running_fs_ops> + <waiting_flush> + <blocked_fs_ops>
 * or we can have <running_flush> + <concurrent_fs_ops>.
 *
 * During a flush, new fs_ops are only blocked until the fs_ops prior to
 * the flush complete.  The new fs_ops can then run concurrent with the flush.
 *
 * Buffer-cache transactions operate as fs_ops but never block.  A
 * buffer-cache flush will run either before or after the current pending
 * flush depending on its state.
 */
void
hammer2_trans_init(hammer2_trans_t *trans, hammer2_pfs_t *pmp, int flags)
{
	hammer2_trans_manage_t *tman;
	hammer2_trans_t *head;

	tman = &tmanage;

	bzero(trans, sizeof(*trans));
	trans->pmp = pmp;
	trans->flags = flags;
	trans->td = curthread;

	/* All queue manipulation below is serialized by translk */
	lockmgr(&tman->translk, LK_EXCLUSIVE);

	if (flags & HAMMER2_TRANS_ISFLUSH) {
		/*
		 * If multiple flushes are trying to run we have to
		 * wait until it is our turn.  All flushes are serialized.
		 *
		 * We queue ourselves and then wait to become the head
		 * of the queue, allowing all prior flushes to complete.
		 *
		 * Multiple normal transactions can share the current
		 * transaction id but a flush transaction needs its own
		 * unique TID for proper block table update accounting.
		 */
		++tman->flushcnt;
		++pmp->modify_tid;
		tman->flush_xid = hammer2_trans_newxid(pmp);
		trans->sync_xid = tman->flush_xid;
		trans->modify_tid = pmp->modify_tid;
		TAILQ_INSERT_TAIL(&tman->transq, trans, entry);
		if (TAILQ_FIRST(&tman->transq) != trans) {
			trans->blocked = 1;
			while (trans->blocked) {
				/* woken by hammer2_trans_done() */
				lksleep(&trans->sync_xid, &tman->translk,
					0, "h2multf", hz);
			}
		}
	} else if (tman->flushcnt == 0) {
		/*
		 * No flushes are pending, we can go.  Use prior flush_xid + 1.
		 *
		 * WARNING!  Also see hammer2_chain_setflush()
		 */
		TAILQ_INSERT_TAIL(&tman->transq, trans, entry);
		trans->sync_xid = tman->flush_xid + 1;

		/* XXX improve/optimize inode allocation */
	} else if (trans->flags & HAMMER2_TRANS_BUFCACHE) {
		/*
		 * A buffer cache transaction is requested while a flush
		 * is in progress.  The flush's PREFLUSH flag must be set
		 * in this situation.
		 *
		 * The buffer cache flush takes on the main flush's
		 * transaction id.
		 */
		TAILQ_FOREACH(head, &tman->transq, entry) {
			if (head->flags & HAMMER2_TRANS_ISFLUSH)
				break;
		}
		KKASSERT(head);
		KKASSERT(head->flags & HAMMER2_TRANS_PREFLUSH);
		trans->flags |= HAMMER2_TRANS_PREFLUSH;
		TAILQ_INSERT_AFTER(&tman->transq, head, trans, entry);
		trans->sync_xid = head->sync_xid;
		trans->modify_tid = head->modify_tid;
		trans->flags |= HAMMER2_TRANS_CONCURRENT;
		/* not allowed to block */
	} else {
		/*
		 * A normal transaction is requested while a flush is in
		 * progress.  We insert after the current flush and may
		 * block.
		 *
		 * WARNING!  Also see hammer2_chain_setflush()
		 */
		TAILQ_FOREACH(head, &tman->transq, entry) {
			if (head->flags & HAMMER2_TRANS_ISFLUSH)
				break;
		}
		KKASSERT(head);
		TAILQ_INSERT_AFTER(&tman->transq, head, trans, entry);
		trans->sync_xid = head->sync_xid + 1;
		trans->flags |= HAMMER2_TRANS_CONCURRENT;

		/*
		 * XXX for now we must block new transactions, synchronous
		 * flush mode is on by default.
		 *
		 * If synchronous flush mode is enabled concurrent
		 * frontend transactions during the flush are not
		 * allowed (except we don't have a choice for buffer
		 * cache ops).
		 */
		if (hammer2_synchronous_flush > 0 ||
		    TAILQ_FIRST(&tman->transq) != head) {
			trans->blocked = 1;
			while (trans->blocked) {
				lksleep(&trans->sync_xid, &tman->translk,
					0, "h2multf", hz);
			}
		}
	}
	if (flags & HAMMER2_TRANS_NEWINODE) {
		if (pmp->spmp_hmp) {
			/*
			 * Super-root transaction, all new inodes have an
			 * inode number of 1.  Normal pfs inode cache
			 * semantics are not used.
			 */
			trans->inode_tid = 1;
		} else {
			/*
			 * Normal transaction
			 */
			if (pmp->inode_tid < HAMMER2_INODE_START)
				pmp->inode_tid = HAMMER2_INODE_START;
			trans->inode_tid = pmp->inode_tid++;
		}
	}

	lockmgr(&tman->translk, LK_RELEASE);
}

/*
 * Finish a transaction started by hammer2_trans_init(): remove it from
 * the global queue and wake up any transactions that were blocked
 * behind it.
 */
void
hammer2_trans_done(hammer2_trans_t *trans)
{
	hammer2_trans_manage_t *tman;
	hammer2_trans_t *head;
	hammer2_trans_t *scan;

	tman = &tmanage;

	/*
	 * Remove.
	 */
	lockmgr(&tman->translk, LK_EXCLUSIVE);
	TAILQ_REMOVE(&tman->transq, trans, entry);
	head = TAILQ_FIRST(&tman->transq);

	/*
	 * Adjust flushcnt if this was a flush, clear TRANS_CONCURRENT
	 * up through the next flush.  (If the head is a flush then we
	 * stop there, unlike the unblock code following this section).
	 */
	if (trans->flags & HAMMER2_TRANS_ISFLUSH) {
		--tman->flushcnt;
		scan = head;
		while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) {
			atomic_clear_int(&scan->flags,
					 HAMMER2_TRANS_CONCURRENT);
			scan = TAILQ_NEXT(scan, entry);
		}
	}

	/*
	 * Unblock the head of the queue and any additional transactions
	 * up to the next flush.  The head can be a flush and it will be
	 * unblocked along with the non-flush transactions following it
	 * (which are allowed to run concurrently with it).
	 *
	 * In synchronous flush mode we stop if the head transaction is
	 * a flush.
	 */
	if (head && head->blocked) {
		head->blocked = 0;
		wakeup(&head->sync_xid);

		if (hammer2_synchronous_flush > 0)
			scan = head;
		else
			scan = TAILQ_NEXT(head, entry);
		while (scan && (scan->flags & HAMMER2_TRANS_ISFLUSH) == 0) {
			if (scan->blocked) {
				scan->blocked = 0;
				wakeup(&scan->sync_xid);
			}
			scan = TAILQ_NEXT(scan, entry);
		}
	}
	lockmgr(&tman->translk, LK_RELEASE);
}

/*
 * Flush the chain and all modified sub-chains through the specified
 * synchronization point, propagating parent chain modifications, modify_tid,
 * and mirror_tid updates back up as needed.
 *
 * Caller must have interlocked against any non-flush-related modifying
 * operations in progress whose XXX values are less than or equal
 * to the passed sync_xid.
 *
 * Caller must have already vetted synchronization points to ensure they
 * are properly flushed.  Only snapshots and cluster flushes can create
 * these sorts of synchronization points.
 *
 * This routine can be called from several places but the most important
 * is from VFS_SYNC.
 *
 * chain is locked on call and will remain locked on return.  The chain's
 * UPDATE flag indicates that its parent's block table (which is not yet
 * part of the flush) should be updated.  The chain may be replaced by
 * the call if it was modified.
 */
void
hammer2_flush(hammer2_trans_t *trans, hammer2_chain_t *chain)
{
	hammer2_chain_t *scan;
	hammer2_flush_info_t info;
	int loops;

	/*
	 * Execute the recursive flush and handle deferrals.
	 *
	 * Chains can be ridiculously long (thousands deep), so to
	 * avoid blowing out the kernel stack the recursive flush has a
	 * depth limit.  Elements at the limit are placed on a list
	 * for re-execution after the stack has been popped.
	 */
	bzero(&info, sizeof(info));
	TAILQ_INIT(&info.flushq);
	info.trans = trans;
	info.sync_xid = trans->sync_xid;
	info.cache_index = -1;

	/*
	 * Calculate parent (can be NULL), if not NULL the flush core
	 * expects the parent to be referenced so it can easily lock/unlock
	 * it without it getting ripped up.
	 */
	if ((info.parent = chain->parent) != NULL)
		hammer2_chain_ref(info.parent);

	/*
	 * Extra ref needed because flush_core expects it when replacing
	 * chain.
	 */
	hammer2_chain_ref(chain);
	loops = 0;

	for (;;) {
		/*
		 * Unwind deep recursions which had been deferred.  This
		 * can leave the FLUSH_* bits set for these chains, which
		 * will be handled when we [re]flush chain after the unwind.
		 */
		while ((scan = TAILQ_FIRST(&info.flushq)) != NULL) {
			KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
			TAILQ_REMOVE(&info.flushq, scan, flush_node);
			atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);

			/*
			 * Now that we've popped back up we can do a secondary
			 * recursion on the deferred elements.
			 *
			 * NOTE: hammer2_flush() may replace scan.
			 */
			if (hammer2_debug & 0x0040)
				kprintf("deferred flush %p\n", scan);
			hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE);
			hammer2_flush(trans, scan);
			hammer2_chain_unlock(scan);
			hammer2_chain_drop(scan);	/* ref from deferral */
		}

		/*
		 * [re]flush chain.
		 */
		info.diddeferral = 0;
		hammer2_flush_core(&info, chain, 0);

		/*
		 * Only loop if deep recursions have been deferred.
		 */
		if (TAILQ_EMPTY(&info.flushq))
			break;

		/* warn periodically; a stuck deferral list means no progress */
		if (++loops % 1000 == 0) {
			kprintf("hammer2_flush: excessive loops on %p\n",
				chain);
			if (hammer2_debug & 0x100000)
				Debugger("hell4");
		}
	}
	hammer2_chain_drop(chain);
	if (info.parent)
		hammer2_chain_drop(info.parent);
}

/*
 * This is the core of the chain flushing code.  The chain is locked by the
 * caller and must also have an extra ref on it by the caller, and remains
 * locked and will have an extra ref on return.  Upon return, the caller can
 * test the UPDATE bit on the child to determine if the parent needs updating.
 *
 * (1) Determine if this node is a candidate for the flush, return if it is
 *     not.  fchain and vchain are always candidates for the flush.
 *
 * (2) If we recurse too deep the chain is entered onto the deferral list and
 *     the current flush stack is aborted until after the deferral list is
 *     run.
 *
 * (3) Recursively flush live children (rbtree).
This can create deferrals. 445da6f36f4SMatthew Dillon * A successful flush clears the MODIFIED and UPDATE bits on the children 446da6f36f4SMatthew Dillon * and typically causes the parent to be marked MODIFIED as the children 447da6f36f4SMatthew Dillon * update the parent's block table. A parent might already be marked 448da6f36f4SMatthew Dillon * MODIFIED due to a deletion (whos blocktable update in the parent is 449da6f36f4SMatthew Dillon * handled by the frontend), or if the parent itself is modified by the 450da6f36f4SMatthew Dillon * frontend for other reasons. 4518138a154SMatthew Dillon * 452da6f36f4SMatthew Dillon * (4) Permanently disconnected sub-trees are cleaned up by the front-end. 453da6f36f4SMatthew Dillon * Deleted-but-open inodes can still be individually flushed via the 454da6f36f4SMatthew Dillon * filesystem syncer. 4558138a154SMatthew Dillon * 456da6f36f4SMatthew Dillon * (5) Note that an unmodified child may still need the block table in its 457da6f36f4SMatthew Dillon * parent updated (e.g. rename/move). The child will have UPDATE set 458da6f36f4SMatthew Dillon * in this case. 4598138a154SMatthew Dillon * 46050456506SMatthew Dillon * WARNING ON BREF MODIFY_TID/MIRROR_TID 461925e4ad1SMatthew Dillon * 462e513e77eSMatthew Dillon * blockref.modify_tid is consistent only within a PFS, and will not be 463e513e77eSMatthew Dillon * consistent during synchronization. mirror_tid is consistent across the 464e513e77eSMatthew Dillon * block device regardless of the PFS. 
465476d2aadSMatthew Dillon */ 46632b800e6SMatthew Dillon static void 467da6f36f4SMatthew Dillon hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain, 4688138a154SMatthew Dillon int deleting) 46932b800e6SMatthew Dillon { 470da6f36f4SMatthew Dillon hammer2_chain_t *parent; 471506bd6d1SMatthew Dillon hammer2_dev_t *hmp; 472925e4ad1SMatthew Dillon int diddeferral; 473da6f36f4SMatthew Dillon 474da6f36f4SMatthew Dillon /* 475da6f36f4SMatthew Dillon * (1) Optimize downward recursion to locate nodes needing action. 476da6f36f4SMatthew Dillon * Nothing to do if none of these flags are set. 477da6f36f4SMatthew Dillon */ 478850687d2SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_FLUSH_MASK) == 0) { 479850687d2SMatthew Dillon if (hammer2_debug & 0x200) { 480850687d2SMatthew Dillon if (info->debug == NULL) 481850687d2SMatthew Dillon info->debug = chain; 482850687d2SMatthew Dillon } else { 483da6f36f4SMatthew Dillon return; 484850687d2SMatthew Dillon } 485850687d2SMatthew Dillon } 48632b800e6SMatthew Dillon 487a5913bdfSMatthew Dillon hmp = chain->hmp; 488925e4ad1SMatthew Dillon diddeferral = info->diddeferral; 489da6f36f4SMatthew Dillon parent = info->parent; /* can be NULL */ 490925e4ad1SMatthew Dillon 4910924b3f8SMatthew Dillon /* 492da6f36f4SMatthew Dillon * Downward search recursion 493ea155208SMatthew Dillon */ 494925e4ad1SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_DEFERRED) { 495da6f36f4SMatthew Dillon /* 496da6f36f4SMatthew Dillon * Already deferred. 497da6f36f4SMatthew Dillon */ 498925e4ad1SMatthew Dillon ++info->diddeferral; 499925e4ad1SMatthew Dillon } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 500da6f36f4SMatthew Dillon /* 501da6f36f4SMatthew Dillon * Recursion depth reached. 
502da6f36f4SMatthew Dillon */ 5030dea3156SMatthew Dillon hammer2_chain_ref(chain); 504da6f36f4SMatthew Dillon TAILQ_INSERT_TAIL(&info->flushq, chain, flush_node); 505da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_DEFERRED); 506925e4ad1SMatthew Dillon ++info->diddeferral; 507da6f36f4SMatthew Dillon } else if (chain->flags & HAMMER2_CHAIN_ONFLUSH) { 5088138a154SMatthew Dillon /* 509da6f36f4SMatthew Dillon * Downward recursion search (actual flush occurs bottom-up). 510da6f36f4SMatthew Dillon * pre-clear ONFLUSH. It can get set again due to races, 511da6f36f4SMatthew Dillon * which we want so the scan finds us again in the next flush. 5128138a154SMatthew Dillon */ 513da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 5148138a154SMatthew Dillon info->parent = chain; 51594491fa0SMatthew Dillon hammer2_spin_ex(&chain->core.spin); 516da6f36f4SMatthew Dillon RB_SCAN(hammer2_chain_tree, &chain->core.rbtree, 517da6f36f4SMatthew Dillon NULL, hammer2_flush_recurse, info); 51894491fa0SMatthew Dillon hammer2_spin_unex(&chain->core.spin); 519da6f36f4SMatthew Dillon info->parent = parent; 520da6f36f4SMatthew Dillon if (info->diddeferral) 521da6f36f4SMatthew Dillon hammer2_chain_setflush(info->trans, chain); 5228138a154SMatthew Dillon } 5230924b3f8SMatthew Dillon 52432b800e6SMatthew Dillon /* 525da6f36f4SMatthew Dillon * Now we are in the bottom-up part of the recursion. 526da6f36f4SMatthew Dillon * 527da6f36f4SMatthew Dillon * Do not update chain if lower layers were deferred. 5288138a154SMatthew Dillon */ 529da6f36f4SMatthew Dillon if (info->diddeferral) 5308138a154SMatthew Dillon goto done; 5318138a154SMatthew Dillon 5328138a154SMatthew Dillon /* 533da6f36f4SMatthew Dillon * Propagate the DESTROY flag downwards. This dummies up the flush 534da6f36f4SMatthew Dillon * code and tries to invalidate related buffer cache buffers to 535da6f36f4SMatthew Dillon * avoid the disk write. 
536623d43d4SMatthew Dillon */ 537da6f36f4SMatthew Dillon if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY)) 538da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 539623d43d4SMatthew Dillon 540623d43d4SMatthew Dillon /* 541da6f36f4SMatthew Dillon * Chain was already modified or has become modified, flush it out. 542da6f36f4SMatthew Dillon */ 543da6f36f4SMatthew Dillon again: 544850687d2SMatthew Dillon if ((hammer2_debug & 0x200) && 545850687d2SMatthew Dillon info->debug && 546850687d2SMatthew Dillon (chain->flags & (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_UPDATE))) { 547850687d2SMatthew Dillon hammer2_chain_t *scan = chain; 548850687d2SMatthew Dillon 549850687d2SMatthew Dillon kprintf("DISCONNECTED FLUSH %p->%p\n", info->debug, chain); 550850687d2SMatthew Dillon while (scan) { 551850687d2SMatthew Dillon kprintf(" chain %p [%08x] bref=%016jx:%02x\n", 552850687d2SMatthew Dillon scan, scan->flags, 553850687d2SMatthew Dillon scan->bref.key, scan->bref.type); 554850687d2SMatthew Dillon if (scan == info->debug) 555850687d2SMatthew Dillon break; 556850687d2SMatthew Dillon scan = scan->parent; 557850687d2SMatthew Dillon } 558850687d2SMatthew Dillon } 559850687d2SMatthew Dillon 560da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 561da6f36f4SMatthew Dillon /* 562e513e77eSMatthew Dillon * Dispose of the modified bit. 563e513e77eSMatthew Dillon * 564e513e77eSMatthew Dillon * UPDATE should already be set. 565e513e77eSMatthew Dillon * bref.mirror_tid should already be set. 56632b800e6SMatthew Dillon */ 567da6f36f4SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_UPDATE) || 56850456506SMatthew Dillon chain == &hmp->vchain); 5690dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 5708db69c9fSMatthew Dillon 5718db69c9fSMatthew Dillon /* 572e513e77eSMatthew Dillon * Manage threads waiting for excessive dirty memory to 573e513e77eSMatthew Dillon * be retired. 
5748db69c9fSMatthew Dillon */ 575e513e77eSMatthew Dillon if (chain->pmp) 576e513e77eSMatthew Dillon hammer2_pfs_memory_wakeup(chain->pmp); 5778138a154SMatthew Dillon 578da6f36f4SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_UPDATE) || 5798138a154SMatthew Dillon chain == &hmp->vchain || 5808138a154SMatthew Dillon chain == &hmp->fchain) { 5818138a154SMatthew Dillon /* 5828138a154SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared, 5838138a154SMatthew Dillon * net -1 ref. 5848138a154SMatthew Dillon */ 5850dea3156SMatthew Dillon hammer2_chain_drop(chain); 5868138a154SMatthew Dillon } else { 5878138a154SMatthew Dillon /* 5888138a154SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared and 589da6f36f4SMatthew Dillon * set a ref for the UPDATE bit we are setting. Net 590da6f36f4SMatthew Dillon * 0 refs. 5918138a154SMatthew Dillon */ 592da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 5930dea3156SMatthew Dillon } 5940dea3156SMatthew Dillon 5950dea3156SMatthew Dillon /* 596a71db85dSMatthew Dillon * Issue the flush. This is indirect via the DIO. 5970dea3156SMatthew Dillon * 598a71db85dSMatthew Dillon * NOTE: A DELETED node that reaches this point must be 599a71db85dSMatthew Dillon * flushed for synchronization point consistency. 600a71db85dSMatthew Dillon * 601a71db85dSMatthew Dillon * NOTE: Even though MODIFIED was already set, the related DIO 602a71db85dSMatthew Dillon * might not be dirty due to a system buffer cache 603a71db85dSMatthew Dillon * flush and must be set dirty if we are going to make 604a71db85dSMatthew Dillon * further modifications to the buffer. Chains with 605a71db85dSMatthew Dillon * embedded data don't need this. 
6060dea3156SMatthew Dillon */ 607a7720be7SMatthew Dillon if (hammer2_debug & 0x1000) { 608da6f36f4SMatthew Dillon kprintf("Flush %p.%d %016jx/%d sync_xid=%08x " 609da6f36f4SMatthew Dillon "data=%016jx\n", 610a7720be7SMatthew Dillon chain, chain->bref.type, 611a7720be7SMatthew Dillon chain->bref.key, chain->bref.keybits, 612da6f36f4SMatthew Dillon info->sync_xid, 613da6f36f4SMatthew Dillon chain->bref.data_off); 614a7720be7SMatthew Dillon } 615a7720be7SMatthew Dillon if (hammer2_debug & 0x2000) { 616a7720be7SMatthew Dillon Debugger("Flush hell"); 617a7720be7SMatthew Dillon } 61810136ab6SMatthew Dillon 61932b800e6SMatthew Dillon /* 620da6f36f4SMatthew Dillon * Update chain CRCs for flush. 62132b800e6SMatthew Dillon * 622da6f36f4SMatthew Dillon * NOTE: Volume headers are NOT flushed here as they require 623da6f36f4SMatthew Dillon * special processing. 62432b800e6SMatthew Dillon */ 62532b800e6SMatthew Dillon switch(chain->bref.type) { 6261a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 627a71db85dSMatthew Dillon /* 628e513e77eSMatthew Dillon * Update the volume header's freemap_tid to the 629e513e77eSMatthew Dillon * freemap's flushing mirror_tid. 
630e513e77eSMatthew Dillon * 631a71db85dSMatthew Dillon * (note: embedded data, do not call setdirty) 632a71db85dSMatthew Dillon */ 63350456506SMatthew Dillon KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED); 634e513e77eSMatthew Dillon KKASSERT(chain == &hmp->fchain); 635e513e77eSMatthew Dillon hmp->voldata.freemap_tid = chain->bref.mirror_tid; 636e513e77eSMatthew Dillon kprintf("sync freemap mirror_tid %08jx\n", 637e513e77eSMatthew Dillon (intmax_t)chain->bref.mirror_tid); 638e513e77eSMatthew Dillon 639e513e77eSMatthew Dillon /* 640e513e77eSMatthew Dillon * The freemap can be flushed independently of the 641e513e77eSMatthew Dillon * main topology, but for the case where it is 642e513e77eSMatthew Dillon * flushed in the same transaction, and flushed 643e513e77eSMatthew Dillon * before vchain (a case we want to allow for 644e513e77eSMatthew Dillon * performance reasons), make sure modifications 645e513e77eSMatthew Dillon * made during the flush under vchain use a new 646e513e77eSMatthew Dillon * transaction id. 647e513e77eSMatthew Dillon * 648e513e77eSMatthew Dillon * Otherwise the mount recovery code will get confused. 649e513e77eSMatthew Dillon */ 650e513e77eSMatthew Dillon ++hmp->voldata.mirror_tid; 6511a7cfe5aSMatthew Dillon break; 65232b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 65332b800e6SMatthew Dillon /* 654e513e77eSMatthew Dillon * The free block table is flushed by 655e513e77eSMatthew Dillon * hammer2_vfs_sync() before it flushes vchain. 656e513e77eSMatthew Dillon * We must still hold fchain locked while copying 657e513e77eSMatthew Dillon * voldata to volsync, however. 
658a71db85dSMatthew Dillon * 659a71db85dSMatthew Dillon * (note: embedded data, do not call setdirty) 6601a7cfe5aSMatthew Dillon */ 66150456506SMatthew Dillon hammer2_voldata_lock(hmp); 662da6f36f4SMatthew Dillon hammer2_chain_lock(&hmp->fchain, 663da6f36f4SMatthew Dillon HAMMER2_RESOLVE_ALWAYS); 664e513e77eSMatthew Dillon kprintf("sync volume mirror_tid %08jx\n", 665da6f36f4SMatthew Dillon (intmax_t)chain->bref.mirror_tid); 6661a7cfe5aSMatthew Dillon 6671a7cfe5aSMatthew Dillon /* 668e513e77eSMatthew Dillon * Update the volume header's mirror_tid to the 669e513e77eSMatthew Dillon * main topology's flushing mirror_tid. It is 670e513e77eSMatthew Dillon * possible that voldata.mirror_tid is already 671e513e77eSMatthew Dillon * beyond bref.mirror_tid due to the bump we made 672e513e77eSMatthew Dillon * above in BREF_TYPE_FREEMAP. 673e513e77eSMatthew Dillon */ 674e513e77eSMatthew Dillon if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) { 675e513e77eSMatthew Dillon hmp->voldata.mirror_tid = 676e513e77eSMatthew Dillon chain->bref.mirror_tid; 677e513e77eSMatthew Dillon } 678e513e77eSMatthew Dillon 679e513e77eSMatthew Dillon /* 680da6f36f4SMatthew Dillon * The volume header is flushed manually by the 681da6f36f4SMatthew Dillon * syncer, not here. All we do here is adjust the 682da6f36f4SMatthew Dillon * crc's. 
68332b800e6SMatthew Dillon */ 68432b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 685fdf62707SMatthew Dillon KKASSERT(chain->dio == NULL); 68632b800e6SMatthew Dillon 68732b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 68832b800e6SMatthew Dillon hammer2_icrc32( 68932b800e6SMatthew Dillon (char *)&hmp->voldata + 69032b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_OFF, 69132b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_SIZE); 69232b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 69332b800e6SMatthew Dillon hammer2_icrc32( 69432b800e6SMatthew Dillon (char *)&hmp->voldata + 69532b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_OFF, 69632b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_SIZE); 69732b800e6SMatthew Dillon hmp->voldata.icrc_volheader = 69832b800e6SMatthew Dillon hammer2_icrc32( 69932b800e6SMatthew Dillon (char *)&hmp->voldata + 70032b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_OFF, 70132b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_SIZE); 702e513e77eSMatthew Dillon 703e513e77eSMatthew Dillon kprintf("syncvolhdr %016jx %016jx\n", 704e513e77eSMatthew Dillon hmp->voldata.mirror_tid, 705e513e77eSMatthew Dillon hmp->vchain.bref.mirror_tid); 70632b800e6SMatthew Dillon hmp->volsync = hmp->voldata; 7070dea3156SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 70893f3933aSMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 70950456506SMatthew Dillon hammer2_voldata_unlock(hmp); 71032b800e6SMatthew Dillon break; 71132b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_DATA: 71232b800e6SMatthew Dillon /* 713da6f36f4SMatthew Dillon * Data elements have already been flushed via the 714da6f36f4SMatthew Dillon * logical file buffer cache. Their hash was set in 715a71db85dSMatthew Dillon * the bref by the vop_write code. Do not re-dirty. 71632b800e6SMatthew Dillon * 717da6f36f4SMatthew Dillon * Make sure any device buffer(s) have been flushed 718da6f36f4SMatthew Dillon * out here (there aren't usually any to flush) XXX. 
71932b800e6SMatthew Dillon */ 72032b800e6SMatthew Dillon break; 721512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 7221a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 72391caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 724da6f36f4SMatthew Dillon /* 725da6f36f4SMatthew Dillon * Buffer I/O will be cleaned up when the volume is 726da6f36f4SMatthew Dillon * flushed (but the kernel is free to flush it before 727da6f36f4SMatthew Dillon * then, as well). 728da6f36f4SMatthew Dillon */ 72950456506SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 730a71db85dSMatthew Dillon hammer2_chain_setcheck(chain, chain->data); 73150456506SMatthew Dillon break; 73291caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 733a71db85dSMatthew Dillon /* 734a71db85dSMatthew Dillon * NOTE: We must call io_setdirty() to make any late 735a71db85dSMatthew Dillon * changes to the inode data, the system might 736a71db85dSMatthew Dillon * have already flushed the buffer. 737a71db85dSMatthew Dillon */ 738da6f36f4SMatthew Dillon if (chain->data->ipdata.op_flags & 739da6f36f4SMatthew Dillon HAMMER2_OPFLAG_PFSROOT) { 740837bd39bSMatthew Dillon /* 741da6f36f4SMatthew Dillon * non-NULL pmp if mounted as a PFS. We must 74218e8ab5fSMatthew Dillon * sync fields cached in the pmp? XXX 743837bd39bSMatthew Dillon */ 744837bd39bSMatthew Dillon hammer2_inode_data_t *ipdata; 745837bd39bSMatthew Dillon 746a71db85dSMatthew Dillon hammer2_io_setdirty(chain->dio); 747837bd39bSMatthew Dillon ipdata = &chain->data->ipdata; 748e513e77eSMatthew Dillon if (chain->pmp) { 749e513e77eSMatthew Dillon ipdata->pfs_inum = 750e513e77eSMatthew Dillon chain->pmp->inode_tid; 751e513e77eSMatthew Dillon } 75250456506SMatthew Dillon } else { 75350456506SMatthew Dillon /* can't be mounted as a PFS */ 75450456506SMatthew Dillon } 755b3659de2SMatthew Dillon 756b3659de2SMatthew Dillon /* 757b3659de2SMatthew Dillon * Update inode statistics. 
Pending stats in chain 758b3659de2SMatthew Dillon * are cleared out on UPDATE so expect that bit to 759b3659de2SMatthew Dillon * be set here too or the statistics will not be 760b3659de2SMatthew Dillon * rolled-up properly. 761*0cc33e20SMatthew Dillon * 762*0cc33e20SMatthew Dillon * (note: rollup data does not effect modify_tid 763*0cc33e20SMatthew Dillon * based synchronization checks and can be 764*0cc33e20SMatthew Dillon * different). 765b3659de2SMatthew Dillon */ 766a71db85dSMatthew Dillon if (chain->data_count || chain->inode_count) { 767b3659de2SMatthew Dillon hammer2_inode_data_t *ipdata; 768b3659de2SMatthew Dillon 769b3659de2SMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_UPDATE); 770a71db85dSMatthew Dillon hammer2_io_setdirty(chain->dio); 771b3659de2SMatthew Dillon ipdata = &chain->data->ipdata; 772b3659de2SMatthew Dillon ipdata->data_count += chain->data_count; 773b3659de2SMatthew Dillon ipdata->inode_count += chain->inode_count; 774b3659de2SMatthew Dillon } 775512beabdSMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 776a71db85dSMatthew Dillon hammer2_chain_setcheck(chain, chain->data); 7771a7cfe5aSMatthew Dillon break; 77832b800e6SMatthew Dillon default: 77991caa51cSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 780da6f36f4SMatthew Dillon panic("hammer2_flush_core: unsupported " 781da6f36f4SMatthew Dillon "embedded bref %d", 78291caa51cSMatthew Dillon chain->bref.type); 78391caa51cSMatthew Dillon /* NOT REACHED */ 78432b800e6SMatthew Dillon } 78532b800e6SMatthew Dillon 78632b800e6SMatthew Dillon /* 787da6f36f4SMatthew Dillon * If the chain was destroyed try to avoid unnecessary I/O. 788da6f36f4SMatthew Dillon * (this only really works if the DIO system buffer is the 789da6f36f4SMatthew Dillon * same size as chain->bytes). 
790da6f36f4SMatthew Dillon */ 79105dd26e4SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DESTROY) && chain->dio) { 792da6f36f4SMatthew Dillon hammer2_io_setinval(chain->dio, chain->bytes); 793da6f36f4SMatthew Dillon } 794da6f36f4SMatthew Dillon } 795da6f36f4SMatthew Dillon 796da6f36f4SMatthew Dillon /* 797da6f36f4SMatthew Dillon * If UPDATE is set the parent block table may need to be updated. 798da6f36f4SMatthew Dillon * 799da6f36f4SMatthew Dillon * NOTE: UPDATE may be set on vchain or fchain in which case 800da6f36f4SMatthew Dillon * parent could be NULL. It's easiest to allow the case 801da6f36f4SMatthew Dillon * and test for NULL. parent can also wind up being NULL 802da6f36f4SMatthew Dillon * due to a deletion so we need to handle the case anyway. 803da6f36f4SMatthew Dillon * 804da6f36f4SMatthew Dillon * If no parent exists we can just clear the UPDATE bit. If the 805da6f36f4SMatthew Dillon * chain gets reattached later on the bit will simply get set 806da6f36f4SMatthew Dillon * again. 807da6f36f4SMatthew Dillon */ 808da6f36f4SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_UPDATE) && parent == NULL) { 809da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 810da6f36f4SMatthew Dillon hammer2_chain_drop(chain); 811da6f36f4SMatthew Dillon } 812da6f36f4SMatthew Dillon 813da6f36f4SMatthew Dillon /* 814da6f36f4SMatthew Dillon * The chain may need its blockrefs updated in the parent. This 815da6f36f4SMatthew Dillon * requires some fancy footwork. 816da6f36f4SMatthew Dillon */ 817da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_UPDATE) { 818da6f36f4SMatthew Dillon hammer2_blockref_t *base; 819da6f36f4SMatthew Dillon int count; 820da6f36f4SMatthew Dillon 821da6f36f4SMatthew Dillon /* 822da6f36f4SMatthew Dillon * Both parent and chain must be locked. This requires 823da6f36f4SMatthew Dillon * temporarily unlocking the chain. 
We have to deal with 824da6f36f4SMatthew Dillon * the case where the chain might be reparented or modified 825da6f36f4SMatthew Dillon * while it was unlocked. 826da6f36f4SMatthew Dillon */ 827da6f36f4SMatthew Dillon hammer2_chain_unlock(chain); 828da6f36f4SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 829da6f36f4SMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE); 830da6f36f4SMatthew Dillon if (chain->parent != parent) { 831da6f36f4SMatthew Dillon kprintf("PARENT MISMATCH ch=%p p=%p/%p\n", chain, chain->parent, parent); 832da6f36f4SMatthew Dillon hammer2_chain_unlock(parent); 833da6f36f4SMatthew Dillon goto done; 834da6f36f4SMatthew Dillon } 835da6f36f4SMatthew Dillon 836da6f36f4SMatthew Dillon /* 837da6f36f4SMatthew Dillon * Check race condition. If someone got in and modified 838da6f36f4SMatthew Dillon * it again while it was unlocked, we have to loop up. 839da6f36f4SMatthew Dillon */ 840da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 841da6f36f4SMatthew Dillon hammer2_chain_unlock(parent); 842da6f36f4SMatthew Dillon kprintf("hammer2_flush: chain %p flush-mod race\n", 843da6f36f4SMatthew Dillon chain); 844da6f36f4SMatthew Dillon goto again; 845da6f36f4SMatthew Dillon } 846da6f36f4SMatthew Dillon 847da6f36f4SMatthew Dillon /* 848da6f36f4SMatthew Dillon * Clear UPDATE flag 849da6f36f4SMatthew Dillon */ 850da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_UPDATE) { 851da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 852da6f36f4SMatthew Dillon hammer2_chain_drop(chain); 853da6f36f4SMatthew Dillon } 854da6f36f4SMatthew Dillon hammer2_chain_modify(info->trans, parent, 0); 855da6f36f4SMatthew Dillon 856da6f36f4SMatthew Dillon /* 857da6f36f4SMatthew Dillon * Calculate blockmap pointer 858da6f36f4SMatthew Dillon */ 859da6f36f4SMatthew Dillon switch(parent->bref.type) { 860da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 861da6f36f4SMatthew Dillon /* 862da6f36f4SMatthew Dillon 
* Access the inode's block array. However, there is 863da6f36f4SMatthew Dillon * no block array if the inode is flagged DIRECTDATA. 864da6f36f4SMatthew Dillon */ 865da6f36f4SMatthew Dillon if (parent->data && 866da6f36f4SMatthew Dillon (parent->data->ipdata.op_flags & 867da6f36f4SMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA) == 0) { 868da6f36f4SMatthew Dillon base = &parent->data-> 869da6f36f4SMatthew Dillon ipdata.u.blockset.blockref[0]; 870da6f36f4SMatthew Dillon } else { 871da6f36f4SMatthew Dillon base = NULL; 872da6f36f4SMatthew Dillon } 873da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 874da6f36f4SMatthew Dillon break; 875da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 876da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 877da6f36f4SMatthew Dillon if (parent->data) 878da6f36f4SMatthew Dillon base = &parent->data->npdata[0]; 879da6f36f4SMatthew Dillon else 880da6f36f4SMatthew Dillon base = NULL; 881da6f36f4SMatthew Dillon count = parent->bytes / sizeof(hammer2_blockref_t); 882da6f36f4SMatthew Dillon break; 883da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 884da6f36f4SMatthew Dillon base = &chain->hmp->voldata.sroot_blockset.blockref[0]; 885da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 886da6f36f4SMatthew Dillon break; 887da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 888da6f36f4SMatthew Dillon base = &parent->data->npdata[0]; 889da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 890da6f36f4SMatthew Dillon break; 891da6f36f4SMatthew Dillon default: 892da6f36f4SMatthew Dillon base = NULL; 893da6f36f4SMatthew Dillon count = 0; 894da6f36f4SMatthew Dillon panic("hammer2_flush_core: " 895da6f36f4SMatthew Dillon "unrecognized blockref type: %d", 896da6f36f4SMatthew Dillon parent->bref.type); 897da6f36f4SMatthew Dillon } 898da6f36f4SMatthew Dillon 899da6f36f4SMatthew Dillon /* 900da6f36f4SMatthew Dillon * Blocktable updates 901b3659de2SMatthew Dillon * 902b3659de2SMatthew Dillon * We synchronize pending statistics at this time. 
Delta 903b3659de2SMatthew Dillon * adjustments designated for the current and upper level 904b3659de2SMatthew Dillon * are synchronized. 905da6f36f4SMatthew Dillon */ 906da6f36f4SMatthew Dillon if (base && (chain->flags & HAMMER2_CHAIN_BMAPUPD)) { 907da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 908*0cc33e20SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 909da6f36f4SMatthew Dillon hammer2_base_delete(info->trans, parent, 910da6f36f4SMatthew Dillon base, count, 911da6f36f4SMatthew Dillon &info->cache_index, chain); 912*0cc33e20SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 913b3659de2SMatthew Dillon /* base_delete clears both bits */ 914b3659de2SMatthew Dillon } else { 915b3659de2SMatthew Dillon atomic_clear_int(&chain->flags, 916b3659de2SMatthew Dillon HAMMER2_CHAIN_BMAPUPD); 917da6f36f4SMatthew Dillon } 918da6f36f4SMatthew Dillon } 919da6f36f4SMatthew Dillon if (base && (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 920b3659de2SMatthew Dillon parent->data_count += chain->data_count + 921b3659de2SMatthew Dillon chain->data_count_up; 922b3659de2SMatthew Dillon parent->inode_count += chain->inode_count + 923b3659de2SMatthew Dillon chain->inode_count_up; 924b3659de2SMatthew Dillon chain->data_count = 0; 925b3659de2SMatthew Dillon chain->inode_count = 0; 926b3659de2SMatthew Dillon chain->data_count_up = 0; 927b3659de2SMatthew Dillon chain->inode_count_up = 0; 928*0cc33e20SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 929da6f36f4SMatthew Dillon hammer2_base_insert(info->trans, parent, 930da6f36f4SMatthew Dillon base, count, 931da6f36f4SMatthew Dillon &info->cache_index, chain); 932*0cc33e20SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 933b3659de2SMatthew Dillon /* base_insert sets BMAPPED */ 934da6f36f4SMatthew Dillon } 935da6f36f4SMatthew Dillon hammer2_chain_unlock(parent); 936da6f36f4SMatthew Dillon } 937da6f36f4SMatthew Dillon 938da6f36f4SMatthew Dillon /* 9398138a154SMatthew Dillon * Final cleanup after flush 
9408138a154SMatthew Dillon */ 9418138a154SMatthew Dillon done: 942e513e77eSMatthew Dillon KKASSERT(chain->refs > 0); 943850687d2SMatthew Dillon if (hammer2_debug & 0x200) { 944850687d2SMatthew Dillon if (info->debug == chain) 945850687d2SMatthew Dillon info->debug = NULL; 946850687d2SMatthew Dillon } 9478138a154SMatthew Dillon } 9488138a154SMatthew Dillon 9498138a154SMatthew Dillon /* 950da6f36f4SMatthew Dillon * Flush recursion helper, called from flush_core, calls flush_core. 9510dea3156SMatthew Dillon * 9528138a154SMatthew Dillon * Flushes the children of the caller's chain (info->parent), restricted 9538138a154SMatthew Dillon * by sync_tid. Set info->domodify if the child's blockref must propagate 9548138a154SMatthew Dillon * back up to the parent. 9550dea3156SMatthew Dillon * 9568138a154SMatthew Dillon * Ripouts can move child from rbtree to dbtree or dbq but the caller's 9578138a154SMatthew Dillon * flush scan order prevents any chains from being lost. A child can be 958da6f36f4SMatthew Dillon * executed more than once. 959ea155208SMatthew Dillon * 9608138a154SMatthew Dillon * WARNING! If we do not call hammer2_flush_core() we must update 9618138a154SMatthew Dillon * bref.mirror_tid ourselves to indicate that the flush has 9628138a154SMatthew Dillon * processed the child. 963925e4ad1SMatthew Dillon * 9648138a154SMatthew Dillon * WARNING! parent->core spinlock is held on entry and return. 96550456506SMatthew Dillon * 96650456506SMatthew Dillon * WARNING! Flushes do not cross PFS boundaries. Specifically, a flush must 96750456506SMatthew Dillon * not cross a pfs-root boundary. 
 */
static int
hammer2_flush_recurse(hammer2_chain_t *child, void *data)
{
	hammer2_flush_info_t *info = data;
	/*hammer2_trans_t *trans = info->trans;*/
	hammer2_chain_t *parent = info->parent;

	/*
	 * Entered with parent->core.spin held (see the WARNING in the
	 * function header); we must release it before doing any blocking
	 * lock operations below and reacquire it before returning.
	 *
	 * (child can never be fchain or vchain so a special check isn't
	 * needed).
	 *
	 * We must ref the child before unlocking the spinlock, so the
	 * child cannot be reclaimed out from under us while unlocked.
	 *
	 * The caller has added a ref to the parent so we can temporarily
	 * unlock it in order to lock the child.
	 */
	hammer2_chain_ref(child);
	hammer2_spin_unex(&parent->core.spin);

	hammer2_chain_unlock(parent);
	hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);

	/*
	 * Recurse and collect deferral data.  We're in the media flush,
	 * this can cross PFS boundaries.
	 *
	 * Only recurse if the child actually needs flushing (any bit in
	 * FLUSH_MASK set).  When flush debugging (hammer2_debug 0x200)
	 * is enabled we recurse unconditionally, tracking the top-most
	 * clean chain in info->debug so disconnected-flush situations
	 * can be reported by flush_core.
	 */
	if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) {
		++info->depth;
		hammer2_flush_core(info, child, 0); /* XXX deleting */
		--info->depth;
	} else if (hammer2_debug & 0x200) {
		if (info->debug == NULL)
			info->debug = child;
		++info->depth;
		hammer2_flush_core(info, child, 0); /* XXX deleting */
		--info->depth;
		if (info->debug == child)
			info->debug = NULL;
	}

	/*
	 * Relock to continue the loop.  The child's ref taken above is
	 * dropped only after the parent has been relocked, and the
	 * parent's spinlock is reacquired to satisfy the caller's
	 * RB_SCAN.  info->parent must not have changed across the
	 * recursion (asserted below).
	 */
	hammer2_chain_unlock(child);
	hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
	hammer2_chain_drop(child);
	KKASSERT(info->parent == parent);
	hammer2_spin_ex(&parent->core.spin);

	return (0);
}