/*
 * Copyright (c) 2011-2015 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * TRANSACTION AND FLUSH HANDLING
 *
 * Deceptively simple but actually fairly difficult to implement properly is
 * how I would describe it.
 *
 * Flushing generally occurs bottom-up but requires a top-down scan to
 * locate chains with MODIFIED and/or UPDATE bits set.  The ONFLUSH flag
 * tells how to recurse downward to find these chains.
4450456506SMatthew Dillon */ 4550456506SMatthew Dillon 4632b800e6SMatthew Dillon #include <sys/cdefs.h> 4732b800e6SMatthew Dillon #include <sys/param.h> 4832b800e6SMatthew Dillon #include <sys/systm.h> 4932b800e6SMatthew Dillon #include <sys/types.h> 5032b800e6SMatthew Dillon #include <sys/lock.h> 5132b800e6SMatthew Dillon #include <sys/uuid.h> 5232b800e6SMatthew Dillon 5332b800e6SMatthew Dillon #include "hammer2.h" 5432b800e6SMatthew Dillon 55925e4ad1SMatthew Dillon #define FLUSH_DEBUG 0 56925e4ad1SMatthew Dillon 57a71db85dSMatthew Dillon #define HAMMER2_FLUSH_DEPTH_LIMIT 10 /* stack recursion limit */ 58a71db85dSMatthew Dillon 59a71db85dSMatthew Dillon 6032b800e6SMatthew Dillon /* 6132b800e6SMatthew Dillon * Recursively flush the specified chain. The chain is locked and 6232b800e6SMatthew Dillon * referenced by the caller and will remain so on return. The chain 6332b800e6SMatthew Dillon * will remain referenced throughout but can temporarily lose its 6432b800e6SMatthew Dillon * lock during the recursion to avoid unnecessarily stalling user 6532b800e6SMatthew Dillon * processes. 
 */
/*
 * Per-flush state carried down the recursive flush.  One instance lives on
 * the stack of each hammer2_flush() invocation and is threaded through the
 * downward RB_SCAN recursion.
 */
struct hammer2_flush_info {
	hammer2_chain_t *parent;	/* current parent during recursion,
					   can be NULL at the top */
	int		depth;		/* recursion depth, checked against
					   HAMMER2_FLUSH_DEPTH_LIMIT */
	int		diddeferral;	/* non-zero if any chain below was
					   deferred instead of flushed */
	int		cache_index;	/* chain lookup cache hint */
	struct h2_flush_list flushq;	/* chains deferred due to depth */
	hammer2_chain_t	*debug;		/* debug-only trace anchor (0x200) */
};

typedef struct hammer2_flush_info hammer2_flush_info_t;

/* forward declarations for the mutually recursive flush core */
static void hammer2_flush_core(hammer2_flush_info_t *info,
				hammer2_chain_t *chain, int deleting);
static int hammer2_flush_recurse(hammer2_chain_t *child, void *data);

/*
 * Any per-pfs transaction initialization goes here.
 * (Currently nothing is required; kept as an explicit hook.)
 */
void
hammer2_trans_manage_init(hammer2_pfs_t *pmp)
{
}

/*
 * Transaction support for any modifying operation.  Transactions are used
 * in the pmp layer by the frontend and in the spmp layer by the backend.
 *
 * 0		 - Normal transaction, interlocked against flush
 *		   transaction.
 *
 * TRANS_ISFLUSH - Flush transaction, interlocked against normal
 *		   transaction.
 *
 * TRANS_BUFCACHE - Buffer cache transaction, no interlock.
 *
 * Initializing a new transaction allocates a transaction ID.  Typically
 * passed a pmp (hmp passed as NULL), indicating a cluster transaction.
Can 10410136ab6SMatthew Dillon * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single 10510136ab6SMatthew Dillon * media target. The latter mode is used by the recovery code. 10610136ab6SMatthew Dillon * 107623d43d4SMatthew Dillon * TWO TRANSACTION IDs can run concurrently, where one is a flush and the 108623d43d4SMatthew Dillon * other is a set of any number of concurrent filesystem operations. We 109623d43d4SMatthew Dillon * can either have <running_fs_ops> + <waiting_flush> + <blocked_fs_ops> 110623d43d4SMatthew Dillon * or we can have <running_flush> + <concurrent_fs_ops>. 1110dea3156SMatthew Dillon * 112623d43d4SMatthew Dillon * During a flush, new fs_ops are only blocked until the fs_ops prior to 113623d43d4SMatthew Dillon * the flush complete. The new fs_ops can then run concurrent with the flush. 114d001f460SMatthew Dillon * 115623d43d4SMatthew Dillon * Buffer-cache transactions operate as fs_ops but never block. A 116623d43d4SMatthew Dillon * buffer-cache flush will run either before or after the current pending 117623d43d4SMatthew Dillon * flush depending on its state. 1180dea3156SMatthew Dillon */ 1190dea3156SMatthew Dillon void 120c603b86bSMatthew Dillon hammer2_trans_init(hammer2_pfs_t *pmp, uint32_t flags) 1210dea3156SMatthew Dillon { 122c603b86bSMatthew Dillon uint32_t oflags; 123c603b86bSMatthew Dillon uint32_t nflags; 124c603b86bSMatthew Dillon int dowait; 125d001f460SMatthew Dillon 126c603b86bSMatthew Dillon for (;;) { 127c603b86bSMatthew Dillon oflags = pmp->trans.flags; 128c603b86bSMatthew Dillon cpu_ccfence(); 129c603b86bSMatthew Dillon dowait = 0; 130d001f460SMatthew Dillon 131d001f460SMatthew Dillon if (flags & HAMMER2_TRANS_ISFLUSH) { 132d001f460SMatthew Dillon /* 133c603b86bSMatthew Dillon * Requesting flush transaction. Wait for all 134c603b86bSMatthew Dillon * currently running transactions to finish. 
135355d67fcSMatthew Dillon */ 136c603b86bSMatthew Dillon if (oflags & HAMMER2_TRANS_MASK) { 137c603b86bSMatthew Dillon nflags = oflags | HAMMER2_TRANS_FPENDING | 138c603b86bSMatthew Dillon HAMMER2_TRANS_WAITING; 139c603b86bSMatthew Dillon dowait = 1; 140c603b86bSMatthew Dillon } else { 141c603b86bSMatthew Dillon nflags = (oflags | flags) + 1; 142c603b86bSMatthew Dillon } 143e513e77eSMatthew Dillon ++pmp->modify_tid; 144c603b86bSMatthew Dillon } else if (flags & HAMMER2_TRANS_BUFCACHE) { 145a7720be7SMatthew Dillon /* 146c603b86bSMatthew Dillon * Requesting strategy transaction. Generally 147c603b86bSMatthew Dillon * allowed in all situations unless a flush 148c603b86bSMatthew Dillon * is running without the preflush flag. 149a7720be7SMatthew Dillon */ 150c603b86bSMatthew Dillon if ((oflags & (HAMMER2_TRANS_ISFLUSH | 151c603b86bSMatthew Dillon HAMMER2_TRANS_PREFLUSH)) == 152c603b86bSMatthew Dillon HAMMER2_TRANS_ISFLUSH) { 153c603b86bSMatthew Dillon nflags = oflags | HAMMER2_TRANS_WAITING; 154c603b86bSMatthew Dillon dowait = 1; 155c603b86bSMatthew Dillon } else { 156c603b86bSMatthew Dillon nflags = (oflags | flags) + 1; 157052e0aa0SMatthew Dillon } 158a4dc31e0SMatthew Dillon } else { 159a4dc31e0SMatthew Dillon /* 160c603b86bSMatthew Dillon * Requesting normal transaction. Wait for any 161c603b86bSMatthew Dillon * flush to finish before allowing. 
162a4dc31e0SMatthew Dillon */ 163c603b86bSMatthew Dillon if (oflags & HAMMER2_TRANS_ISFLUSH) { 164c603b86bSMatthew Dillon nflags = oflags | HAMMER2_TRANS_WAITING; 165c603b86bSMatthew Dillon dowait = 1; 166c603b86bSMatthew Dillon } else { 167c603b86bSMatthew Dillon nflags = (oflags | flags) + 1; 168c603b86bSMatthew Dillon } 169c603b86bSMatthew Dillon } 170c603b86bSMatthew Dillon if (dowait) 171c603b86bSMatthew Dillon tsleep_interlock(&pmp->trans.sync_wait, 0); 172c603b86bSMatthew Dillon if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 173c603b86bSMatthew Dillon if (dowait == 0) 174a4dc31e0SMatthew Dillon break; 175c603b86bSMatthew Dillon tsleep(&pmp->trans.sync_wait, PINTERLOCKED, 176c603b86bSMatthew Dillon "h2trans", hz); 177c603b86bSMatthew Dillon } else { 178c603b86bSMatthew Dillon cpu_pause(); 179a7720be7SMatthew Dillon } 180c603b86bSMatthew Dillon /* retry */ 181c603b86bSMatthew Dillon } 182c603b86bSMatthew Dillon } 183a4dc31e0SMatthew Dillon 184c603b86bSMatthew Dillon void 185c603b86bSMatthew Dillon hammer2_trans_done(hammer2_pfs_t *pmp) 186c603b86bSMatthew Dillon { 187c603b86bSMatthew Dillon uint32_t oflags; 188c603b86bSMatthew Dillon uint32_t nflags; 189c603b86bSMatthew Dillon 190c603b86bSMatthew Dillon for (;;) { 191c603b86bSMatthew Dillon oflags = pmp->trans.flags; 192c603b86bSMatthew Dillon cpu_ccfence(); 193c603b86bSMatthew Dillon KKASSERT(oflags & HAMMER2_TRANS_MASK); 194c603b86bSMatthew Dillon if ((oflags & HAMMER2_TRANS_MASK) == 1) { 1958138a154SMatthew Dillon /* 196c603b86bSMatthew Dillon * This was the last transaction 1978138a154SMatthew Dillon */ 198c603b86bSMatthew Dillon nflags = (oflags - 1) & ~(HAMMER2_TRANS_ISFLUSH | 199c603b86bSMatthew Dillon HAMMER2_TRANS_BUFCACHE | 200c603b86bSMatthew Dillon HAMMER2_TRANS_PREFLUSH | 201c603b86bSMatthew Dillon HAMMER2_TRANS_FPENDING | 202c603b86bSMatthew Dillon HAMMER2_TRANS_WAITING); 20350456506SMatthew Dillon } else { 20450456506SMatthew Dillon /* 205c603b86bSMatthew Dillon * Still transactions 
pending 20650456506SMatthew Dillon */ 207c603b86bSMatthew Dillon nflags = oflags - 1; 208c603b86bSMatthew Dillon } 209c603b86bSMatthew Dillon if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) { 210c603b86bSMatthew Dillon if ((nflags & HAMMER2_TRANS_MASK) == 0 && 211c603b86bSMatthew Dillon (oflags & HAMMER2_TRANS_WAITING)) { 212c603b86bSMatthew Dillon wakeup(&pmp->trans.sync_wait); 213c603b86bSMatthew Dillon } 214c603b86bSMatthew Dillon break; 215c603b86bSMatthew Dillon } else { 216c603b86bSMatthew Dillon cpu_pause(); 217c603b86bSMatthew Dillon } 218c603b86bSMatthew Dillon /* retry */ 219044541cdSMatthew Dillon } 22050456506SMatthew Dillon } 22150456506SMatthew Dillon 222c603b86bSMatthew Dillon /* 223c603b86bSMatthew Dillon * Obtain new, unique inode number (not serialized by caller). 224c603b86bSMatthew Dillon */ 225c603b86bSMatthew Dillon hammer2_tid_t 226c603b86bSMatthew Dillon hammer2_trans_newinum(hammer2_pfs_t *pmp) 227c603b86bSMatthew Dillon { 228c603b86bSMatthew Dillon hammer2_tid_t tid; 229c603b86bSMatthew Dillon 230c603b86bSMatthew Dillon KKASSERT(sizeof(long) == 8); 231c603b86bSMatthew Dillon tid = atomic_fetchadd_long(&pmp->inode_tid, 1); 232c603b86bSMatthew Dillon 233c603b86bSMatthew Dillon return tid; 234a7720be7SMatthew Dillon } 235a7720be7SMatthew Dillon 236c603b86bSMatthew Dillon /* 237c603b86bSMatthew Dillon * Assert that a strategy call is ok here. Strategy calls are legal 238c603b86bSMatthew Dillon * 239c603b86bSMatthew Dillon * (1) In a normal transaction. 240c603b86bSMatthew Dillon * (2) In a flush transaction only if PREFLUSH is also set. 
 */
void
hammer2_trans_assert_strategy(hammer2_pfs_t *pmp)
{
	/* either no flush is running, or the flush set PREFLUSH */
	KKASSERT((pmp->trans.flags & HAMMER2_TRANS_ISFLUSH) == 0 ||
		 (pmp->trans.flags & HAMMER2_TRANS_PREFLUSH));
}


/*
 * Chains undergoing destruction are removed from the in-memory topology.
 * To avoid getting lost these chains are placed on the delayed flush
 * queue which will properly dispose of them.
 *
 * We do this instead of issuing an immediate flush in order to give
 * recursive deletions (rm -rf, etc) a chance to remove more of the
 * hierarchy, potentially allowing an enormous amount of write I/O to
 * be avoided.
 */
void
hammer2_delayed_flush(hammer2_chain_t *chain)
{
	/*
	 * Unlocked pre-check is an optimization only; the flags are
	 * re-tested under list_spin before the chain is queued.
	 */
	if ((chain->flags & HAMMER2_CHAIN_DELAYED) == 0) {
		hammer2_spin_ex(&chain->hmp->list_spin);
		if ((chain->flags & (HAMMER2_CHAIN_DELAYED |
				     HAMMER2_CHAIN_DEFERRED)) == 0) {
			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DELAYED |
						      HAMMER2_CHAIN_DEFERRED);
			TAILQ_INSERT_TAIL(&chain->hmp->flushq,
					  chain, flush_node);
			hammer2_chain_ref(chain);	/* ref held by flushq */
		}
		hammer2_spin_unex(&chain->hmp->list_spin);
	}
}

/*
 * Flush the chain and all modified sub-chains through the specified
 * synchronization point, propagating parent chain modifications, modify_tid,
 * and mirror_tid updates back up as needed.
 *
 * Caller must have already vetted synchronization points to ensure they
 * are properly flushed.  Only snapshots and cluster flushes can create
 * these sorts of synchronization points.
 *
 * This routine can be called from several places but the most important
 * is from VFS_SYNC.
 *
 * chain is locked on call and will remain locked on return.  The chain's
 * UPDATE flag indicates that its parent's block table (which is not yet
 * part of the flush) should be updated.
 * The chain may be replaced by the call if it was modified.
 */
void
hammer2_flush(hammer2_chain_t *chain, int istop)
{
	hammer2_chain_t *scan;
	hammer2_flush_info_t info;
	hammer2_dev_t *hmp;
	int loops;

	/*
	 * Execute the recursive flush and handle deferrals.
	 *
	 * Chains can be ridiculously long (thousands deep), so to
	 * avoid blowing out the kernel stack the recursive flush has a
	 * depth limit.  Elements at the limit are placed on a list
	 * for re-execution after the stack has been popped.
	 */
	bzero(&info, sizeof(info));
	TAILQ_INIT(&info.flushq);
	info.cache_index = -1;

	/*
	 * Calculate parent (can be NULL), if not NULL the flush core
	 * expects the parent to be referenced so it can easily lock/unlock
	 * it without it getting ripped up.
	 */
	if ((info.parent = chain->parent) != NULL)
		hammer2_chain_ref(info.parent);

	/*
	 * Extra ref needed because flush_core expects it when replacing
	 * chain.
	 */
	hammer2_chain_ref(chain);
	hmp = chain->hmp;
	loops = 0;

	for (;;) {
		/*
		 * Move hmp->flushq (chains queued by hammer2_delayed_flush)
		 * to info.flushq if non-empty so it can be processed.
		 */
		if (TAILQ_FIRST(&hmp->flushq) != NULL) {
			hammer2_spin_ex(&chain->hmp->list_spin);
			TAILQ_CONCAT(&info.flushq, &hmp->flushq, flush_node);
			hammer2_spin_unex(&chain->hmp->list_spin);
		}

		/*
		 * Unwind deep recursions which had been deferred.  This
		 * can leave the FLUSH_* bits set for these chains, which
		 * will be handled when we [re]flush chain after the unwind.
		 */
		while ((scan = TAILQ_FIRST(&info.flushq)) != NULL) {
			KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
			TAILQ_REMOVE(&info.flushq, scan, flush_node);
			atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED |
						       HAMMER2_CHAIN_DELAYED);

			/*
			 * Now that we've popped back up we can do a secondary
			 * recursion on the deferred elements.
			 *
			 * NOTE: hammer2_flush() may replace scan.
			 */
			if (hammer2_debug & 0x0040)
				kprintf("deferred flush %p\n", scan);
			hammer2_chain_lock(scan, HAMMER2_RESOLVE_MAYBE);
			hammer2_flush(scan, 0);
			hammer2_chain_unlock(scan);
			hammer2_chain_drop(scan);	/* ref from deferral */
		}

		/*
		 * [re]flush chain; this may queue new deferrals.
		 */
		info.diddeferral = 0;
		hammer2_flush_core(&info, chain, istop);

		/*
		 * Only loop if deep recursions have been deferred.
		 */
		if (TAILQ_EMPTY(&info.flushq))
			break;

		/* diagnostic only: warn if the deferral loop never drains */
		if (++loops % 1000 == 0) {
			kprintf("hammer2_flush: excessive loops on %p\n",
				chain);
			if (hammer2_debug & 0x100000)
				Debugger("hell4");
		}
	}
	hammer2_chain_drop(chain);	/* drop our extra ref from above */
	if (info.parent)
		hammer2_chain_drop(info.parent);
}

/*
 * This is the core of the chain flushing code.  The chain is locked by the
 * caller and must also have an extra ref on it by the caller, and remains
 * locked and will have an extra ref on return.  Upon return, the caller can
 * test the UPDATE bit on the child to determine if the parent needs updating.
395a7720be7SMatthew Dillon * 3968138a154SMatthew Dillon * (1) Determine if this node is a candidate for the flush, return if it is 3978138a154SMatthew Dillon * not. fchain and vchain are always candidates for the flush. 3980dea3156SMatthew Dillon * 3998138a154SMatthew Dillon * (2) If we recurse too deep the chain is entered onto the deferral list and 4008138a154SMatthew Dillon * the current flush stack is aborted until after the deferral list is 4018138a154SMatthew Dillon * run. 4028138a154SMatthew Dillon * 4038138a154SMatthew Dillon * (3) Recursively flush live children (rbtree). This can create deferrals. 404da6f36f4SMatthew Dillon * A successful flush clears the MODIFIED and UPDATE bits on the children 405da6f36f4SMatthew Dillon * and typically causes the parent to be marked MODIFIED as the children 406da6f36f4SMatthew Dillon * update the parent's block table. A parent might already be marked 407da6f36f4SMatthew Dillon * MODIFIED due to a deletion (whos blocktable update in the parent is 408da6f36f4SMatthew Dillon * handled by the frontend), or if the parent itself is modified by the 409da6f36f4SMatthew Dillon * frontend for other reasons. 4108138a154SMatthew Dillon * 411da6f36f4SMatthew Dillon * (4) Permanently disconnected sub-trees are cleaned up by the front-end. 412da6f36f4SMatthew Dillon * Deleted-but-open inodes can still be individually flushed via the 413da6f36f4SMatthew Dillon * filesystem syncer. 4148138a154SMatthew Dillon * 415da6f36f4SMatthew Dillon * (5) Note that an unmodified child may still need the block table in its 416da6f36f4SMatthew Dillon * parent updated (e.g. rename/move). The child will have UPDATE set 417da6f36f4SMatthew Dillon * in this case. 4188138a154SMatthew Dillon * 41950456506SMatthew Dillon * WARNING ON BREF MODIFY_TID/MIRROR_TID 420925e4ad1SMatthew Dillon * 421e513e77eSMatthew Dillon * blockref.modify_tid is consistent only within a PFS, and will not be 422e513e77eSMatthew Dillon * consistent during synchronization. 
mirror_tid is consistent across the 423e513e77eSMatthew Dillon * block device regardless of the PFS. 424476d2aadSMatthew Dillon */ 42532b800e6SMatthew Dillon static void 426da6f36f4SMatthew Dillon hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain, 4279450e866SMatthew Dillon int istop) 42832b800e6SMatthew Dillon { 429da6f36f4SMatthew Dillon hammer2_chain_t *parent; 430506bd6d1SMatthew Dillon hammer2_dev_t *hmp; 431925e4ad1SMatthew Dillon int diddeferral; 432da6f36f4SMatthew Dillon 433da6f36f4SMatthew Dillon /* 434da6f36f4SMatthew Dillon * (1) Optimize downward recursion to locate nodes needing action. 435da6f36f4SMatthew Dillon * Nothing to do if none of these flags are set. 436da6f36f4SMatthew Dillon */ 437850687d2SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_FLUSH_MASK) == 0) { 438850687d2SMatthew Dillon if (hammer2_debug & 0x200) { 439850687d2SMatthew Dillon if (info->debug == NULL) 440850687d2SMatthew Dillon info->debug = chain; 441850687d2SMatthew Dillon } else { 442da6f36f4SMatthew Dillon return; 443850687d2SMatthew Dillon } 444850687d2SMatthew Dillon } 44532b800e6SMatthew Dillon 446a5913bdfSMatthew Dillon hmp = chain->hmp; 447925e4ad1SMatthew Dillon diddeferral = info->diddeferral; 448da6f36f4SMatthew Dillon parent = info->parent; /* can be NULL */ 449925e4ad1SMatthew Dillon 4500924b3f8SMatthew Dillon /* 451da6f36f4SMatthew Dillon * Downward search recursion 452ea155208SMatthew Dillon */ 453eedd52a3SMatthew Dillon if (chain->flags & (HAMMER2_CHAIN_DEFERRED | HAMMER2_CHAIN_DELAYED)) { 454da6f36f4SMatthew Dillon /* 455da6f36f4SMatthew Dillon * Already deferred. 456da6f36f4SMatthew Dillon */ 457925e4ad1SMatthew Dillon ++info->diddeferral; 458925e4ad1SMatthew Dillon } else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) { 459da6f36f4SMatthew Dillon /* 460da6f36f4SMatthew Dillon * Recursion depth reached. 
461da6f36f4SMatthew Dillon */ 462eedd52a3SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_DELAYED) == 0); 4630dea3156SMatthew Dillon hammer2_chain_ref(chain); 464da6f36f4SMatthew Dillon TAILQ_INSERT_TAIL(&info->flushq, chain, flush_node); 465da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_DEFERRED); 466925e4ad1SMatthew Dillon ++info->diddeferral; 4679450e866SMatthew Dillon } else if ((chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) && istop == 0) { 4689450e866SMatthew Dillon /* 4699450e866SMatthew Dillon * We do not recurse through PFSROOTs. PFSROOT flushes are 4709450e866SMatthew Dillon * handled by the related pmp's (whether mounted or not, 4719450e866SMatthew Dillon * including during recovery). 4729450e866SMatthew Dillon * 4739450e866SMatthew Dillon * But we must still process the PFSROOT chains for block 4749450e866SMatthew Dillon * table updates in their parent (which IS part of our flush). 4759450e866SMatthew Dillon * 4769450e866SMatthew Dillon * Note that the volume root, vchain, does not set this flag. 4779450e866SMatthew Dillon */ 4789450e866SMatthew Dillon ; 479da6f36f4SMatthew Dillon } else if (chain->flags & HAMMER2_CHAIN_ONFLUSH) { 4808138a154SMatthew Dillon /* 481da6f36f4SMatthew Dillon * Downward recursion search (actual flush occurs bottom-up). 482da6f36f4SMatthew Dillon * pre-clear ONFLUSH. It can get set again due to races, 483da6f36f4SMatthew Dillon * which we want so the scan finds us again in the next flush. 4849450e866SMatthew Dillon * These races can also include 4859450e866SMatthew Dillon * 4869450e866SMatthew Dillon * Flush recursions stop at PFSROOT boundaries. Each PFS 4879450e866SMatthew Dillon * must be individually flushed and then the root must 4889450e866SMatthew Dillon * be flushed. 
4898138a154SMatthew Dillon */ 490da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH); 4918138a154SMatthew Dillon info->parent = chain; 49294491fa0SMatthew Dillon hammer2_spin_ex(&chain->core.spin); 493da6f36f4SMatthew Dillon RB_SCAN(hammer2_chain_tree, &chain->core.rbtree, 494da6f36f4SMatthew Dillon NULL, hammer2_flush_recurse, info); 49594491fa0SMatthew Dillon hammer2_spin_unex(&chain->core.spin); 496da6f36f4SMatthew Dillon info->parent = parent; 497da6f36f4SMatthew Dillon if (info->diddeferral) 498c603b86bSMatthew Dillon hammer2_chain_setflush(chain); 4998138a154SMatthew Dillon } 5000924b3f8SMatthew Dillon 50132b800e6SMatthew Dillon /* 502da6f36f4SMatthew Dillon * Now we are in the bottom-up part of the recursion. 503da6f36f4SMatthew Dillon * 504da6f36f4SMatthew Dillon * Do not update chain if lower layers were deferred. 5058138a154SMatthew Dillon */ 506da6f36f4SMatthew Dillon if (info->diddeferral) 5078138a154SMatthew Dillon goto done; 5088138a154SMatthew Dillon 5098138a154SMatthew Dillon /* 510da6f36f4SMatthew Dillon * Propagate the DESTROY flag downwards. This dummies up the flush 511da6f36f4SMatthew Dillon * code and tries to invalidate related buffer cache buffers to 512da6f36f4SMatthew Dillon * avoid the disk write. 513623d43d4SMatthew Dillon */ 514da6f36f4SMatthew Dillon if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY)) 515da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY); 516623d43d4SMatthew Dillon 517623d43d4SMatthew Dillon /* 518da6f36f4SMatthew Dillon * Chain was already modified or has become modified, flush it out. 
519da6f36f4SMatthew Dillon */ 520da6f36f4SMatthew Dillon again: 521850687d2SMatthew Dillon if ((hammer2_debug & 0x200) && 522850687d2SMatthew Dillon info->debug && 523850687d2SMatthew Dillon (chain->flags & (HAMMER2_CHAIN_MODIFIED | HAMMER2_CHAIN_UPDATE))) { 524850687d2SMatthew Dillon hammer2_chain_t *scan = chain; 525850687d2SMatthew Dillon 526850687d2SMatthew Dillon kprintf("DISCONNECTED FLUSH %p->%p\n", info->debug, chain); 527850687d2SMatthew Dillon while (scan) { 528850687d2SMatthew Dillon kprintf(" chain %p [%08x] bref=%016jx:%02x\n", 529850687d2SMatthew Dillon scan, scan->flags, 530850687d2SMatthew Dillon scan->bref.key, scan->bref.type); 531850687d2SMatthew Dillon if (scan == info->debug) 532850687d2SMatthew Dillon break; 533850687d2SMatthew Dillon scan = scan->parent; 534850687d2SMatthew Dillon } 535850687d2SMatthew Dillon } 536850687d2SMatthew Dillon 537da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 538da6f36f4SMatthew Dillon /* 539e513e77eSMatthew Dillon * Dispose of the modified bit. 540e513e77eSMatthew Dillon * 541e513e77eSMatthew Dillon * UPDATE should already be set. 542e513e77eSMatthew Dillon * bref.mirror_tid should already be set. 54332b800e6SMatthew Dillon */ 544da6f36f4SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_UPDATE) || 54550456506SMatthew Dillon chain == &hmp->vchain); 5460dea3156SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED); 5478db69c9fSMatthew Dillon 5488db69c9fSMatthew Dillon /* 549e513e77eSMatthew Dillon * Manage threads waiting for excessive dirty memory to 550e513e77eSMatthew Dillon * be retired. 
5518db69c9fSMatthew Dillon */ 552e513e77eSMatthew Dillon if (chain->pmp) 553e513e77eSMatthew Dillon hammer2_pfs_memory_wakeup(chain->pmp); 5548138a154SMatthew Dillon 555da6f36f4SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_UPDATE) || 5568138a154SMatthew Dillon chain == &hmp->vchain || 5578138a154SMatthew Dillon chain == &hmp->fchain) { 5588138a154SMatthew Dillon /* 5598138a154SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared, 5608138a154SMatthew Dillon * net -1 ref. 5618138a154SMatthew Dillon */ 5620dea3156SMatthew Dillon hammer2_chain_drop(chain); 5638138a154SMatthew Dillon } else { 5648138a154SMatthew Dillon /* 5658138a154SMatthew Dillon * Drop the ref from the MODIFIED bit we cleared and 566da6f36f4SMatthew Dillon * set a ref for the UPDATE bit we are setting. Net 567da6f36f4SMatthew Dillon * 0 refs. 5688138a154SMatthew Dillon */ 569da6f36f4SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 5700dea3156SMatthew Dillon } 5710dea3156SMatthew Dillon 5720dea3156SMatthew Dillon /* 573a71db85dSMatthew Dillon * Issue the flush. This is indirect via the DIO. 5740dea3156SMatthew Dillon * 575a71db85dSMatthew Dillon * NOTE: A DELETED node that reaches this point must be 576a71db85dSMatthew Dillon * flushed for synchronization point consistency. 577a71db85dSMatthew Dillon * 578a71db85dSMatthew Dillon * NOTE: Even though MODIFIED was already set, the related DIO 579a71db85dSMatthew Dillon * might not be dirty due to a system buffer cache 580a71db85dSMatthew Dillon * flush and must be set dirty if we are going to make 581a71db85dSMatthew Dillon * further modifications to the buffer. Chains with 582a71db85dSMatthew Dillon * embedded data don't need this. 
5830dea3156SMatthew Dillon */ 584a7720be7SMatthew Dillon if (hammer2_debug & 0x1000) { 585c603b86bSMatthew Dillon kprintf("Flush %p.%d %016jx/%d data=%016jx", 586a7720be7SMatthew Dillon chain, chain->bref.type, 587c603b86bSMatthew Dillon (uintmax_t)chain->bref.key, 588c603b86bSMatthew Dillon chain->bref.keybits, 589c603b86bSMatthew Dillon (uintmax_t)chain->bref.data_off); 590a7720be7SMatthew Dillon } 591a7720be7SMatthew Dillon if (hammer2_debug & 0x2000) { 592a7720be7SMatthew Dillon Debugger("Flush hell"); 593a7720be7SMatthew Dillon } 59410136ab6SMatthew Dillon 59532b800e6SMatthew Dillon /* 596da6f36f4SMatthew Dillon * Update chain CRCs for flush. 59732b800e6SMatthew Dillon * 598da6f36f4SMatthew Dillon * NOTE: Volume headers are NOT flushed here as they require 599da6f36f4SMatthew Dillon * special processing. 60032b800e6SMatthew Dillon */ 60132b800e6SMatthew Dillon switch(chain->bref.type) { 6021a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 603a71db85dSMatthew Dillon /* 604e513e77eSMatthew Dillon * Update the volume header's freemap_tid to the 605e513e77eSMatthew Dillon * freemap's flushing mirror_tid. 
606e513e77eSMatthew Dillon * 607a71db85dSMatthew Dillon * (note: embedded data, do not call setdirty) 608a71db85dSMatthew Dillon */ 60950456506SMatthew Dillon KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED); 610e513e77eSMatthew Dillon KKASSERT(chain == &hmp->fchain); 611e513e77eSMatthew Dillon hmp->voldata.freemap_tid = chain->bref.mirror_tid; 612e513e77eSMatthew Dillon kprintf("sync freemap mirror_tid %08jx\n", 613e513e77eSMatthew Dillon (intmax_t)chain->bref.mirror_tid); 614e513e77eSMatthew Dillon 615e513e77eSMatthew Dillon /* 616e513e77eSMatthew Dillon * The freemap can be flushed independently of the 617e513e77eSMatthew Dillon * main topology, but for the case where it is 618e513e77eSMatthew Dillon * flushed in the same transaction, and flushed 619e513e77eSMatthew Dillon * before vchain (a case we want to allow for 620e513e77eSMatthew Dillon * performance reasons), make sure modifications 621e513e77eSMatthew Dillon * made during the flush under vchain use a new 622e513e77eSMatthew Dillon * transaction id. 623e513e77eSMatthew Dillon * 624e513e77eSMatthew Dillon * Otherwise the mount recovery code will get confused. 625e513e77eSMatthew Dillon */ 626e513e77eSMatthew Dillon ++hmp->voldata.mirror_tid; 6271a7cfe5aSMatthew Dillon break; 62832b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 62932b800e6SMatthew Dillon /* 630e513e77eSMatthew Dillon * The free block table is flushed by 631e513e77eSMatthew Dillon * hammer2_vfs_sync() before it flushes vchain. 632e513e77eSMatthew Dillon * We must still hold fchain locked while copying 633e513e77eSMatthew Dillon * voldata to volsync, however. 
634a71db85dSMatthew Dillon * 635a71db85dSMatthew Dillon * (note: embedded data, do not call setdirty) 6361a7cfe5aSMatthew Dillon */ 637da6f36f4SMatthew Dillon hammer2_chain_lock(&hmp->fchain, 638da6f36f4SMatthew Dillon HAMMER2_RESOLVE_ALWAYS); 639a6cf1052SMatthew Dillon hammer2_voldata_lock(hmp); 640e513e77eSMatthew Dillon kprintf("sync volume mirror_tid %08jx\n", 641da6f36f4SMatthew Dillon (intmax_t)chain->bref.mirror_tid); 6421a7cfe5aSMatthew Dillon 6431a7cfe5aSMatthew Dillon /* 644e513e77eSMatthew Dillon * Update the volume header's mirror_tid to the 645e513e77eSMatthew Dillon * main topology's flushing mirror_tid. It is 646e513e77eSMatthew Dillon * possible that voldata.mirror_tid is already 647e513e77eSMatthew Dillon * beyond bref.mirror_tid due to the bump we made 648e513e77eSMatthew Dillon * above in BREF_TYPE_FREEMAP. 649e513e77eSMatthew Dillon */ 650e513e77eSMatthew Dillon if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) { 651e513e77eSMatthew Dillon hmp->voldata.mirror_tid = 652e513e77eSMatthew Dillon chain->bref.mirror_tid; 653e513e77eSMatthew Dillon } 654e513e77eSMatthew Dillon 655e513e77eSMatthew Dillon /* 656da6f36f4SMatthew Dillon * The volume header is flushed manually by the 657da6f36f4SMatthew Dillon * syncer, not here. All we do here is adjust the 658da6f36f4SMatthew Dillon * crc's. 
65932b800e6SMatthew Dillon */ 66032b800e6SMatthew Dillon KKASSERT(chain->data != NULL); 661fdf62707SMatthew Dillon KKASSERT(chain->dio == NULL); 66232b800e6SMatthew Dillon 66332b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]= 66432b800e6SMatthew Dillon hammer2_icrc32( 66532b800e6SMatthew Dillon (char *)&hmp->voldata + 66632b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_OFF, 66732b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC1_SIZE); 66832b800e6SMatthew Dillon hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]= 66932b800e6SMatthew Dillon hammer2_icrc32( 67032b800e6SMatthew Dillon (char *)&hmp->voldata + 67132b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_OFF, 67232b800e6SMatthew Dillon HAMMER2_VOLUME_ICRC0_SIZE); 67332b800e6SMatthew Dillon hmp->voldata.icrc_volheader = 67432b800e6SMatthew Dillon hammer2_icrc32( 67532b800e6SMatthew Dillon (char *)&hmp->voldata + 67632b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_OFF, 67732b800e6SMatthew Dillon HAMMER2_VOLUME_ICRCVH_SIZE); 678e513e77eSMatthew Dillon 679e513e77eSMatthew Dillon kprintf("syncvolhdr %016jx %016jx\n", 680e513e77eSMatthew Dillon hmp->voldata.mirror_tid, 681e513e77eSMatthew Dillon hmp->vchain.bref.mirror_tid); 68232b800e6SMatthew Dillon hmp->volsync = hmp->voldata; 6830dea3156SMatthew Dillon atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC); 68450456506SMatthew Dillon hammer2_voldata_unlock(hmp); 685a6cf1052SMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 68632b800e6SMatthew Dillon break; 68732b800e6SMatthew Dillon case HAMMER2_BREF_TYPE_DATA: 68832b800e6SMatthew Dillon /* 689da6f36f4SMatthew Dillon * Data elements have already been flushed via the 690da6f36f4SMatthew Dillon * logical file buffer cache. Their hash was set in 691a71db85dSMatthew Dillon * the bref by the vop_write code. Do not re-dirty. 69232b800e6SMatthew Dillon * 693da6f36f4SMatthew Dillon * Make sure any device buffer(s) have been flushed 694da6f36f4SMatthew Dillon * out here (there aren't usually any to flush) XXX. 
69532b800e6SMatthew Dillon */ 69632b800e6SMatthew Dillon break; 697512beabdSMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 6981a7cfe5aSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 69991caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_LEAF: 700da6f36f4SMatthew Dillon /* 701da6f36f4SMatthew Dillon * Buffer I/O will be cleaned up when the volume is 702da6f36f4SMatthew Dillon * flushed (but the kernel is free to flush it before 703da6f36f4SMatthew Dillon * then, as well). 704da6f36f4SMatthew Dillon */ 70550456506SMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 706a71db85dSMatthew Dillon hammer2_chain_setcheck(chain, chain->data); 70750456506SMatthew Dillon break; 70891caa51cSMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 709a71db85dSMatthew Dillon /* 710a71db85dSMatthew Dillon * NOTE: We must call io_setdirty() to make any late 711a71db85dSMatthew Dillon * changes to the inode data, the system might 712a71db85dSMatthew Dillon * have already flushed the buffer. 713a71db85dSMatthew Dillon */ 714b0f58de8SMatthew Dillon if (chain->data->ipdata.meta.op_flags & 715da6f36f4SMatthew Dillon HAMMER2_OPFLAG_PFSROOT) { 716837bd39bSMatthew Dillon /* 717da6f36f4SMatthew Dillon * non-NULL pmp if mounted as a PFS. We must 71818e8ab5fSMatthew Dillon * sync fields cached in the pmp? 
XXX 719837bd39bSMatthew Dillon */ 720837bd39bSMatthew Dillon hammer2_inode_data_t *ipdata; 721837bd39bSMatthew Dillon 722a71db85dSMatthew Dillon hammer2_io_setdirty(chain->dio); 723837bd39bSMatthew Dillon ipdata = &chain->data->ipdata; 724e513e77eSMatthew Dillon if (chain->pmp) { 725b0f58de8SMatthew Dillon ipdata->meta.pfs_inum = 726e513e77eSMatthew Dillon chain->pmp->inode_tid; 727e513e77eSMatthew Dillon } 72850456506SMatthew Dillon } else { 72950456506SMatthew Dillon /* can't be mounted as a PFS */ 73050456506SMatthew Dillon } 731b3659de2SMatthew Dillon 732512beabdSMatthew Dillon KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0); 733a71db85dSMatthew Dillon hammer2_chain_setcheck(chain, chain->data); 7341a7cfe5aSMatthew Dillon break; 73532b800e6SMatthew Dillon default: 73691caa51cSMatthew Dillon KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED); 737da6f36f4SMatthew Dillon panic("hammer2_flush_core: unsupported " 738da6f36f4SMatthew Dillon "embedded bref %d", 73991caa51cSMatthew Dillon chain->bref.type); 74091caa51cSMatthew Dillon /* NOT REACHED */ 74132b800e6SMatthew Dillon } 74232b800e6SMatthew Dillon 74332b800e6SMatthew Dillon /* 744da6f36f4SMatthew Dillon * If the chain was destroyed try to avoid unnecessary I/O. 745da6f36f4SMatthew Dillon * (this only really works if the DIO system buffer is the 746da6f36f4SMatthew Dillon * same size as chain->bytes). 747da6f36f4SMatthew Dillon */ 74805dd26e4SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_DESTROY) && chain->dio) { 749da6f36f4SMatthew Dillon hammer2_io_setinval(chain->dio, chain->bytes); 750da6f36f4SMatthew Dillon } 751da6f36f4SMatthew Dillon } 752da6f36f4SMatthew Dillon 753da6f36f4SMatthew Dillon /* 754da6f36f4SMatthew Dillon * If UPDATE is set the parent block table may need to be updated. 755da6f36f4SMatthew Dillon * 756da6f36f4SMatthew Dillon * NOTE: UPDATE may be set on vchain or fchain in which case 757da6f36f4SMatthew Dillon * parent could be NULL. 
It's easiest to allow the case 758da6f36f4SMatthew Dillon * and test for NULL. parent can also wind up being NULL 759da6f36f4SMatthew Dillon * due to a deletion so we need to handle the case anyway. 760da6f36f4SMatthew Dillon * 761da6f36f4SMatthew Dillon * If no parent exists we can just clear the UPDATE bit. If the 762da6f36f4SMatthew Dillon * chain gets reattached later on the bit will simply get set 763da6f36f4SMatthew Dillon * again. 764da6f36f4SMatthew Dillon */ 765da6f36f4SMatthew Dillon if ((chain->flags & HAMMER2_CHAIN_UPDATE) && parent == NULL) { 766da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 767da6f36f4SMatthew Dillon hammer2_chain_drop(chain); 768da6f36f4SMatthew Dillon } 769da6f36f4SMatthew Dillon 770da6f36f4SMatthew Dillon /* 771da6f36f4SMatthew Dillon * The chain may need its blockrefs updated in the parent. This 772da6f36f4SMatthew Dillon * requires some fancy footwork. 773da6f36f4SMatthew Dillon */ 774da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_UPDATE) { 775da6f36f4SMatthew Dillon hammer2_blockref_t *base; 776da6f36f4SMatthew Dillon int count; 777da6f36f4SMatthew Dillon 778da6f36f4SMatthew Dillon /* 779da6f36f4SMatthew Dillon * Both parent and chain must be locked. This requires 780da6f36f4SMatthew Dillon * temporarily unlocking the chain. We have to deal with 781da6f36f4SMatthew Dillon * the case where the chain might be reparented or modified 782da6f36f4SMatthew Dillon * while it was unlocked. 
783da6f36f4SMatthew Dillon */ 784da6f36f4SMatthew Dillon hammer2_chain_unlock(chain); 785da6f36f4SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS); 786da6f36f4SMatthew Dillon hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE); 787da6f36f4SMatthew Dillon if (chain->parent != parent) { 788eedd52a3SMatthew Dillon kprintf("PARENT MISMATCH ch=%p p=%p/%p\n", 789eedd52a3SMatthew Dillon chain, chain->parent, parent); 790da6f36f4SMatthew Dillon hammer2_chain_unlock(parent); 791da6f36f4SMatthew Dillon goto done; 792da6f36f4SMatthew Dillon } 793da6f36f4SMatthew Dillon 794da6f36f4SMatthew Dillon /* 795da6f36f4SMatthew Dillon * Check race condition. If someone got in and modified 796da6f36f4SMatthew Dillon * it again while it was unlocked, we have to loop up. 797da6f36f4SMatthew Dillon */ 798da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_MODIFIED) { 799da6f36f4SMatthew Dillon hammer2_chain_unlock(parent); 800da6f36f4SMatthew Dillon kprintf("hammer2_flush: chain %p flush-mod race\n", 801da6f36f4SMatthew Dillon chain); 802da6f36f4SMatthew Dillon goto again; 803da6f36f4SMatthew Dillon } 804da6f36f4SMatthew Dillon 805da6f36f4SMatthew Dillon /* 806a6cf1052SMatthew Dillon * Clear UPDATE flag, mark parent modified, update its 807a6cf1052SMatthew Dillon * modify_tid if necessary, and adjust the parent blockmap. 808da6f36f4SMatthew Dillon */ 809da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_UPDATE) { 810da6f36f4SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE); 811da6f36f4SMatthew Dillon hammer2_chain_drop(chain); 812da6f36f4SMatthew Dillon } 813a6cf1052SMatthew Dillon 814eedd52a3SMatthew Dillon /* 815eedd52a3SMatthew Dillon * (optional code) 816eedd52a3SMatthew Dillon * 817eedd52a3SMatthew Dillon * Avoid actually modifying and updating the parent if it 818eedd52a3SMatthew Dillon * was flagged for destruction. 
This can greatly reduce 819eedd52a3SMatthew Dillon * disk I/O in large tree removals because the 820eedd52a3SMatthew Dillon * hammer2_io_setinval() call in the upward recursion 821eedd52a3SMatthew Dillon * (see MODIFIED code above) can only handle a few cases. 822eedd52a3SMatthew Dillon */ 823eedd52a3SMatthew Dillon if (parent->flags & HAMMER2_CHAIN_DESTROY) { 824eedd52a3SMatthew Dillon if (parent->bref.modify_tid < chain->bref.modify_tid) { 825eedd52a3SMatthew Dillon parent->bref.modify_tid = 826eedd52a3SMatthew Dillon chain->bref.modify_tid; 827eedd52a3SMatthew Dillon } 828eedd52a3SMatthew Dillon atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED | 829eedd52a3SMatthew Dillon HAMMER2_CHAIN_BMAPUPD); 830eedd52a3SMatthew Dillon hammer2_chain_unlock(parent); 831eedd52a3SMatthew Dillon goto skipupdate; 832eedd52a3SMatthew Dillon } 833eedd52a3SMatthew Dillon 834eedd52a3SMatthew Dillon /* 835eedd52a3SMatthew Dillon * We are updating the parent's blockmap, the parent must 836eedd52a3SMatthew Dillon * be set modified. 837eedd52a3SMatthew Dillon */ 838c603b86bSMatthew Dillon hammer2_chain_modify(parent, HAMMER2_MODIFY_KEEPMODIFY); 839a6cf1052SMatthew Dillon if (parent->bref.modify_tid < chain->bref.modify_tid) 840a6cf1052SMatthew Dillon parent->bref.modify_tid = chain->bref.modify_tid; 841da6f36f4SMatthew Dillon 842da6f36f4SMatthew Dillon /* 843da6f36f4SMatthew Dillon * Calculate blockmap pointer 844da6f36f4SMatthew Dillon */ 845da6f36f4SMatthew Dillon switch(parent->bref.type) { 846da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_INODE: 847da6f36f4SMatthew Dillon /* 848da6f36f4SMatthew Dillon * Access the inode's block array. However, there is 849da6f36f4SMatthew Dillon * no block array if the inode is flagged DIRECTDATA. 
850da6f36f4SMatthew Dillon */ 851da6f36f4SMatthew Dillon if (parent->data && 852b0f58de8SMatthew Dillon (parent->data->ipdata.meta.op_flags & 853da6f36f4SMatthew Dillon HAMMER2_OPFLAG_DIRECTDATA) == 0) { 854da6f36f4SMatthew Dillon base = &parent->data-> 855da6f36f4SMatthew Dillon ipdata.u.blockset.blockref[0]; 856da6f36f4SMatthew Dillon } else { 857da6f36f4SMatthew Dillon base = NULL; 858da6f36f4SMatthew Dillon } 859da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 860da6f36f4SMatthew Dillon break; 861da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_INDIRECT: 862da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP_NODE: 863da6f36f4SMatthew Dillon if (parent->data) 864da6f36f4SMatthew Dillon base = &parent->data->npdata[0]; 865da6f36f4SMatthew Dillon else 866da6f36f4SMatthew Dillon base = NULL; 867da6f36f4SMatthew Dillon count = parent->bytes / sizeof(hammer2_blockref_t); 868da6f36f4SMatthew Dillon break; 869da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_VOLUME: 870da6f36f4SMatthew Dillon base = &chain->hmp->voldata.sroot_blockset.blockref[0]; 871da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 872da6f36f4SMatthew Dillon break; 873da6f36f4SMatthew Dillon case HAMMER2_BREF_TYPE_FREEMAP: 874da6f36f4SMatthew Dillon base = &parent->data->npdata[0]; 875da6f36f4SMatthew Dillon count = HAMMER2_SET_COUNT; 876da6f36f4SMatthew Dillon break; 877da6f36f4SMatthew Dillon default: 878da6f36f4SMatthew Dillon base = NULL; 879da6f36f4SMatthew Dillon count = 0; 880da6f36f4SMatthew Dillon panic("hammer2_flush_core: " 881da6f36f4SMatthew Dillon "unrecognized blockref type: %d", 882da6f36f4SMatthew Dillon parent->bref.type); 883da6f36f4SMatthew Dillon } 884da6f36f4SMatthew Dillon 885da6f36f4SMatthew Dillon /* 886da6f36f4SMatthew Dillon * Blocktable updates 887b3659de2SMatthew Dillon * 888b3659de2SMatthew Dillon * We synchronize pending statistics at this time. 
Delta 889b3659de2SMatthew Dillon * adjustments designated for the current and upper level 890b3659de2SMatthew Dillon * are synchronized. 891da6f36f4SMatthew Dillon */ 892da6f36f4SMatthew Dillon if (base && (chain->flags & HAMMER2_CHAIN_BMAPUPD)) { 893da6f36f4SMatthew Dillon if (chain->flags & HAMMER2_CHAIN_BMAPPED) { 8940cc33e20SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 895c603b86bSMatthew Dillon hammer2_base_delete(parent, base, count, 896da6f36f4SMatthew Dillon &info->cache_index, chain); 8970cc33e20SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 898b3659de2SMatthew Dillon /* base_delete clears both bits */ 899b3659de2SMatthew Dillon } else { 900b3659de2SMatthew Dillon atomic_clear_int(&chain->flags, 901b3659de2SMatthew Dillon HAMMER2_CHAIN_BMAPUPD); 902da6f36f4SMatthew Dillon } 903da6f36f4SMatthew Dillon } 904da6f36f4SMatthew Dillon if (base && (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) { 9050cc33e20SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 906c603b86bSMatthew Dillon hammer2_base_insert(parent, base, count, 907da6f36f4SMatthew Dillon &info->cache_index, chain); 9080cc33e20SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 909b3659de2SMatthew Dillon /* base_insert sets BMAPPED */ 910da6f36f4SMatthew Dillon } 911da6f36f4SMatthew Dillon hammer2_chain_unlock(parent); 912da6f36f4SMatthew Dillon } 913eedd52a3SMatthew Dillon skipupdate: 914eedd52a3SMatthew Dillon ; 915da6f36f4SMatthew Dillon 916da6f36f4SMatthew Dillon /* 9178138a154SMatthew Dillon * Final cleanup after flush 9188138a154SMatthew Dillon */ 9198138a154SMatthew Dillon done: 920e513e77eSMatthew Dillon KKASSERT(chain->refs > 0); 921850687d2SMatthew Dillon if (hammer2_debug & 0x200) { 922850687d2SMatthew Dillon if (info->debug == chain) 923850687d2SMatthew Dillon info->debug = NULL; 924850687d2SMatthew Dillon } 9258138a154SMatthew Dillon } 9268138a154SMatthew Dillon 9278138a154SMatthew Dillon /* 928da6f36f4SMatthew Dillon * Flush recursion helper, called from flush_core, 
calls flush_core. 9290dea3156SMatthew Dillon * 9308138a154SMatthew Dillon * Flushes the children of the caller's chain (info->parent), restricted 9318138a154SMatthew Dillon * by sync_tid. Set info->domodify if the child's blockref must propagate 9328138a154SMatthew Dillon * back up to the parent. 9330dea3156SMatthew Dillon * 9348138a154SMatthew Dillon * Ripouts can move child from rbtree to dbtree or dbq but the caller's 9358138a154SMatthew Dillon * flush scan order prevents any chains from being lost. A child can be 936da6f36f4SMatthew Dillon * executes more than once. 937ea155208SMatthew Dillon * 9388138a154SMatthew Dillon * WARNING! If we do not call hammer2_flush_core() we must update 9398138a154SMatthew Dillon * bref.mirror_tid ourselves to indicate that the flush has 9408138a154SMatthew Dillon * processed the child. 941925e4ad1SMatthew Dillon * 9428138a154SMatthew Dillon * WARNING! parent->core spinlock is held on entry and return. 94332b800e6SMatthew Dillon */ 9440dea3156SMatthew Dillon static int 945da6f36f4SMatthew Dillon hammer2_flush_recurse(hammer2_chain_t *child, void *data) 94632b800e6SMatthew Dillon { 9470dea3156SMatthew Dillon hammer2_flush_info_t *info = data; 9480dea3156SMatthew Dillon hammer2_chain_t *parent = info->parent; 949925e4ad1SMatthew Dillon 9500dea3156SMatthew Dillon /* 95110136ab6SMatthew Dillon * (child can never be fchain or vchain so a special check isn't 95210136ab6SMatthew Dillon * needed). 953da6f36f4SMatthew Dillon * 954a4dc31e0SMatthew Dillon * We must ref the child before unlocking the spinlock. 955a4dc31e0SMatthew Dillon * 956a4dc31e0SMatthew Dillon * The caller has added a ref to the parent so we can temporarily 957a4dc31e0SMatthew Dillon * unlock it in order to lock the child. 
958a4dc31e0SMatthew Dillon */ 959ea155208SMatthew Dillon hammer2_chain_ref(child); 96094491fa0SMatthew Dillon hammer2_spin_unex(&parent->core.spin); 9610dea3156SMatthew Dillon 9620dea3156SMatthew Dillon hammer2_chain_unlock(parent); 9630dea3156SMatthew Dillon hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE); 9640dea3156SMatthew Dillon 96503faa7d5SMatthew Dillon /* 966e513e77eSMatthew Dillon * Recurse and collect deferral data. We're in the media flush, 967e513e77eSMatthew Dillon * this can cross PFS boundaries. 96803faa7d5SMatthew Dillon */ 969da6f36f4SMatthew Dillon if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) { 9700dea3156SMatthew Dillon ++info->depth; 9719450e866SMatthew Dillon hammer2_flush_core(info, child, 0); 9720dea3156SMatthew Dillon --info->depth; 973850687d2SMatthew Dillon } else if (hammer2_debug & 0x200) { 974850687d2SMatthew Dillon if (info->debug == NULL) 975850687d2SMatthew Dillon info->debug = child; 976850687d2SMatthew Dillon ++info->depth; 9779450e866SMatthew Dillon hammer2_flush_core(info, child, 0); 978850687d2SMatthew Dillon --info->depth; 979850687d2SMatthew Dillon if (info->debug == child) 980850687d2SMatthew Dillon info->debug = NULL; 9818138a154SMatthew Dillon } 9820dea3156SMatthew Dillon 983a4dc31e0SMatthew Dillon /* 984a4dc31e0SMatthew Dillon * Relock to continue the loop 985a4dc31e0SMatthew Dillon */ 986a4dc31e0SMatthew Dillon hammer2_chain_unlock(child); 987ea155208SMatthew Dillon hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE); 988a4dc31e0SMatthew Dillon hammer2_chain_drop(child); 989a4dc31e0SMatthew Dillon KKASSERT(info->parent == parent); 99094491fa0SMatthew Dillon hammer2_spin_ex(&parent->core.spin); 9910dea3156SMatthew Dillon 9920dea3156SMatthew Dillon return (0); 9930dea3156SMatthew Dillon } 994*12ff971cSMatthew Dillon 995*12ff971cSMatthew Dillon /* 996*12ff971cSMatthew Dillon * flush helper (backend threaded) 997*12ff971cSMatthew Dillon * 998*12ff971cSMatthew Dillon * Flushes core chains, issues disk sync, flushes volume 
roots. 999*12ff971cSMatthew Dillon * 1000*12ff971cSMatthew Dillon * Primarily called from vfs_sync(). 1001*12ff971cSMatthew Dillon */ 1002*12ff971cSMatthew Dillon void 1003*12ff971cSMatthew Dillon hammer2_inode_xop_flush(hammer2_xop_t *arg, int clindex) 1004*12ff971cSMatthew Dillon { 1005*12ff971cSMatthew Dillon hammer2_xop_flush_t *xop = &arg->xop_flush; 1006*12ff971cSMatthew Dillon hammer2_chain_t *chain; 1007*12ff971cSMatthew Dillon hammer2_chain_t *parent; 1008*12ff971cSMatthew Dillon hammer2_dev_t *hmp; 1009*12ff971cSMatthew Dillon int error = 0; 1010*12ff971cSMatthew Dillon int total_error = 0; 1011*12ff971cSMatthew Dillon int j; 1012*12ff971cSMatthew Dillon 1013*12ff971cSMatthew Dillon /* 1014*12ff971cSMatthew Dillon * Flush core chains 1015*12ff971cSMatthew Dillon */ 1016*12ff971cSMatthew Dillon chain = hammer2_inode_chain(xop->head.ip, clindex, 1017*12ff971cSMatthew Dillon HAMMER2_RESOLVE_ALWAYS); 1018*12ff971cSMatthew Dillon if (chain) { 1019*12ff971cSMatthew Dillon hmp = chain->hmp; 1020*12ff971cSMatthew Dillon if (chain->flags & HAMMER2_CHAIN_FLUSH_MASK) { 1021*12ff971cSMatthew Dillon hammer2_flush(chain, 1); 1022*12ff971cSMatthew Dillon parent = chain->parent; 1023*12ff971cSMatthew Dillon KKASSERT(chain->pmp != parent->pmp); 1024*12ff971cSMatthew Dillon hammer2_chain_setflush(parent); 1025*12ff971cSMatthew Dillon } 1026*12ff971cSMatthew Dillon hammer2_chain_unlock(chain); 1027*12ff971cSMatthew Dillon hammer2_chain_drop(chain); 1028*12ff971cSMatthew Dillon chain = NULL; 1029*12ff971cSMatthew Dillon } else { 1030*12ff971cSMatthew Dillon hmp = NULL; 1031*12ff971cSMatthew Dillon } 1032*12ff971cSMatthew Dillon 1033*12ff971cSMatthew Dillon /* 1034*12ff971cSMatthew Dillon * Flush volume roots. Avoid replication, we only want to 1035*12ff971cSMatthew Dillon * flush each hammer2_dev (hmp) once. 
1036*12ff971cSMatthew Dillon */ 1037*12ff971cSMatthew Dillon for (j = clindex - 1; j >= 0; --j) { 1038*12ff971cSMatthew Dillon if ((chain = xop->head.ip->cluster.array[j].chain) != NULL) { 1039*12ff971cSMatthew Dillon if (chain->hmp == hmp) { 1040*12ff971cSMatthew Dillon chain = NULL; /* safety */ 1041*12ff971cSMatthew Dillon goto skip; 1042*12ff971cSMatthew Dillon } 1043*12ff971cSMatthew Dillon } 1044*12ff971cSMatthew Dillon } 1045*12ff971cSMatthew Dillon chain = NULL; /* safety */ 1046*12ff971cSMatthew Dillon 1047*12ff971cSMatthew Dillon /* 1048*12ff971cSMatthew Dillon * spmp transaction. The super-root is never directly mounted so 1049*12ff971cSMatthew Dillon * there shouldn't be any vnodes, let alone any dirty vnodes 1050*12ff971cSMatthew Dillon * associated with it. 1051*12ff971cSMatthew Dillon */ 1052*12ff971cSMatthew Dillon hammer2_trans_init(hmp->spmp, HAMMER2_TRANS_ISFLUSH); 1053*12ff971cSMatthew Dillon 1054*12ff971cSMatthew Dillon /* 1055*12ff971cSMatthew Dillon * Media mounts have two 'roots', vchain for the topology 1056*12ff971cSMatthew Dillon * and fchain for the free block table. Flush both. 1057*12ff971cSMatthew Dillon * 1058*12ff971cSMatthew Dillon * Note that the topology and free block table are handled 1059*12ff971cSMatthew Dillon * independently, so the free block table can wind up being 1060*12ff971cSMatthew Dillon * ahead of the topology. We depend on the bulk free scan 1061*12ff971cSMatthew Dillon * code to deal with any loose ends. 
1062*12ff971cSMatthew Dillon */ 1063*12ff971cSMatthew Dillon hammer2_chain_ref(&hmp->vchain); 1064*12ff971cSMatthew Dillon hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 1065*12ff971cSMatthew Dillon hammer2_chain_ref(&hmp->fchain); 1066*12ff971cSMatthew Dillon hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS); 1067*12ff971cSMatthew Dillon if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) { 1068*12ff971cSMatthew Dillon /* 1069*12ff971cSMatthew Dillon * This will also modify vchain as a side effect, 1070*12ff971cSMatthew Dillon * mark vchain as modified now. 1071*12ff971cSMatthew Dillon */ 1072*12ff971cSMatthew Dillon hammer2_voldata_modify(hmp); 1073*12ff971cSMatthew Dillon chain = &hmp->fchain; 1074*12ff971cSMatthew Dillon hammer2_flush(chain, 1); 1075*12ff971cSMatthew Dillon KKASSERT(chain == &hmp->fchain); 1076*12ff971cSMatthew Dillon } 1077*12ff971cSMatthew Dillon hammer2_chain_unlock(&hmp->fchain); 1078*12ff971cSMatthew Dillon hammer2_chain_unlock(&hmp->vchain); 1079*12ff971cSMatthew Dillon hammer2_chain_drop(&hmp->fchain); 1080*12ff971cSMatthew Dillon /* vchain dropped down below */ 1081*12ff971cSMatthew Dillon 1082*12ff971cSMatthew Dillon hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS); 1083*12ff971cSMatthew Dillon if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) { 1084*12ff971cSMatthew Dillon chain = &hmp->vchain; 1085*12ff971cSMatthew Dillon hammer2_flush(chain, 1); 1086*12ff971cSMatthew Dillon KKASSERT(chain == &hmp->vchain); 1087*12ff971cSMatthew Dillon } 1088*12ff971cSMatthew Dillon hammer2_chain_unlock(&hmp->vchain); 1089*12ff971cSMatthew Dillon hammer2_chain_drop(&hmp->vchain); 1090*12ff971cSMatthew Dillon 1091*12ff971cSMatthew Dillon error = 0; 1092*12ff971cSMatthew Dillon 1093*12ff971cSMatthew Dillon /* 1094*12ff971cSMatthew Dillon * We can't safely flush the volume header until we have 1095*12ff971cSMatthew Dillon * flushed any device buffers which have built up. 
1096*12ff971cSMatthew Dillon * 1097*12ff971cSMatthew Dillon * XXX this isn't being incremental 1098*12ff971cSMatthew Dillon */ 1099*12ff971cSMatthew Dillon vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY); 1100*12ff971cSMatthew Dillon error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0); 1101*12ff971cSMatthew Dillon vn_unlock(hmp->devvp); 1102*12ff971cSMatthew Dillon 1103*12ff971cSMatthew Dillon /* 1104*12ff971cSMatthew Dillon * The flush code sets CHAIN_VOLUMESYNC to indicate that the 1105*12ff971cSMatthew Dillon * volume header needs synchronization via hmp->volsync. 1106*12ff971cSMatthew Dillon * 1107*12ff971cSMatthew Dillon * XXX synchronize the flag & data with only this flush XXX 1108*12ff971cSMatthew Dillon */ 1109*12ff971cSMatthew Dillon if (error == 0 && 1110*12ff971cSMatthew Dillon (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) { 1111*12ff971cSMatthew Dillon struct buf *bp; 1112*12ff971cSMatthew Dillon 1113*12ff971cSMatthew Dillon /* 1114*12ff971cSMatthew Dillon * Synchronize the disk before flushing the volume 1115*12ff971cSMatthew Dillon * header. 1116*12ff971cSMatthew Dillon */ 1117*12ff971cSMatthew Dillon bp = getpbuf(NULL); 1118*12ff971cSMatthew Dillon bp->b_bio1.bio_offset = 0; 1119*12ff971cSMatthew Dillon bp->b_bufsize = 0; 1120*12ff971cSMatthew Dillon bp->b_bcount = 0; 1121*12ff971cSMatthew Dillon bp->b_cmd = BUF_CMD_FLUSH; 1122*12ff971cSMatthew Dillon bp->b_bio1.bio_done = biodone_sync; 1123*12ff971cSMatthew Dillon bp->b_bio1.bio_flags |= BIO_SYNC; 1124*12ff971cSMatthew Dillon vn_strategy(hmp->devvp, &bp->b_bio1); 1125*12ff971cSMatthew Dillon biowait(&bp->b_bio1, "h2vol"); 1126*12ff971cSMatthew Dillon relpbuf(bp, NULL); 1127*12ff971cSMatthew Dillon 1128*12ff971cSMatthew Dillon /* 1129*12ff971cSMatthew Dillon * Then we can safely flush the version of the 1130*12ff971cSMatthew Dillon * volume header synchronized by the flush code. 
1131*12ff971cSMatthew Dillon */ 1132*12ff971cSMatthew Dillon j = hmp->volhdrno + 1; 1133*12ff971cSMatthew Dillon if (j >= HAMMER2_NUM_VOLHDRS) 1134*12ff971cSMatthew Dillon j = 0; 1135*12ff971cSMatthew Dillon if (j * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE > 1136*12ff971cSMatthew Dillon hmp->volsync.volu_size) { 1137*12ff971cSMatthew Dillon j = 0; 1138*12ff971cSMatthew Dillon } 1139*12ff971cSMatthew Dillon kprintf("sync volhdr %d %jd\n", 1140*12ff971cSMatthew Dillon j, (intmax_t)hmp->volsync.volu_size); 1141*12ff971cSMatthew Dillon bp = getblk(hmp->devvp, j * HAMMER2_ZONE_BYTES64, 1142*12ff971cSMatthew Dillon HAMMER2_PBUFSIZE, 0, 0); 1143*12ff971cSMatthew Dillon atomic_clear_int(&hmp->vchain.flags, 1144*12ff971cSMatthew Dillon HAMMER2_CHAIN_VOLUMESYNC); 1145*12ff971cSMatthew Dillon bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE); 1146*12ff971cSMatthew Dillon bawrite(bp); 1147*12ff971cSMatthew Dillon hmp->volhdrno = j; 1148*12ff971cSMatthew Dillon } 1149*12ff971cSMatthew Dillon if (error) 1150*12ff971cSMatthew Dillon total_error = error; 1151*12ff971cSMatthew Dillon 1152*12ff971cSMatthew Dillon hammer2_trans_done(hmp->spmp); /* spmp trans */ 1153*12ff971cSMatthew Dillon skip: 1154*12ff971cSMatthew Dillon error = hammer2_xop_feed(&xop->head, NULL, clindex, total_error); 1155*12ff971cSMatthew Dillon } 1156