xref: /dragonfly/sys/vfs/hammer2/hammer2_flush.c (revision a13468b0)
132b800e6SMatthew Dillon /*
268b321c1SMatthew Dillon  * Copyright (c) 2011-2018 The DragonFly Project.  All rights reserved.
332b800e6SMatthew Dillon  *
432b800e6SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
532b800e6SMatthew Dillon  * by Matthew Dillon <dillon@dragonflybsd.org>
632b800e6SMatthew Dillon  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
732b800e6SMatthew Dillon  *
832b800e6SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
932b800e6SMatthew Dillon  * modification, are permitted provided that the following conditions
1032b800e6SMatthew Dillon  * are met:
1132b800e6SMatthew Dillon  *
1232b800e6SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
1332b800e6SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
1432b800e6SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
1532b800e6SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
1632b800e6SMatthew Dillon  *    the documentation and/or other materials provided with the
1732b800e6SMatthew Dillon  *    distribution.
1832b800e6SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
1932b800e6SMatthew Dillon  *    contributors may be used to endorse or promote products derived
2032b800e6SMatthew Dillon  *    from this software without specific, prior written permission.
2132b800e6SMatthew Dillon  *
2232b800e6SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2332b800e6SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2432b800e6SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
2532b800e6SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
2632b800e6SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2732b800e6SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
2832b800e6SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2932b800e6SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
3032b800e6SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
3132b800e6SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
3232b800e6SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3332b800e6SMatthew Dillon  * SUCH DAMAGE.
3432b800e6SMatthew Dillon  */
3550456506SMatthew Dillon /*
3650456506SMatthew Dillon  *			TRANSACTION AND FLUSH HANDLING
3750456506SMatthew Dillon  *
3850456506SMatthew Dillon  * Deceptively simple but actually fairly difficult to implement properly is
3950456506SMatthew Dillon  * how I would describe it.
4050456506SMatthew Dillon  *
41da6f36f4SMatthew Dillon  * Flushing generally occurs bottom-up but requires a top-down scan to
42da6f36f4SMatthew Dillon  * locate chains with MODIFIED and/or UPDATE bits set.  The ONFLUSH flag
43da6f36f4SMatthew Dillon  * tells how to recurse downward to find these chains.
4450456506SMatthew Dillon  */
4550456506SMatthew Dillon 
4632b800e6SMatthew Dillon #include <sys/cdefs.h>
4732b800e6SMatthew Dillon #include <sys/param.h>
4832b800e6SMatthew Dillon #include <sys/systm.h>
4932b800e6SMatthew Dillon #include <sys/types.h>
5032b800e6SMatthew Dillon #include <sys/lock.h>
51c12cfc4aSTomohiro Kusumi #include <sys/vnode.h>
52c12cfc4aSTomohiro Kusumi #include <sys/buf.h>
5332b800e6SMatthew Dillon 
5432b800e6SMatthew Dillon #include "hammer2.h"
5532b800e6SMatthew Dillon 
566aaf5cb0SMatthew Dillon #define HAMMER2_FLUSH_DEPTH_LIMIT	60      /* stack recursion limit */
57a71db85dSMatthew Dillon 
58a71db85dSMatthew Dillon 
5932b800e6SMatthew Dillon /*
6032b800e6SMatthew Dillon  * Recursively flush the specified chain.  The chain is locked and
6132b800e6SMatthew Dillon  * referenced by the caller and will remain so on return.  The chain
6232b800e6SMatthew Dillon  * will remain referenced throughout but can temporarily lose its
6332b800e6SMatthew Dillon  * lock during the recursion to avoid unnecessarily stalling user
6432b800e6SMatthew Dillon  * processes.
6532b800e6SMatthew Dillon  */
6632b800e6SMatthew Dillon struct hammer2_flush_info {
670dea3156SMatthew Dillon 	hammer2_chain_t *parent;
6832b800e6SMatthew Dillon 	int		depth;
6965cacacfSMatthew Dillon 	int		error;			/* cumulative error */
7053f84d31SMatthew Dillon 	int		flags;
716aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
726aaf5cb0SMatthew Dillon 	long		scan_count;
736aaf5cb0SMatthew Dillon 	long		scan_mod_count;
746aaf5cb0SMatthew Dillon 	long		scan_upd_count;
756aaf5cb0SMatthew Dillon 	long		scan_onf_count;
766aaf5cb0SMatthew Dillon 	long		scan_del_count;
776aaf5cb0SMatthew Dillon 	long		scan_btype[7];
786aaf5cb0SMatthew Dillon #endif
7932b800e6SMatthew Dillon };
8032b800e6SMatthew Dillon 
8132b800e6SMatthew Dillon typedef struct hammer2_flush_info hammer2_flush_info_t;
8232b800e6SMatthew Dillon 
83ecfe89b8SMatthew Dillon static int hammer2_flush_core(hammer2_flush_info_t *info,
8453f84d31SMatthew Dillon 				hammer2_chain_t *chain, int flags);
85da6f36f4SMatthew Dillon static int hammer2_flush_recurse(hammer2_chain_t *child, void *data);
8693f3933aSMatthew Dillon 
8732b800e6SMatthew Dillon /*
88c603b86bSMatthew Dillon  * Any per-pfs transaction initialization goes here.
8950456506SMatthew Dillon  */
9050456506SMatthew Dillon void
hammer2_trans_manage_init(hammer2_pfs_t * pmp)91c603b86bSMatthew Dillon hammer2_trans_manage_init(hammer2_pfs_t *pmp)
9250456506SMatthew Dillon {
9350456506SMatthew Dillon }
9450456506SMatthew Dillon 
9550456506SMatthew Dillon /*
96d34788efSMatthew Dillon  * Transaction support for any modifying operation.  Transactions are used
97d34788efSMatthew Dillon  * in the pmp layer by the frontend and in the spmp layer by the backend.
98c603b86bSMatthew Dillon  *
993e8408dbSMatthew Dillon  * 0			- Normal transaction.  Interlocks against just the
1003e8408dbSMatthew Dillon  *			  COPYQ portion of an ISFLUSH transaction.
101c603b86bSMatthew Dillon  *
102ecfe89b8SMatthew Dillon  * TRANS_ISFLUSH	- Flush transaction.  Interlocks against other flush
103ecfe89b8SMatthew Dillon  *			  transactions.
104c603b86bSMatthew Dillon  *
1053e8408dbSMatthew Dillon  *			  When COPYQ is also specified, waits for the count
1063e8408dbSMatthew Dillon  *			  to drop to 1.
1073e8408dbSMatthew Dillon  *
108ecfe89b8SMatthew Dillon  * TRANS_BUFCACHE	- Buffer cache transaction.  No interlock.
109ecfe89b8SMatthew Dillon  *
110ecfe89b8SMatthew Dillon  * TRANS_SIDEQ		- Run the sideq (only tested in trans_done())
1110dea3156SMatthew Dillon  *
11210136ab6SMatthew Dillon  * Initializing a new transaction allocates a transaction ID.  Typically
11310136ab6SMatthew Dillon  * passed a pmp (hmp passed as NULL), indicating a cluster transaction.  Can
11410136ab6SMatthew Dillon  * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single
11510136ab6SMatthew Dillon  * media target.  The latter mode is used by the recovery code.
1160dea3156SMatthew Dillon  */
1170dea3156SMatthew Dillon void
hammer2_trans_init(hammer2_pfs_t * pmp,uint32_t flags)118c603b86bSMatthew Dillon hammer2_trans_init(hammer2_pfs_t *pmp, uint32_t flags)
1190dea3156SMatthew Dillon {
120c603b86bSMatthew Dillon 	uint32_t oflags;
121c603b86bSMatthew Dillon 	uint32_t nflags;
122c603b86bSMatthew Dillon 	int dowait;
123d001f460SMatthew Dillon 
124c603b86bSMatthew Dillon 	for (;;) {
125c603b86bSMatthew Dillon 		oflags = pmp->trans.flags;
126c603b86bSMatthew Dillon 		cpu_ccfence();
127c603b86bSMatthew Dillon 		dowait = 0;
128d001f460SMatthew Dillon 
129d001f460SMatthew Dillon 		if (flags & HAMMER2_TRANS_ISFLUSH) {
130d001f460SMatthew Dillon 			/*
131ecfe89b8SMatthew Dillon 			 * Interlock against other flush transactions.
132355d67fcSMatthew Dillon 			 */
1335afbe9d8SMatthew Dillon 			if (oflags & HAMMER2_TRANS_ISFLUSH) {
1345afbe9d8SMatthew Dillon 				nflags = oflags | HAMMER2_TRANS_WAITING;
1355afbe9d8SMatthew Dillon 				dowait = 1;
1365afbe9d8SMatthew Dillon 			} else {
1375afbe9d8SMatthew Dillon 				nflags = (oflags | flags) + 1;
1385afbe9d8SMatthew Dillon 			}
139c603b86bSMatthew Dillon 		} else if (flags & HAMMER2_TRANS_BUFCACHE) {
140a7720be7SMatthew Dillon 			/*
14120852157SMatthew Dillon 			 * Requesting strategy transaction from buffer-cache,
14220852157SMatthew Dillon 			 * or a VM getpages/putpages through the buffer cache.
14320852157SMatthew Dillon 			 * We must allow such transactions in all situations
14420852157SMatthew Dillon 			 * to avoid deadlocks.
14520852157SMatthew Dillon 			 */
14620852157SMatthew Dillon 			nflags = (oflags | flags) + 1;
147a4dc31e0SMatthew Dillon 		} else {
148a4dc31e0SMatthew Dillon 			/*
149d0755e6dSMatthew Dillon 			 * Normal transaction.  We do not interlock against
150d0755e6dSMatthew Dillon 			 * BUFCACHE or ISFLUSH.
15168b321c1SMatthew Dillon 			 *
152ecfe89b8SMatthew Dillon 			 * Note that vnode locks may be held going into
153ecfe89b8SMatthew Dillon 			 * this call.
15468b321c1SMatthew Dillon 			 *
15568b321c1SMatthew Dillon 			 * NOTE: Remember that non-modifying operations
15668b321c1SMatthew Dillon 			 *	 such as read, stat, readdir, etc, do
15768b321c1SMatthew Dillon 			 *	 not use transactions.
158a4dc31e0SMatthew Dillon 			 */
159c603b86bSMatthew Dillon 			nflags = (oflags | flags) + 1;
160c603b86bSMatthew Dillon 		}
161c603b86bSMatthew Dillon 		if (dowait)
162c603b86bSMatthew Dillon 			tsleep_interlock(&pmp->trans.sync_wait, 0);
163c603b86bSMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
164c603b86bSMatthew Dillon 			if (dowait == 0)
165a4dc31e0SMatthew Dillon 				break;
166c603b86bSMatthew Dillon 			tsleep(&pmp->trans.sync_wait, PINTERLOCKED,
167c603b86bSMatthew Dillon 			       "h2trans", hz);
168ecfe89b8SMatthew Dillon 			/* retry */
169c603b86bSMatthew Dillon 		} else {
170c603b86bSMatthew Dillon 			cpu_pause();
171ecfe89b8SMatthew Dillon 			/* retry */
172a7720be7SMatthew Dillon 		}
173c603b86bSMatthew Dillon 		/* retry */
174c603b86bSMatthew Dillon 	}
1753e8408dbSMatthew Dillon 
176d0755e6dSMatthew Dillon #if 0
1773e8408dbSMatthew Dillon 	/*
1783e8408dbSMatthew Dillon 	 * When entering a FLUSH transaction with COPYQ set, wait for the
1793e8408dbSMatthew Dillon 	 * transaction count to drop to 1 (our flush transaction only)
1803e8408dbSMatthew Dillon 	 * before proceeding.
1813e8408dbSMatthew Dillon 	 *
1823e8408dbSMatthew Dillon 	 * This waits for all non-flush transactions to complete and blocks
1833e8408dbSMatthew Dillon 	 * new non-flush transactions from starting until COPYQ is cleared.
1843e8408dbSMatthew Dillon 	 * (the flush will then proceed after clearing COPYQ).  This should
1853e8408dbSMatthew Dillon 	 * be a very short stall on modifying operations.
1863e8408dbSMatthew Dillon 	 */
1873e8408dbSMatthew Dillon 	while ((flags & HAMMER2_TRANS_ISFLUSH) &&
1883e8408dbSMatthew Dillon 	       (flags & HAMMER2_TRANS_COPYQ)) {
1893e8408dbSMatthew Dillon 		oflags = pmp->trans.flags;
1903e8408dbSMatthew Dillon 		cpu_ccfence();
1913e8408dbSMatthew Dillon 		if ((oflags & HAMMER2_TRANS_MASK) == 1)
1923e8408dbSMatthew Dillon 			break;
1933e8408dbSMatthew Dillon 		nflags = oflags | HAMMER2_TRANS_WAITING;
1943e8408dbSMatthew Dillon 		tsleep_interlock(&pmp->trans.sync_wait, 0);
1953e8408dbSMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
1963e8408dbSMatthew Dillon 			tsleep(&pmp->trans.sync_wait, PINTERLOCKED,
1973e8408dbSMatthew Dillon 			       "h2trans2", hz);
1983e8408dbSMatthew Dillon 		}
1993e8408dbSMatthew Dillon 	}
200d0755e6dSMatthew Dillon #endif
201c603b86bSMatthew Dillon }
202a4dc31e0SMatthew Dillon 
203e2163f5bSMatthew Dillon /*
204e2163f5bSMatthew Dillon  * Start a sub-transaction, there is no 'subdone' function.  This will
20553f84d31SMatthew Dillon  * issue a new modify_tid (mtid) for the current transaction, which is a
20653f84d31SMatthew Dillon  * CLC (cluster level change) id and not a per-node id.
20753f84d31SMatthew Dillon  *
20853f84d31SMatthew Dillon  * This function must be called for each XOP when multiple XOPs are run in
20953f84d31SMatthew Dillon  * sequence within a transaction.
21053f84d31SMatthew Dillon  *
21153f84d31SMatthew Dillon  * Callers typically update the inode with the transaction mtid manually
21253f84d31SMatthew Dillon  * to enforce sequencing.
213e2163f5bSMatthew Dillon  */
214e2163f5bSMatthew Dillon hammer2_tid_t
hammer2_trans_sub(hammer2_pfs_t * pmp)215e2163f5bSMatthew Dillon hammer2_trans_sub(hammer2_pfs_t *pmp)
216e2163f5bSMatthew Dillon {
217e2163f5bSMatthew Dillon 	hammer2_tid_t mtid;
218e2163f5bSMatthew Dillon 
219e2163f5bSMatthew Dillon 	mtid = atomic_fetchadd_64(&pmp->modify_tid, 1);
220e2163f5bSMatthew Dillon 
221e2163f5bSMatthew Dillon 	return (mtid);
222e2163f5bSMatthew Dillon }
223e2163f5bSMatthew Dillon 
224c603b86bSMatthew Dillon void
hammer2_trans_setflags(hammer2_pfs_t * pmp,uint32_t flags)225ecfe89b8SMatthew Dillon hammer2_trans_setflags(hammer2_pfs_t *pmp, uint32_t flags)
226ecfe89b8SMatthew Dillon {
227ecfe89b8SMatthew Dillon 	atomic_set_int(&pmp->trans.flags, flags);
228ecfe89b8SMatthew Dillon }
229ecfe89b8SMatthew Dillon 
2303e8408dbSMatthew Dillon /*
2313e8408dbSMatthew Dillon  * Typically used to clear trans flags asynchronously.  If TRANS_WAITING
2323e8408dbSMatthew Dillon  * is in the mask, and was previously set, this function will wake up
2333e8408dbSMatthew Dillon  * any waiters.
2343e8408dbSMatthew Dillon  */
235ecfe89b8SMatthew Dillon void
hammer2_trans_clearflags(hammer2_pfs_t * pmp,uint32_t flags)236ecfe89b8SMatthew Dillon hammer2_trans_clearflags(hammer2_pfs_t *pmp, uint32_t flags)
237ecfe89b8SMatthew Dillon {
238ecfe89b8SMatthew Dillon 	uint32_t oflags;
239ecfe89b8SMatthew Dillon 	uint32_t nflags;
240ecfe89b8SMatthew Dillon 
241ecfe89b8SMatthew Dillon 	for (;;) {
242ecfe89b8SMatthew Dillon 		oflags = pmp->trans.flags;
243ecfe89b8SMatthew Dillon 		cpu_ccfence();
244ecfe89b8SMatthew Dillon 		nflags = oflags & ~flags;
245ecfe89b8SMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
246ecfe89b8SMatthew Dillon 			if ((oflags ^ nflags) & HAMMER2_TRANS_WAITING)
247ecfe89b8SMatthew Dillon 				wakeup(&pmp->trans.sync_wait);
248ecfe89b8SMatthew Dillon 			break;
249ecfe89b8SMatthew Dillon 		}
250ecfe89b8SMatthew Dillon 		cpu_pause();
251ecfe89b8SMatthew Dillon 		/* retry */
252ecfe89b8SMatthew Dillon 	}
253ecfe89b8SMatthew Dillon }
254ecfe89b8SMatthew Dillon 
255ecfe89b8SMatthew Dillon void
hammer2_trans_done(hammer2_pfs_t * pmp,uint32_t flags)256ecfe89b8SMatthew Dillon hammer2_trans_done(hammer2_pfs_t *pmp, uint32_t flags)
257c603b86bSMatthew Dillon {
258c603b86bSMatthew Dillon 	uint32_t oflags;
259c603b86bSMatthew Dillon 	uint32_t nflags;
260c603b86bSMatthew Dillon 
261d2a41023SMatthew Dillon #if 0
262257c2728SMatthew Dillon 	/*
263257c2728SMatthew Dillon 	 * Modifying ops on the front-end can cause dirty inodes to
264257c2728SMatthew Dillon 	 * build up in the sideq.  We don't flush these on inactive/reclaim
265257c2728SMatthew Dillon 	 * due to potential deadlocks, so we have to deal with them from
266257c2728SMatthew Dillon 	 * inside other nominal modifying front-end transactions.
267257c2728SMatthew Dillon 	 */
268ecfe89b8SMatthew Dillon 	if ((flags & HAMMER2_TRANS_SIDEQ) &&
269d0755e6dSMatthew Dillon 	    pmp->sideq_count > hammer2_limit_dirty_inodes / 2 &&
270ecfe89b8SMatthew Dillon 	    pmp->sideq_count > (pmp->inum_count >> 3) &&
271ecfe89b8SMatthew Dillon 	    pmp->mp) {
2725afbe9d8SMatthew Dillon 		speedup_syncer(pmp->mp);
273ecfe89b8SMatthew Dillon 	}
274d2a41023SMatthew Dillon #endif
275257c2728SMatthew Dillon 
276257c2728SMatthew Dillon 	/*
2773e8408dbSMatthew Dillon 	 * Clean-up the transaction.  Wakeup any waiters when finishing
2783e8408dbSMatthew Dillon 	 * a flush transaction or transitioning the non-flush transaction
2793e8408dbSMatthew Dillon 	 * count from 2->1 while a flush transaction is pending.
280257c2728SMatthew Dillon 	 */
281c603b86bSMatthew Dillon 	for (;;) {
282c603b86bSMatthew Dillon 		oflags = pmp->trans.flags;
283c603b86bSMatthew Dillon 		cpu_ccfence();
284c603b86bSMatthew Dillon 		KKASSERT(oflags & HAMMER2_TRANS_MASK);
285ecfe89b8SMatthew Dillon 
286ecfe89b8SMatthew Dillon 		nflags = (oflags - 1) & ~flags;
287ecfe89b8SMatthew Dillon 		if (flags & HAMMER2_TRANS_ISFLUSH) {
288ecfe89b8SMatthew Dillon 			nflags &= ~HAMMER2_TRANS_WAITING;
289c603b86bSMatthew Dillon 		}
2903e8408dbSMatthew Dillon 		if ((oflags & (HAMMER2_TRANS_ISFLUSH|HAMMER2_TRANS_MASK)) ==
2913e8408dbSMatthew Dillon 		    (HAMMER2_TRANS_ISFLUSH|2)) {
2923e8408dbSMatthew Dillon 			nflags &= ~HAMMER2_TRANS_WAITING;
2933e8408dbSMatthew Dillon 		}
294c603b86bSMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
295ecfe89b8SMatthew Dillon 			if ((oflags ^ nflags) & HAMMER2_TRANS_WAITING)
296c603b86bSMatthew Dillon 				wakeup(&pmp->trans.sync_wait);
297c603b86bSMatthew Dillon 			break;
298c603b86bSMatthew Dillon 		}
299ecfe89b8SMatthew Dillon 		cpu_pause();
300c603b86bSMatthew Dillon 		/* retry */
301044541cdSMatthew Dillon 	}
30250456506SMatthew Dillon }
30350456506SMatthew Dillon 
304c603b86bSMatthew Dillon /*
305c603b86bSMatthew Dillon  * Obtain new, unique inode number (not serialized by caller).
306c603b86bSMatthew Dillon  */
307c603b86bSMatthew Dillon hammer2_tid_t
hammer2_trans_newinum(hammer2_pfs_t * pmp)308c603b86bSMatthew Dillon hammer2_trans_newinum(hammer2_pfs_t *pmp)
309c603b86bSMatthew Dillon {
310c603b86bSMatthew Dillon 	hammer2_tid_t tid;
311c603b86bSMatthew Dillon 
312e2163f5bSMatthew Dillon 	tid = atomic_fetchadd_64(&pmp->inode_tid, 1);
313c603b86bSMatthew Dillon 
314c603b86bSMatthew Dillon 	return tid;
315a7720be7SMatthew Dillon }
316a7720be7SMatthew Dillon 
317c603b86bSMatthew Dillon /*
31820852157SMatthew Dillon  * Assert that a strategy call is ok here.  Currently we allow strategy
31920852157SMatthew Dillon  * calls in all situations, including during flushes.  Previously:
32020852157SMatthew Dillon  *	(old) (1) In a normal transaction.
321c603b86bSMatthew Dillon  */
3220dea3156SMatthew Dillon void
hammer2_trans_assert_strategy(hammer2_pfs_t * pmp)3239450e866SMatthew Dillon hammer2_trans_assert_strategy(hammer2_pfs_t *pmp)
324c7916d0bSMatthew Dillon {
32520852157SMatthew Dillon #if 0
32655e28d18STomohiro Kusumi 	KKASSERT((pmp->trans.flags & HAMMER2_TRANS_ISFLUSH) == 0);
32720852157SMatthew Dillon #endif
328c7916d0bSMatthew Dillon }
329c7916d0bSMatthew Dillon 
330eedd52a3SMatthew Dillon /*
3310dea3156SMatthew Dillon  * Flush the chain and all modified sub-chains through the specified
33253f84d31SMatthew Dillon  * synchronization point, propagating blockref updates back up.  As
33353f84d31SMatthew Dillon  * part of this propagation, mirror_tid and inode/data usage statistics
33453f84d31SMatthew Dillon  * propagates back upward.
3350dea3156SMatthew Dillon  *
33665cacacfSMatthew Dillon  * Returns a HAMMER2 error code, 0 if no error.  Note that I/O errors from
33765cacacfSMatthew Dillon  * buffers dirtied during the flush operation can occur later.
33865cacacfSMatthew Dillon  *
33953f84d31SMatthew Dillon  * modify_tid (clc - cluster level change) is not propagated.
34053f84d31SMatthew Dillon  *
34153f84d31SMatthew Dillon  * update_tid (clc) is used for validation and is not propagated by this
34253f84d31SMatthew Dillon  * function.
3430dea3156SMatthew Dillon  *
34432b800e6SMatthew Dillon  * This routine can be called from several places but the most important
345c4421f07SMatthew Dillon  * is from VFS_SYNC (frontend) via hammer2_xop_inode_flush (backend).
34632b800e6SMatthew Dillon  *
347da6f36f4SMatthew Dillon  * chain is locked on call and will remain locked on return.  The chain's
348da6f36f4SMatthew Dillon  * UPDATE flag indicates that its parent's block table (which is not yet
3495c51ecaeSMatthew Dillon  * part of the flush) should be updated.
35040498d1cSMatthew Dillon  *
35140498d1cSMatthew Dillon  * flags:
35240498d1cSMatthew Dillon  *	HAMMER2_FLUSH_TOP	Indicates that this is the top of the flush.
35340498d1cSMatthew Dillon  *				Is cleared for the recursion.
35440498d1cSMatthew Dillon  *
35540498d1cSMatthew Dillon  *	HAMMER2_FLUSH_ALL	Recurse everything
35640498d1cSMatthew Dillon  *
35765c894ffSMatthew Dillon  *	HAMMER2_FLUSH_INODE_STOP
35865c894ffSMatthew Dillon  *				Stop at PFS inode or normal inode boundary
35932b800e6SMatthew Dillon  */
36065cacacfSMatthew Dillon int
hammer2_flush(hammer2_chain_t * chain,int flags)36153f84d31SMatthew Dillon hammer2_flush(hammer2_chain_t *chain, int flags)
36232b800e6SMatthew Dillon {
36332b800e6SMatthew Dillon 	hammer2_flush_info_t info;
364925e4ad1SMatthew Dillon 	int loops;
36532b800e6SMatthew Dillon 
36632b800e6SMatthew Dillon 	/*
36732b800e6SMatthew Dillon 	 * Execute the recursive flush and handle deferrals.
36832b800e6SMatthew Dillon 	 *
36932b800e6SMatthew Dillon 	 * Chains can be ridiculously long (thousands deep), so to
37032b800e6SMatthew Dillon 	 * avoid blowing out the kernel stack the recursive flush has a
37132b800e6SMatthew Dillon 	 * depth limit.  Elements at the limit are placed on a list
37232b800e6SMatthew Dillon 	 * for re-execution after the stack has been popped.
37332b800e6SMatthew Dillon 	 */
37432b800e6SMatthew Dillon 	bzero(&info, sizeof(info));
37553f84d31SMatthew Dillon 	info.flags = flags & ~HAMMER2_FLUSH_TOP;
37632b800e6SMatthew Dillon 
377da6f36f4SMatthew Dillon 	/*
378da6f36f4SMatthew Dillon 	 * Calculate parent (can be NULL), if not NULL the flush core
379da6f36f4SMatthew Dillon 	 * expects the parent to be referenced so it can easily lock/unlock
380da6f36f4SMatthew Dillon 	 * it without it getting ripped up.
381da6f36f4SMatthew Dillon 	 */
382da6f36f4SMatthew Dillon 	if ((info.parent = chain->parent) != NULL)
383da6f36f4SMatthew Dillon 		hammer2_chain_ref(info.parent);
384731b2a84SMatthew Dillon 
385a7720be7SMatthew Dillon 	/*
386a7720be7SMatthew Dillon 	 * Extra ref needed because flush_core expects it when replacing
387a7720be7SMatthew Dillon 	 * chain.
388a7720be7SMatthew Dillon 	 */
389a7720be7SMatthew Dillon 	hammer2_chain_ref(chain);
390925e4ad1SMatthew Dillon 	loops = 0;
391a7720be7SMatthew Dillon 
3920dea3156SMatthew Dillon 	for (;;) {
39332b800e6SMatthew Dillon 		/*
39440498d1cSMatthew Dillon 		 * [re]flush chain as the deep recursion may have generated
39540498d1cSMatthew Dillon 		 * additional modifications.
39632b800e6SMatthew Dillon 		 */
39740498d1cSMatthew Dillon 		if (info.parent != chain->parent) {
39868b321c1SMatthew Dillon 			if (hammer2_debug & 0x0040) {
39968b321c1SMatthew Dillon 				kprintf("LOST CHILD4 %p->%p "
40068b321c1SMatthew Dillon 					"(actual parent %p)\n",
40140498d1cSMatthew Dillon 					info.parent, chain, chain->parent);
40268b321c1SMatthew Dillon 			}
40340498d1cSMatthew Dillon 			hammer2_chain_drop(info.parent);
40440498d1cSMatthew Dillon 			info.parent = chain->parent;
40540498d1cSMatthew Dillon 			hammer2_chain_ref(info.parent);
40640498d1cSMatthew Dillon 		}
407ecfe89b8SMatthew Dillon 		if (hammer2_flush_core(&info, chain, flags) == 0)
40832b800e6SMatthew Dillon 			break;
409925e4ad1SMatthew Dillon 
410925e4ad1SMatthew Dillon 		if (++loops % 1000 == 0) {
4118138a154SMatthew Dillon 			kprintf("hammer2_flush: excessive loops on %p\n",
412925e4ad1SMatthew Dillon 				chain);
413925e4ad1SMatthew Dillon 			if (hammer2_debug & 0x100000)
414925e4ad1SMatthew Dillon 				Debugger("hell4");
415925e4ad1SMatthew Dillon 		}
41632b800e6SMatthew Dillon 	}
4176aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
4186aaf5cb0SMatthew Dillon 	if (info.scan_count >= 10)
4196aaf5cb0SMatthew Dillon 	kprintf("hammer2_flush: scan_count %ld (%ld,%ld,%ld,%ld) "
420ecfe89b8SMatthew Dillon 		"bt(%ld,%ld,%ld,%ld,%ld,%ld)\n",
4216aaf5cb0SMatthew Dillon 		info.scan_count,
4226aaf5cb0SMatthew Dillon 		info.scan_mod_count,
4236aaf5cb0SMatthew Dillon 		info.scan_upd_count,
4246aaf5cb0SMatthew Dillon 		info.scan_onf_count,
4256aaf5cb0SMatthew Dillon 		info.scan_del_count,
4266aaf5cb0SMatthew Dillon 		info.scan_btype[1],
4276aaf5cb0SMatthew Dillon 		info.scan_btype[2],
4286aaf5cb0SMatthew Dillon 		info.scan_btype[3],
4296aaf5cb0SMatthew Dillon 		info.scan_btype[4],
4306aaf5cb0SMatthew Dillon 		info.scan_btype[5],
431ecfe89b8SMatthew Dillon 		info.scan_btype[6]);
4326aaf5cb0SMatthew Dillon #endif
433a7720be7SMatthew Dillon 	hammer2_chain_drop(chain);
434da6f36f4SMatthew Dillon 	if (info.parent)
435da6f36f4SMatthew Dillon 		hammer2_chain_drop(info.parent);
43665cacacfSMatthew Dillon 	return (info.error);
43732b800e6SMatthew Dillon }
43832b800e6SMatthew Dillon 
439476d2aadSMatthew Dillon /*
440ea155208SMatthew Dillon  * This is the core of the chain flushing code.  The chain is locked by the
441a7720be7SMatthew Dillon  * caller and must also have an extra ref on it by the caller, and remains
442fae225dcSMatthew Dillon  * locked and will have an extra ref on return.  info.parent is referenced
443fae225dcSMatthew Dillon  * but not locked.
444fae225dcSMatthew Dillon  *
445fae225dcSMatthew Dillon  * Upon return, the caller can test the UPDATE bit on the chain to determine
446fae225dcSMatthew Dillon  * if the parent needs updating.
447a7720be7SMatthew Dillon  *
448ecfe89b8SMatthew Dillon  * If non-zero is returned, the chain's parent changed during the flush and
449ecfe89b8SMatthew Dillon  * the caller must retry the operation.
450ecfe89b8SMatthew Dillon  *
4518138a154SMatthew Dillon  * (1) Determine if this node is a candidate for the flush, return if it is
4528138a154SMatthew Dillon  *     not.  fchain and vchain are always candidates for the flush.
4530dea3156SMatthew Dillon  *
4548138a154SMatthew Dillon  * (2) If we recurse too deep the chain is entered onto the deferral list and
4558138a154SMatthew Dillon  *     the current flush stack is aborted until after the deferral list is
4568138a154SMatthew Dillon  *     run.
4578138a154SMatthew Dillon  *
4588138a154SMatthew Dillon  * (3) Recursively flush live children (rbtree).  This can create deferrals.
459da6f36f4SMatthew Dillon  *     A successful flush clears the MODIFIED and UPDATE bits on the children
460da6f36f4SMatthew Dillon  *     and typically causes the parent to be marked MODIFIED as the children
461da6f36f4SMatthew Dillon  *     update the parent's block table.  A parent might already be marked
462da6f36f4SMatthew Dillon  *     MODIFIED due to a deletion (whos blocktable update in the parent is
463da6f36f4SMatthew Dillon  *     handled by the frontend), or if the parent itself is modified by the
464da6f36f4SMatthew Dillon  *     frontend for other reasons.
4658138a154SMatthew Dillon  *
466da6f36f4SMatthew Dillon  * (4) Permanently disconnected sub-trees are cleaned up by the front-end.
467da6f36f4SMatthew Dillon  *     Deleted-but-open inodes can still be individually flushed via the
468da6f36f4SMatthew Dillon  *     filesystem syncer.
4698138a154SMatthew Dillon  *
470470dad14SMatthew Dillon  * (5) Delete parents on the way back up if they are normal indirect blocks
471470dad14SMatthew Dillon  *     and have no children.
472470dad14SMatthew Dillon  *
473470dad14SMatthew Dillon  * (6) Note that an unmodified child may still need the block table in its
474da6f36f4SMatthew Dillon  *     parent updated (e.g. rename/move).  The child will have UPDATE set
475da6f36f4SMatthew Dillon  *     in this case.
4768138a154SMatthew Dillon  *
47750456506SMatthew Dillon  *			WARNING ON BREF MODIFY_TID/MIRROR_TID
478925e4ad1SMatthew Dillon  *
479e513e77eSMatthew Dillon  * blockref.modify_tid is consistent only within a PFS, and will not be
480e513e77eSMatthew Dillon  * consistent during synchronization.  mirror_tid is consistent across the
481e513e77eSMatthew Dillon  * block device regardless of the PFS.
482476d2aadSMatthew Dillon  */
483ecfe89b8SMatthew Dillon static int
hammer2_flush_core(hammer2_flush_info_t * info,hammer2_chain_t * chain,int flags)484da6f36f4SMatthew Dillon hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
48553f84d31SMatthew Dillon 		   int flags)
48632b800e6SMatthew Dillon {
487da6f36f4SMatthew Dillon 	hammer2_chain_t *parent;
488506bd6d1SMatthew Dillon 	hammer2_dev_t *hmp;
48965cacacfSMatthew Dillon 	int save_error;
490ecfe89b8SMatthew Dillon 	int retry;
491ecfe89b8SMatthew Dillon 
492ecfe89b8SMatthew Dillon 	retry = 0;
493da6f36f4SMatthew Dillon 
494da6f36f4SMatthew Dillon 	/*
495da6f36f4SMatthew Dillon 	 * (1) Optimize downward recursion to locate nodes needing action.
496da6f36f4SMatthew Dillon 	 *     Nothing to do if none of these flags are set.
497da6f36f4SMatthew Dillon 	 */
498556042eaSTomohiro Kusumi 	if ((chain->flags & HAMMER2_CHAIN_FLUSH_MASK) == 0)
499ecfe89b8SMatthew Dillon 		return 0;
50032b800e6SMatthew Dillon 
501a5913bdfSMatthew Dillon 	hmp = chain->hmp;
50240498d1cSMatthew Dillon 
50340498d1cSMatthew Dillon 	/*
50440498d1cSMatthew Dillon 	 * NOTE: parent can be NULL, usually due to destroy races.
50540498d1cSMatthew Dillon 	 */
50640498d1cSMatthew Dillon 	parent = info->parent;
507fae225dcSMatthew Dillon 	KKASSERT(chain->parent == parent);
508925e4ad1SMatthew Dillon 
5090924b3f8SMatthew Dillon 	/*
510da6f36f4SMatthew Dillon 	 * Downward search recursion
51140498d1cSMatthew Dillon 	 *
512d0755e6dSMatthew Dillon 	 * We must be careful on cold stops, which often occur on inode
513d0755e6dSMatthew Dillon 	 * boundaries due to the way hammer2_vfs_sync() sequences the flush.
514d0755e6dSMatthew Dillon 	 * Be sure to issue an appropriate chain_setflush()
515ea155208SMatthew Dillon 	 */
516ecfe89b8SMatthew Dillon 	if ((chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) &&
51753f84d31SMatthew Dillon 	    (flags & HAMMER2_FLUSH_ALL) == 0 &&
518c42feed6SMatthew Dillon 	    (flags & HAMMER2_FLUSH_TOP) == 0 &&
519c42feed6SMatthew Dillon 	    chain->pmp && chain->pmp->mp) {
5209450e866SMatthew Dillon 		/*
521fae225dcSMatthew Dillon 		 * If FLUSH_ALL is not specified the caller does not want
522c42feed6SMatthew Dillon 		 * to recurse through PFS roots that have been mounted.
523c42feed6SMatthew Dillon 		 *
524c42feed6SMatthew Dillon 		 * (If the PFS has not been mounted there may not be
525c42feed6SMatthew Dillon 		 *  anything monitoring its chains and its up to us
526c42feed6SMatthew Dillon 		 *  to flush it).
527c42feed6SMatthew Dillon 		 *
528c42feed6SMatthew Dillon 		 * The typical sequence is to flush dirty PFS's starting at
529c42feed6SMatthew Dillon 		 * their root downward, then flush the device root (vchain).
530c42feed6SMatthew Dillon 		 * It is this second flush that typically leaves out the
531c42feed6SMatthew Dillon 		 * ALL flag.
5329450e866SMatthew Dillon 		 *
533fae225dcSMatthew Dillon 		 * However we must still process the PFSROOT chains for block
5349450e866SMatthew Dillon 		 * table updates in their parent (which IS part of our flush).
5359450e866SMatthew Dillon 		 *
536fae225dcSMatthew Dillon 		 * NOTE: The volume root, vchain, does not set PFSBOUNDARY.
537fae225dcSMatthew Dillon 		 *
538fae225dcSMatthew Dillon 		 * NOTE: We must re-set ONFLUSH in the parent to retain if
539fae225dcSMatthew Dillon 		 *	 this chain (that we are skipping) requires work.
5409450e866SMatthew Dillon 		 */
541fae225dcSMatthew Dillon 		if (chain->flags & (HAMMER2_CHAIN_ONFLUSH |
542fae225dcSMatthew Dillon 				    HAMMER2_CHAIN_DESTROY |
543fae225dcSMatthew Dillon 				    HAMMER2_CHAIN_MODIFIED)) {
544fae225dcSMatthew Dillon 			hammer2_chain_setflush(parent);
545fae225dcSMatthew Dillon 		}
546ecfe89b8SMatthew Dillon 		goto done;
54740498d1cSMatthew Dillon 	} else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
54840498d1cSMatthew Dillon 		   (flags & HAMMER2_FLUSH_INODE_STOP) &&
54940498d1cSMatthew Dillon 		   (flags & HAMMER2_FLUSH_ALL) == 0 &&
55040498d1cSMatthew Dillon 		   (flags & HAMMER2_FLUSH_TOP) == 0 &&
55140498d1cSMatthew Dillon 		   chain->pmp && chain->pmp->mp) {
55240498d1cSMatthew Dillon 		/*
553ecfe89b8SMatthew Dillon 		 * When FLUSH_INODE_STOP is specified we are being asked not
554ecfe89b8SMatthew Dillon 		 * to include any inode changes for inodes we encounter,
555ecfe89b8SMatthew Dillon 		 * with the exception of the inode that the flush began with.
556ecfe89b8SMatthew Dillon 		 * So: INODE, INODE_STOP, and TOP==0 basically.
557d0755e6dSMatthew Dillon 		 *
558d0755e6dSMatthew Dillon 		 * Dirty inodes are flushed based on the hammer2_inode
559d0755e6dSMatthew Dillon 		 * in-memory structure, issuing a chain_setflush() here
560d0755e6dSMatthew Dillon 		 * will only cause unnecessary traversals of the topology.
561ecfe89b8SMatthew Dillon 		 */
562ecfe89b8SMatthew Dillon 		goto done;
563ecfe89b8SMatthew Dillon #if 0
564ecfe89b8SMatthew Dillon 		/*
56540498d1cSMatthew Dillon 		 * If FLUSH_INODE_STOP is specified and both ALL and TOP
56640498d1cSMatthew Dillon 		 * are clear, we must not flush the chain.  The chain should
56740498d1cSMatthew Dillon 		 * have already been flushed and any further ONFLUSH/UPDATE
56840498d1cSMatthew Dillon 		 * setting will be related to the next flush.
56940498d1cSMatthew Dillon 		 *
57040498d1cSMatthew Dillon 		 * This feature allows us to flush inodes independently of
57140498d1cSMatthew Dillon 		 * each other and meta-data above the inodes separately.
57240498d1cSMatthew Dillon 		 */
57340498d1cSMatthew Dillon 		if (chain->flags & (HAMMER2_CHAIN_ONFLUSH |
57440498d1cSMatthew Dillon 				    HAMMER2_CHAIN_DESTROY |
57540498d1cSMatthew Dillon 				    HAMMER2_CHAIN_MODIFIED)) {
57640498d1cSMatthew Dillon 			if (parent)
57740498d1cSMatthew Dillon 				hammer2_chain_setflush(parent);
57840498d1cSMatthew Dillon 		}
579ecfe89b8SMatthew Dillon #endif
58053f84d31SMatthew Dillon 	} else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) {
58153f84d31SMatthew Dillon 		/*
58253f84d31SMatthew Dillon 		 * Recursion depth reached.
58353f84d31SMatthew Dillon 		 */
584ecfe89b8SMatthew Dillon 		panic("hammer2: flush depth limit");
5858bbe5025SMatthew Dillon 	} else if (chain->flags & (HAMMER2_CHAIN_ONFLUSH |
5868bbe5025SMatthew Dillon 				   HAMMER2_CHAIN_DESTROY)) {
5878138a154SMatthew Dillon 		/*
588da6f36f4SMatthew Dillon 		 * Downward recursion search (actual flush occurs bottom-up).
58965cacacfSMatthew Dillon 		 * pre-clear ONFLUSH.  It can get set again due to races or
59065cacacfSMatthew Dillon 		 * flush errors, which we want so the scan finds us again in
59165cacacfSMatthew Dillon 		 * the next flush.
5928bbe5025SMatthew Dillon 		 *
5938bbe5025SMatthew Dillon 		 * We must also recurse if DESTROY is set so we can finally
5948bbe5025SMatthew Dillon 		 * get rid of the related children, otherwise the node will
5958bbe5025SMatthew Dillon 		 * just get re-flushed on lastdrop.
596fae225dcSMatthew Dillon 		 *
597fae225dcSMatthew Dillon 		 * WARNING!  The recursion will unlock/relock info->parent
598fae225dcSMatthew Dillon 		 *	     (which is 'chain'), potentially allowing it
599fae225dcSMatthew Dillon 		 *	     to be ripped up.
6008138a154SMatthew Dillon 		 */
601a964af6fSMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
60265cacacfSMatthew Dillon 		save_error = info->error;
60365cacacfSMatthew Dillon 		info->error = 0;
6048138a154SMatthew Dillon 		info->parent = chain;
605a964af6fSMatthew Dillon 
606a964af6fSMatthew Dillon 		/*
607a964af6fSMatthew Dillon 		 * We may have to do this twice to catch any indirect
608ecfe89b8SMatthew Dillon 		 * block maintenance that occurs.
609a964af6fSMatthew Dillon 		 */
61094491fa0SMatthew Dillon 		hammer2_spin_ex(&chain->core.spin);
611da6f36f4SMatthew Dillon 		RB_SCAN(hammer2_chain_tree, &chain->core.rbtree,
612da6f36f4SMatthew Dillon 			NULL, hammer2_flush_recurse, info);
613a964af6fSMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_ONFLUSH) {
614a964af6fSMatthew Dillon 			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
615a964af6fSMatthew Dillon 			RB_SCAN(hammer2_chain_tree, &chain->core.rbtree,
616a964af6fSMatthew Dillon 				NULL, hammer2_flush_recurse, info);
61719808ac9SMatthew Dillon 		}
618a964af6fSMatthew Dillon 		hammer2_spin_unex(&chain->core.spin);
619da6f36f4SMatthew Dillon 		info->parent = parent;
62065cacacfSMatthew Dillon 
62165cacacfSMatthew Dillon 		/*
62265cacacfSMatthew Dillon 		 * Re-set the flush bits if the flush was incomplete or
62365cacacfSMatthew Dillon 		 * an error occurred.  If an error occurs it is typically
62465cacacfSMatthew Dillon 		 * an allocation error.  Errors do not cause deferrals.
62565cacacfSMatthew Dillon 		 */
62665cacacfSMatthew Dillon 		if (info->error)
62765cacacfSMatthew Dillon 			hammer2_chain_setflush(chain);
62865cacacfSMatthew Dillon 		info->error |= save_error;
629fae225dcSMatthew Dillon 
630fae225dcSMatthew Dillon 		/*
631fae225dcSMatthew Dillon 		 * If we lost the parent->chain association we have to
632fae225dcSMatthew Dillon 		 * stop processing this chain because it is no longer
633fae225dcSMatthew Dillon 		 * in this recursion.  If it moved, it will be handled
634fae225dcSMatthew Dillon 		 * by the ONFLUSH flag elsewhere.
635fae225dcSMatthew Dillon 		 */
636fae225dcSMatthew Dillon 		if (chain->parent != parent) {
637fae225dcSMatthew Dillon 			kprintf("LOST CHILD2 %p->%p (actual parent %p)\n",
638fae225dcSMatthew Dillon 				parent, chain, chain->parent);
639fae225dcSMatthew Dillon 			goto done;
640fae225dcSMatthew Dillon 		}
6418138a154SMatthew Dillon 	}
6420924b3f8SMatthew Dillon 
64332b800e6SMatthew Dillon 	/*
644da6f36f4SMatthew Dillon 	 * Now we are in the bottom-up part of the recursion.
645da6f36f4SMatthew Dillon 	 *
646ecfe89b8SMatthew Dillon 	 * We continue to try to update the chain on lower-level errors, but
647ecfe89b8SMatthew Dillon 	 * the flush code may decide not to flush the volume root.
64865cacacfSMatthew Dillon 	 *
64965cacacfSMatthew Dillon 	 * XXX should we continue to try to update the chain if an error
65065cacacfSMatthew Dillon 	 *     occurred?
6518138a154SMatthew Dillon 	 */
6528138a154SMatthew Dillon 
6538138a154SMatthew Dillon 	/*
654fae225dcSMatthew Dillon 	 * Both parent and chain must be locked in order to flush chain,
655fae225dcSMatthew Dillon 	 * in order to properly update the parent under certain conditions.
656fae225dcSMatthew Dillon 	 *
657fae225dcSMatthew Dillon 	 * In addition, we can't safely unlock/relock the chain once we
658fae225dcSMatthew Dillon 	 * start flushing the chain itself, which we would have to do later
659fae225dcSMatthew Dillon 	 * on in order to lock the parent if we didn't do that now.
660fae225dcSMatthew Dillon 	 */
6616aaf5cb0SMatthew Dillon 	hammer2_chain_ref_hold(chain);
662fae225dcSMatthew Dillon 	hammer2_chain_unlock(chain);
663fae225dcSMatthew Dillon 	if (parent)
664fae225dcSMatthew Dillon 		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
665fae225dcSMatthew Dillon 	hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE);
6666aaf5cb0SMatthew Dillon 	hammer2_chain_drop_unhold(chain);
66765cacacfSMatthew Dillon 
66865cacacfSMatthew Dillon 	/*
66965cacacfSMatthew Dillon 	 * Can't process if we can't access their content.
67065cacacfSMatthew Dillon 	 */
67165cacacfSMatthew Dillon 	if ((parent && parent->error) || chain->error) {
67265cacacfSMatthew Dillon 		kprintf("hammer2: chain error during flush\n");
67365cacacfSMatthew Dillon 		info->error |= chain->error;
67465cacacfSMatthew Dillon 		if (parent) {
67565cacacfSMatthew Dillon 			info->error |= parent->error;
67665cacacfSMatthew Dillon 			hammer2_chain_unlock(parent);
67765cacacfSMatthew Dillon 		}
67865cacacfSMatthew Dillon 		goto done;
67965cacacfSMatthew Dillon 	}
68065cacacfSMatthew Dillon 
681fae225dcSMatthew Dillon 	if (chain->parent != parent) {
68268b321c1SMatthew Dillon 		if (hammer2_debug & 0x0040) {
683fae225dcSMatthew Dillon 			kprintf("LOST CHILD3 %p->%p (actual parent %p)\n",
684fae225dcSMatthew Dillon 				parent, chain, chain->parent);
68568b321c1SMatthew Dillon 		}
686fae225dcSMatthew Dillon 		KKASSERT(parent != NULL);
687fae225dcSMatthew Dillon 		hammer2_chain_unlock(parent);
688ecfe89b8SMatthew Dillon 		retry = 1;
689fae225dcSMatthew Dillon 		goto done;
690fae225dcSMatthew Dillon 	}
691fae225dcSMatthew Dillon 
692fae225dcSMatthew Dillon 	/*
693da6f36f4SMatthew Dillon 	 * Propagate the DESTROY flag downwards.  This dummies up the flush
694da6f36f4SMatthew Dillon 	 * code and tries to invalidate related buffer cache buffers to
695da6f36f4SMatthew Dillon 	 * avoid the disk write.
696623d43d4SMatthew Dillon 	 */
697da6f36f4SMatthew Dillon 	if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY))
698da6f36f4SMatthew Dillon 		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
699623d43d4SMatthew Dillon 
700623d43d4SMatthew Dillon 	/*
701e513e77eSMatthew Dillon 	 * Dispose of the modified bit.
702e513e77eSMatthew Dillon 	 *
7033f4ec3cfSMatthew Dillon 	 * If parent is present, the UPDATE bit should already be set.
704e513e77eSMatthew Dillon 	 * UPDATE should already be set.
705e513e77eSMatthew Dillon 	 * bref.mirror_tid should already be set.
70632b800e6SMatthew Dillon 	 */
70765cacacfSMatthew Dillon 	if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
708da6f36f4SMatthew Dillon 		KKASSERT((chain->flags & HAMMER2_CHAIN_UPDATE) ||
7093f4ec3cfSMatthew Dillon 			 chain->parent == NULL);
7100dea3156SMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
711f9f4459eSMatthew Dillon 		atomic_add_long(&hammer2_count_modified_chains, -1);
7128db69c9fSMatthew Dillon 
7138db69c9fSMatthew Dillon 		/*
714e513e77eSMatthew Dillon 		 * Manage threads waiting for excessive dirty memory to
715e513e77eSMatthew Dillon 		 * be retired.
7168db69c9fSMatthew Dillon 		 */
717e513e77eSMatthew Dillon 		if (chain->pmp)
718d0e99d5dSMatthew Dillon 			hammer2_pfs_memory_wakeup(chain->pmp, -1);
7198138a154SMatthew Dillon 
7203f4ec3cfSMatthew Dillon #if 0
7213f4ec3cfSMatthew Dillon 		if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0 &&
7223f4ec3cfSMatthew Dillon 		    chain != &hmp->vchain &&
7233f4ec3cfSMatthew Dillon 		    chain != &hmp->fchain) {
7248138a154SMatthew Dillon 			/*
7253f4ec3cfSMatthew Dillon 			 * Set UPDATE bit indicating that the parent block
7263f4ec3cfSMatthew Dillon 			 * table requires updating.
7278138a154SMatthew Dillon 			 */
728da6f36f4SMatthew Dillon 			atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
7290dea3156SMatthew Dillon 		}
7303f4ec3cfSMatthew Dillon #endif
7310dea3156SMatthew Dillon 
7320dea3156SMatthew Dillon 		/*
733a71db85dSMatthew Dillon 		 * Issue the flush.  This is indirect via the DIO.
7340dea3156SMatthew Dillon 		 *
735a71db85dSMatthew Dillon 		 * NOTE: A DELETED node that reaches this point must be
736a71db85dSMatthew Dillon 		 *	 flushed for synchronization point consistency.
737a71db85dSMatthew Dillon 		 *
738a71db85dSMatthew Dillon 		 * NOTE: Even though MODIFIED was already set, the related DIO
739a71db85dSMatthew Dillon 		 *	 might not be dirty due to a system buffer cache
740a71db85dSMatthew Dillon 		 *	 flush and must be set dirty if we are going to make
741a71db85dSMatthew Dillon 		 *	 further modifications to the buffer.  Chains with
742a71db85dSMatthew Dillon 		 *	 embedded data don't need this.
7430dea3156SMatthew Dillon 		 */
744a7720be7SMatthew Dillon 		if (hammer2_debug & 0x1000) {
7457fece146SMatthew Dillon 			kprintf("Flush %p.%d %016jx/%d data=%016jx\n",
746a7720be7SMatthew Dillon 				chain, chain->bref.type,
747c603b86bSMatthew Dillon 				(uintmax_t)chain->bref.key,
748c603b86bSMatthew Dillon 				chain->bref.keybits,
749c603b86bSMatthew Dillon 				(uintmax_t)chain->bref.data_off);
750a7720be7SMatthew Dillon 		}
75110136ab6SMatthew Dillon 
75232b800e6SMatthew Dillon 		/*
753da6f36f4SMatthew Dillon 		 * Update chain CRCs for flush.
75432b800e6SMatthew Dillon 		 *
755da6f36f4SMatthew Dillon 		 * NOTE: Volume headers are NOT flushed here as they require
756da6f36f4SMatthew Dillon 		 *	 special processing.
75732b800e6SMatthew Dillon 		 */
75832b800e6SMatthew Dillon 		switch(chain->bref.type) {
7591a7cfe5aSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP:
760a71db85dSMatthew Dillon 			/*
761e513e77eSMatthew Dillon 			 * Update the volume header's freemap_tid to the
762e513e77eSMatthew Dillon 			 * freemap's flushing mirror_tid.
763e513e77eSMatthew Dillon 			 *
764a71db85dSMatthew Dillon 			 * (note: embedded data, do not call setdirty)
765a71db85dSMatthew Dillon 			 */
76650456506SMatthew Dillon 			KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED);
767e513e77eSMatthew Dillon 			KKASSERT(chain == &hmp->fchain);
768e513e77eSMatthew Dillon 			hmp->voldata.freemap_tid = chain->bref.mirror_tid;
7695d37f96dSMatthew Dillon 			if (hammer2_debug & 0x8000) {
7705d37f96dSMatthew Dillon 				/* debug only, avoid syslogd loop */
771e513e77eSMatthew Dillon 				kprintf("sync freemap mirror_tid %08jx\n",
772e513e77eSMatthew Dillon 					(intmax_t)chain->bref.mirror_tid);
7735d37f96dSMatthew Dillon 			}
774e513e77eSMatthew Dillon 
775e513e77eSMatthew Dillon 			/*
776e513e77eSMatthew Dillon 			 * The freemap can be flushed independently of the
777e513e77eSMatthew Dillon 			 * main topology, but for the case where it is
778e513e77eSMatthew Dillon 			 * flushed in the same transaction, and flushed
779e513e77eSMatthew Dillon 			 * before vchain (a case we want to allow for
780e513e77eSMatthew Dillon 			 * performance reasons), make sure modifications
781e513e77eSMatthew Dillon 			 * made during the flush under vchain use a new
782e513e77eSMatthew Dillon 			 * transaction id.
783e513e77eSMatthew Dillon 			 *
784e513e77eSMatthew Dillon 			 * Otherwise the mount recovery code will get confused.
785e513e77eSMatthew Dillon 			 */
786e513e77eSMatthew Dillon 			++hmp->voldata.mirror_tid;
7871a7cfe5aSMatthew Dillon 			break;
78832b800e6SMatthew Dillon 		case HAMMER2_BREF_TYPE_VOLUME:
78932b800e6SMatthew Dillon 			/*
790e513e77eSMatthew Dillon 			 * The free block table is flushed by
791e513e77eSMatthew Dillon 			 * hammer2_vfs_sync() before it flushes vchain.
792e513e77eSMatthew Dillon 			 * We must still hold fchain locked while copying
793e513e77eSMatthew Dillon 			 * voldata to volsync, however.
794a71db85dSMatthew Dillon 			 *
79565cacacfSMatthew Dillon 			 * These do not error per se since their data does
79665cacacfSMatthew Dillon 			 * not need to be re-read from media on lock.
79765cacacfSMatthew Dillon 			 *
798a71db85dSMatthew Dillon 			 * (note: embedded data, do not call setdirty)
7991a7cfe5aSMatthew Dillon 			 */
800da6f36f4SMatthew Dillon 			hammer2_chain_lock(&hmp->fchain,
801da6f36f4SMatthew Dillon 					   HAMMER2_RESOLVE_ALWAYS);
802a6cf1052SMatthew Dillon 			hammer2_voldata_lock(hmp);
8035d37f96dSMatthew Dillon 			if (hammer2_debug & 0x8000) {
8045d37f96dSMatthew Dillon 				/* debug only, avoid syslogd loop */
805e513e77eSMatthew Dillon 				kprintf("sync volume  mirror_tid %08jx\n",
806da6f36f4SMatthew Dillon 					(intmax_t)chain->bref.mirror_tid);
8075d37f96dSMatthew Dillon 			}
8081a7cfe5aSMatthew Dillon 
8091a7cfe5aSMatthew Dillon 			/*
810e513e77eSMatthew Dillon 			 * Update the volume header's mirror_tid to the
811e513e77eSMatthew Dillon 			 * main topology's flushing mirror_tid.  It is
812e513e77eSMatthew Dillon 			 * possible that voldata.mirror_tid is already
813e513e77eSMatthew Dillon 			 * beyond bref.mirror_tid due to the bump we made
814e513e77eSMatthew Dillon 			 * above in BREF_TYPE_FREEMAP.
815e513e77eSMatthew Dillon 			 */
816e513e77eSMatthew Dillon 			if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) {
817e513e77eSMatthew Dillon 				hmp->voldata.mirror_tid =
818e513e77eSMatthew Dillon 					chain->bref.mirror_tid;
819e513e77eSMatthew Dillon 			}
820e513e77eSMatthew Dillon 
821e513e77eSMatthew Dillon 			/*
822da6f36f4SMatthew Dillon 			 * The volume header is flushed manually by the
823da6f36f4SMatthew Dillon 			 * syncer, not here.  All we do here is adjust the
824da6f36f4SMatthew Dillon 			 * crc's.
82532b800e6SMatthew Dillon 			 */
82632b800e6SMatthew Dillon 			KKASSERT(chain->data != NULL);
827fdf62707SMatthew Dillon 			KKASSERT(chain->dio == NULL);
82832b800e6SMatthew Dillon 
82932b800e6SMatthew Dillon 			hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
83032b800e6SMatthew Dillon 				hammer2_icrc32(
83132b800e6SMatthew Dillon 					(char *)&hmp->voldata +
83232b800e6SMatthew Dillon 					 HAMMER2_VOLUME_ICRC1_OFF,
83332b800e6SMatthew Dillon 					HAMMER2_VOLUME_ICRC1_SIZE);
83432b800e6SMatthew Dillon 			hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
83532b800e6SMatthew Dillon 				hammer2_icrc32(
83632b800e6SMatthew Dillon 					(char *)&hmp->voldata +
83732b800e6SMatthew Dillon 					 HAMMER2_VOLUME_ICRC0_OFF,
83832b800e6SMatthew Dillon 					HAMMER2_VOLUME_ICRC0_SIZE);
83932b800e6SMatthew Dillon 			hmp->voldata.icrc_volheader =
84032b800e6SMatthew Dillon 				hammer2_icrc32(
84132b800e6SMatthew Dillon 					(char *)&hmp->voldata +
84232b800e6SMatthew Dillon 					 HAMMER2_VOLUME_ICRCVH_OFF,
84332b800e6SMatthew Dillon 					HAMMER2_VOLUME_ICRCVH_SIZE);
844e513e77eSMatthew Dillon 
8455d37f96dSMatthew Dillon 			if (hammer2_debug & 0x8000) {
8465d37f96dSMatthew Dillon 				/* debug only, avoid syslogd loop */
847e513e77eSMatthew Dillon 				kprintf("syncvolhdr %016jx %016jx\n",
848e513e77eSMatthew Dillon 					hmp->voldata.mirror_tid,
849e513e77eSMatthew Dillon 					hmp->vchain.bref.mirror_tid);
8505d37f96dSMatthew Dillon 			}
85132b800e6SMatthew Dillon 			hmp->volsync = hmp->voldata;
8520dea3156SMatthew Dillon 			atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC);
85350456506SMatthew Dillon 			hammer2_voldata_unlock(hmp);
854a6cf1052SMatthew Dillon 			hammer2_chain_unlock(&hmp->fchain);
85532b800e6SMatthew Dillon 			break;
85632b800e6SMatthew Dillon 		case HAMMER2_BREF_TYPE_DATA:
85732b800e6SMatthew Dillon 			/*
858da6f36f4SMatthew Dillon 			 * Data elements have already been flushed via the
859da6f36f4SMatthew Dillon 			 * logical file buffer cache.  Their hash was set in
860a71db85dSMatthew Dillon 			 * the bref by the vop_write code.  Do not re-dirty.
86132b800e6SMatthew Dillon 			 *
862da6f36f4SMatthew Dillon 			 * Make sure any device buffer(s) have been flushed
863da6f36f4SMatthew Dillon 			 * out here (there aren't usually any to flush) XXX.
86432b800e6SMatthew Dillon 			 */
86532b800e6SMatthew Dillon 			break;
866512beabdSMatthew Dillon 		case HAMMER2_BREF_TYPE_INDIRECT:
8671a7cfe5aSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
86891caa51cSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
869da6f36f4SMatthew Dillon 			/*
870da6f36f4SMatthew Dillon 			 * Buffer I/O will be cleaned up when the volume is
871da6f36f4SMatthew Dillon 			 * flushed (but the kernel is free to flush it before
872da6f36f4SMatthew Dillon 			 * then, as well).
873da6f36f4SMatthew Dillon 			 */
874a71db85dSMatthew Dillon 			hammer2_chain_setcheck(chain, chain->data);
87550456506SMatthew Dillon 			break;
876da0cdd33SMatthew Dillon 		case HAMMER2_BREF_TYPE_DIRENT:
877da0cdd33SMatthew Dillon 			/*
878da0cdd33SMatthew Dillon 			 * A directory entry can use the check area to store
879da0cdd33SMatthew Dillon 			 * the filename for filenames <= 64 bytes, don't blow
880da0cdd33SMatthew Dillon 			 * it up!
881da0cdd33SMatthew Dillon 			 */
882da0cdd33SMatthew Dillon 			if (chain->bytes)
883da0cdd33SMatthew Dillon 				hammer2_chain_setcheck(chain, chain->data);
884da0cdd33SMatthew Dillon 			break;
88591caa51cSMatthew Dillon 		case HAMMER2_BREF_TYPE_INODE:
886a71db85dSMatthew Dillon 			/*
887a71db85dSMatthew Dillon 			 * NOTE: We must call io_setdirty() to make any late
888a71db85dSMatthew Dillon 			 *	 changes to the inode data, the system might
889a71db85dSMatthew Dillon 			 *	 have already flushed the buffer.
890a71db85dSMatthew Dillon 			 */
891b0f58de8SMatthew Dillon 			if (chain->data->ipdata.meta.op_flags &
892da6f36f4SMatthew Dillon 			    HAMMER2_OPFLAG_PFSROOT) {
893837bd39bSMatthew Dillon 				/*
894da6f36f4SMatthew Dillon 				 * non-NULL pmp if mounted as a PFS.  We must
89518e8ab5fSMatthew Dillon 				 * sync fields cached in the pmp? XXX
896837bd39bSMatthew Dillon 				 */
897837bd39bSMatthew Dillon 				hammer2_inode_data_t *ipdata;
898837bd39bSMatthew Dillon 
899a71db85dSMatthew Dillon 				hammer2_io_setdirty(chain->dio);
900837bd39bSMatthew Dillon 				ipdata = &chain->data->ipdata;
901e513e77eSMatthew Dillon 				if (chain->pmp) {
902b0f58de8SMatthew Dillon 					ipdata->meta.pfs_inum =
903e513e77eSMatthew Dillon 						chain->pmp->inode_tid;
904e513e77eSMatthew Dillon 				}
90550456506SMatthew Dillon 			} else {
90650456506SMatthew Dillon 				/* can't be mounted as a PFS */
90750456506SMatthew Dillon 			}
908b3659de2SMatthew Dillon 
909a71db85dSMatthew Dillon 			hammer2_chain_setcheck(chain, chain->data);
9101a7cfe5aSMatthew Dillon 			break;
91132b800e6SMatthew Dillon 		default:
912da6f36f4SMatthew Dillon 			panic("hammer2_flush_core: unsupported "
913da6f36f4SMatthew Dillon 			      "embedded bref %d",
91491caa51cSMatthew Dillon 			      chain->bref.type);
91591caa51cSMatthew Dillon 			/* NOT REACHED */
91632b800e6SMatthew Dillon 		}
91732b800e6SMatthew Dillon 
91832b800e6SMatthew Dillon 		/*
9193d4f397aSMatthew Dillon 		 * If the chain was destroyed try to avoid unnecessary I/O
9203d4f397aSMatthew Dillon 		 * that might not have yet occurred.  Remove the data range
9213d4f397aSMatthew Dillon 		 * from dedup candidacy and attempt to invalidate that
9223d4f397aSMatthew Dillon 		 * potentially dirty portion of the I/O buffer.
923da6f36f4SMatthew Dillon 		 */
9247767d389SMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_DESTROY) {
9250b8efeb7SMatthew Dillon 			hammer2_io_dedup_delete(hmp,
9260b8efeb7SMatthew Dillon 						chain->bref.type,
9273d4f397aSMatthew Dillon 						chain->bref.data_off,
9283d4f397aSMatthew Dillon 						chain->bytes);
9293d4f397aSMatthew Dillon #if 0
9307767d389SMatthew Dillon 			hammer2_io_t *dio;
9317767d389SMatthew Dillon 			if (chain->dio) {
9323d4f397aSMatthew Dillon 				hammer2_io_inval(chain->dio,
9337d565a4fSMatthew Dillon 						 chain->bref.data_off,
9347d565a4fSMatthew Dillon 						 chain->bytes);
9357767d389SMatthew Dillon 			} else if ((dio = hammer2_io_getquick(hmp,
9367767d389SMatthew Dillon 						  chain->bref.data_off,
9373d4f397aSMatthew Dillon 						  chain->bytes,
9383d4f397aSMatthew Dillon 						  1)) != NULL) {
9393d4f397aSMatthew Dillon 				hammer2_io_inval(dio,
9407767d389SMatthew Dillon 						 chain->bref.data_off,
9417767d389SMatthew Dillon 						 chain->bytes);
9427767d389SMatthew Dillon 				hammer2_io_putblk(&dio);
9437767d389SMatthew Dillon 			}
9443d4f397aSMatthew Dillon #endif
945da6f36f4SMatthew Dillon 		}
946da6f36f4SMatthew Dillon 	}
947da6f36f4SMatthew Dillon 
948da6f36f4SMatthew Dillon 	/*
949da6f36f4SMatthew Dillon 	 * If UPDATE is set the parent block table may need to be updated.
95065cacacfSMatthew Dillon 	 * This can fail if the hammer2_chain_modify() fails.
951da6f36f4SMatthew Dillon 	 *
952da6f36f4SMatthew Dillon 	 * NOTE: UPDATE may be set on vchain or fchain in which case
953ecfe89b8SMatthew Dillon 	 *	 parent could be NULL, or on an inode that has not yet
954ecfe89b8SMatthew Dillon 	 *	 been inserted into the radix tree.  It's easiest to allow
955ecfe89b8SMatthew Dillon 	 *	 the case and test for NULL.  parent can also wind up being
956ecfe89b8SMatthew Dillon 	 *	 NULL due to a deletion so we need to handle the case anyway.
957ecfe89b8SMatthew Dillon 	 *
958ecfe89b8SMatthew Dillon 	 * NOTE: UPDATE can be set when chains are renamed into or out of
959ecfe89b8SMatthew Dillon 	 *	 an indirect block, without the chain itself being flagged
960ecfe89b8SMatthew Dillon 	 *	 MODIFIED.
961da6f36f4SMatthew Dillon 	 *
962da6f36f4SMatthew Dillon 	 * If no parent exists we can just clear the UPDATE bit.  If the
963da6f36f4SMatthew Dillon 	 * chain gets reattached later on the bit will simply get set
964da6f36f4SMatthew Dillon 	 * again.
965da6f36f4SMatthew Dillon 	 */
9663f4ec3cfSMatthew Dillon 	if ((chain->flags & HAMMER2_CHAIN_UPDATE) && parent == NULL)
967da6f36f4SMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
968da6f36f4SMatthew Dillon 
969da6f36f4SMatthew Dillon 	/*
970ecfe89b8SMatthew Dillon 	 * When flushing an inode outside of a FLUSH_FSSYNC we must NOT
971ecfe89b8SMatthew Dillon 	 * update the parent block table to point at the flushed inode.
972ecfe89b8SMatthew Dillon 	 * The block table should only ever be updated by the filesystem
973ecfe89b8SMatthew Dillon 	 * sync code.  If we do, inode<->inode dependencies (such as
974ecfe89b8SMatthew Dillon 	 * directory entries vs inode nlink count) can wind up not being
975ecfe89b8SMatthew Dillon 	 * flushed together and result in a broken topology if a crash/reboot
976ecfe89b8SMatthew Dillon 	 * occurs at the wrong time.
977ecfe89b8SMatthew Dillon 	 */
978ecfe89b8SMatthew Dillon 	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
9796f445d15SMatthew Dillon 	    (flags & HAMMER2_FLUSH_INODE_STOP) &&
980ecfe89b8SMatthew Dillon 	    (flags & HAMMER2_FLUSH_FSSYNC) == 0 &&
981ecfe89b8SMatthew Dillon 	    (flags & HAMMER2_FLUSH_ALL) == 0 &&
982ecfe89b8SMatthew Dillon 	    chain->pmp && chain->pmp->mp) {
9836f445d15SMatthew Dillon #ifdef HAMMER2_DEBUG_SYNC
9846f445d15SMatthew Dillon 		kprintf("inum %ld do not update parent, non-fssync\n",
9856f445d15SMatthew Dillon 			(long)chain->bref.key);
9866f445d15SMatthew Dillon #endif
987ecfe89b8SMatthew Dillon 		goto skipupdate;
988ecfe89b8SMatthew Dillon 	}
9896f445d15SMatthew Dillon #ifdef HAMMER2_DEBUG_SYNC
9906f445d15SMatthew Dillon 	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
9916f445d15SMatthew Dillon 		kprintf("inum %ld update parent\n", (long)chain->bref.key);
9926f445d15SMatthew Dillon #endif
993ecfe89b8SMatthew Dillon 
994ecfe89b8SMatthew Dillon 	/*
995ecfe89b8SMatthew Dillon 	 * The chain may need its blockrefs updated in the parent, normal
996ecfe89b8SMatthew Dillon 	 * path.
997da6f36f4SMatthew Dillon 	 */
998da6f36f4SMatthew Dillon 	if (chain->flags & HAMMER2_CHAIN_UPDATE) {
999da6f36f4SMatthew Dillon 		hammer2_blockref_t *base;
1000da6f36f4SMatthew Dillon 		int count;
1001da6f36f4SMatthew Dillon 
1002da6f36f4SMatthew Dillon 		/*
1003a6cf1052SMatthew Dillon 		 * Clear UPDATE flag, mark parent modified, update its
1004a6cf1052SMatthew Dillon 		 * modify_tid if necessary, and adjust the parent blockmap.
1005da6f36f4SMatthew Dillon 		 */
1006da6f36f4SMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
1007a6cf1052SMatthew Dillon 
1008eedd52a3SMatthew Dillon 		/*
1009eedd52a3SMatthew Dillon 		 * (optional code)
1010eedd52a3SMatthew Dillon 		 *
1011eedd52a3SMatthew Dillon 		 * Avoid actually modifying and updating the parent if it
1012eedd52a3SMatthew Dillon 		 * was flagged for destruction.  This can greatly reduce
1013eedd52a3SMatthew Dillon 		 * disk I/O in large tree removals because the
1014eedd52a3SMatthew Dillon 		 * hammer2_io_setinval() call in the upward recursion
1015eedd52a3SMatthew Dillon 		 * (see MODIFIED code above) can only handle a few cases.
1016eedd52a3SMatthew Dillon 		 */
1017eedd52a3SMatthew Dillon 		if (parent->flags & HAMMER2_CHAIN_DESTROY) {
1018eedd52a3SMatthew Dillon 			if (parent->bref.modify_tid < chain->bref.modify_tid) {
1019eedd52a3SMatthew Dillon 				parent->bref.modify_tid =
1020eedd52a3SMatthew Dillon 					chain->bref.modify_tid;
1021eedd52a3SMatthew Dillon 			}
1022b70cecb7STomohiro Kusumi 			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BLKMAPPED |
1023b70cecb7STomohiro Kusumi 							HAMMER2_CHAIN_BLKMAPUPD);
1024eedd52a3SMatthew Dillon 			goto skipupdate;
1025eedd52a3SMatthew Dillon 		}
1026eedd52a3SMatthew Dillon 
1027eedd52a3SMatthew Dillon 		/*
1028470dad14SMatthew Dillon 		 * The flusher is responsible for deleting empty indirect
1029470dad14SMatthew Dillon 		 * blocks at this point.  If we don't do this, no major harm
1030470dad14SMatthew Dillon 		 * will be done but the empty indirect blocks will stay in
1031850d3f60SMatthew Dillon 		 * the topology and make it messy and inefficient.
103230b0abf3SMatthew Dillon 		 *
1033850d3f60SMatthew Dillon 		 * The flusher is also responsible for collapsing the
1034850d3f60SMatthew Dillon 		 * content of an indirect block into its parent whenever
1035850d3f60SMatthew Dillon 		 * possible (with some hysteresis).  Not doing this will also
1036850d3f60SMatthew Dillon 		 * not harm the topology, but would make it messy and
1037850d3f60SMatthew Dillon 		 * inefficient.
1038470dad14SMatthew Dillon 		 */
1039850d3f60SMatthew Dillon 		if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1040850d3f60SMatthew Dillon 			if (hammer2_chain_indirect_maintenance(parent, chain))
1041470dad14SMatthew Dillon 				goto skipupdate;
1042470dad14SMatthew Dillon 		}
1043470dad14SMatthew Dillon 
1044470dad14SMatthew Dillon 		/*
1045eedd52a3SMatthew Dillon 		 * We are updating the parent's blockmap, the parent must
104665cacacfSMatthew Dillon 		 * be set modified.  If this fails we re-set the UPDATE flag
104765cacacfSMatthew Dillon 		 * in the child.
104865cacacfSMatthew Dillon 		 *
104965cacacfSMatthew Dillon 		 * NOTE! A modification error can be ENOSPC.  We still want
105065cacacfSMatthew Dillon 		 *	 to flush modified chains recursively, not break out,
105165cacacfSMatthew Dillon 		 *	 so we just skip the update in this situation and
105265cacacfSMatthew Dillon 		 *	 continue.  That is, we still need to try to clean
105365cacacfSMatthew Dillon 		 *	 out dirty chains and buffers.
105465cacacfSMatthew Dillon 		 *
105565cacacfSMatthew Dillon 		 *	 This may not help bulkfree though. XXX
1056eedd52a3SMatthew Dillon 		 */
105765cacacfSMatthew Dillon 		save_error = hammer2_chain_modify(parent, 0, 0, 0);
105865cacacfSMatthew Dillon 		if (save_error) {
105965cacacfSMatthew Dillon 			info->error |= save_error;
106065cacacfSMatthew Dillon 			kprintf("hammer2_flush: %016jx.%02x error=%08x\n",
106165cacacfSMatthew Dillon 				parent->bref.data_off, parent->bref.type,
106265cacacfSMatthew Dillon 				save_error);
106365cacacfSMatthew Dillon 			atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
106465cacacfSMatthew Dillon 			goto skipupdate;
106565cacacfSMatthew Dillon 		}
1066a6cf1052SMatthew Dillon 		if (parent->bref.modify_tid < chain->bref.modify_tid)
1067a6cf1052SMatthew Dillon 			parent->bref.modify_tid = chain->bref.modify_tid;
1068da6f36f4SMatthew Dillon 
1069da6f36f4SMatthew Dillon 		/*
1070da6f36f4SMatthew Dillon 		 * Calculate blockmap pointer
1071da6f36f4SMatthew Dillon 		 */
1072da6f36f4SMatthew Dillon 		switch(parent->bref.type) {
1073da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_INODE:
1074da6f36f4SMatthew Dillon 			/*
1075da6f36f4SMatthew Dillon 			 * Access the inode's block array.  However, there is
1076da6f36f4SMatthew Dillon 			 * no block array if the inode is flagged DIRECTDATA.
1077da6f36f4SMatthew Dillon 			 */
1078da6f36f4SMatthew Dillon 			if (parent->data &&
1079b0f58de8SMatthew Dillon 			    (parent->data->ipdata.meta.op_flags &
1080da6f36f4SMatthew Dillon 			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1081da6f36f4SMatthew Dillon 				base = &parent->data->
1082da6f36f4SMatthew Dillon 					ipdata.u.blockset.blockref[0];
1083da6f36f4SMatthew Dillon 			} else {
1084da6f36f4SMatthew Dillon 				base = NULL;
1085da6f36f4SMatthew Dillon 			}
1086da6f36f4SMatthew Dillon 			count = HAMMER2_SET_COUNT;
1087da6f36f4SMatthew Dillon 			break;
1088da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_INDIRECT:
1089da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1090da6f36f4SMatthew Dillon 			if (parent->data)
1091da6f36f4SMatthew Dillon 				base = &parent->data->npdata[0];
1092da6f36f4SMatthew Dillon 			else
1093da6f36f4SMatthew Dillon 				base = NULL;
1094da6f36f4SMatthew Dillon 			count = parent->bytes / sizeof(hammer2_blockref_t);
1095da6f36f4SMatthew Dillon 			break;
1096da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_VOLUME:
1097da6f36f4SMatthew Dillon 			base = &chain->hmp->voldata.sroot_blockset.blockref[0];
1098da6f36f4SMatthew Dillon 			count = HAMMER2_SET_COUNT;
1099da6f36f4SMatthew Dillon 			break;
1100da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP:
1101da6f36f4SMatthew Dillon 			base = &parent->data->npdata[0];
1102da6f36f4SMatthew Dillon 			count = HAMMER2_SET_COUNT;
1103da6f36f4SMatthew Dillon 			break;
1104da6f36f4SMatthew Dillon 		default:
1105da6f36f4SMatthew Dillon 			base = NULL;
1106da6f36f4SMatthew Dillon 			count = 0;
1107da6f36f4SMatthew Dillon 			panic("hammer2_flush_core: "
1108da6f36f4SMatthew Dillon 			      "unrecognized blockref type: %d",
1109da6f36f4SMatthew Dillon 			      parent->bref.type);
11101eb19191STomohiro Kusumi 			break;
1111da6f36f4SMatthew Dillon 		}
1112da6f36f4SMatthew Dillon 
1113da6f36f4SMatthew Dillon 		/*
1114da6f36f4SMatthew Dillon 		 * Blocktable updates
1115da6f36f4SMatthew Dillon 		 */
1116b70cecb7STomohiro Kusumi 		if (base && (chain->flags & HAMMER2_CHAIN_BLKMAPUPD)) {
1117b70cecb7STomohiro Kusumi 			if (chain->flags & HAMMER2_CHAIN_BLKMAPPED) {
11180cc33e20SMatthew Dillon 				hammer2_spin_ex(&parent->core.spin);
1119ecfe89b8SMatthew Dillon 				hammer2_base_delete(parent, base, count, chain,
1120ecfe89b8SMatthew Dillon 						    NULL);
11210cc33e20SMatthew Dillon 				hammer2_spin_unex(&parent->core.spin);
1122b3659de2SMatthew Dillon 				/* base_delete clears both bits */
1123b3659de2SMatthew Dillon 			} else {
1124b3659de2SMatthew Dillon 				atomic_clear_int(&chain->flags,
1125b70cecb7STomohiro Kusumi 						 HAMMER2_CHAIN_BLKMAPUPD);
1126da6f36f4SMatthew Dillon 			}
1127da6f36f4SMatthew Dillon 		}
1128b70cecb7STomohiro Kusumi 		if (base && (chain->flags & HAMMER2_CHAIN_BLKMAPPED) == 0) {
11290cc33e20SMatthew Dillon 			hammer2_spin_ex(&parent->core.spin);
1130850d3f60SMatthew Dillon 			hammer2_base_insert(parent, base, count,
1131850d3f60SMatthew Dillon 					    chain, &chain->bref);
11320cc33e20SMatthew Dillon 			hammer2_spin_unex(&parent->core.spin);
1133b70cecb7STomohiro Kusumi 			/* base_insert sets BLKMAPPED */
1134da6f36f4SMatthew Dillon 		}
1135da6f36f4SMatthew Dillon 	}
1136eedd52a3SMatthew Dillon skipupdate:
1137fae225dcSMatthew Dillon 	if (parent)
1138fae225dcSMatthew Dillon 		hammer2_chain_unlock(parent);
1139da6f36f4SMatthew Dillon 
1140da6f36f4SMatthew Dillon 	/*
11418138a154SMatthew Dillon 	 * Final cleanup after flush
11428138a154SMatthew Dillon 	 */
11438138a154SMatthew Dillon done:
1144e513e77eSMatthew Dillon 	KKASSERT(chain->refs > 0);
1145556042eaSTomohiro Kusumi 
1146ecfe89b8SMatthew Dillon 	return retry;
11478138a154SMatthew Dillon }
11488138a154SMatthew Dillon 
11498138a154SMatthew Dillon /*
1150da6f36f4SMatthew Dillon  * Flush recursion helper, called from flush_core, calls flush_core.
11510dea3156SMatthew Dillon  *
11528138a154SMatthew Dillon  * Flushes the children of the caller's chain (info->parent), restricted
1153628176c9STomohiro Kusumi  * by sync_tid.
11540dea3156SMatthew Dillon  *
115565cacacfSMatthew Dillon  * This function may set info->error as a side effect.
115665cacacfSMatthew Dillon  *
11578138a154SMatthew Dillon  * WARNING! If we do not call hammer2_flush_core() we must update
11588138a154SMatthew Dillon  *	    bref.mirror_tid ourselves to indicate that the flush has
11598138a154SMatthew Dillon  *	    processed the child.
1160925e4ad1SMatthew Dillon  *
11618138a154SMatthew Dillon  * WARNING! parent->core spinlock is held on entry and return.
116232b800e6SMatthew Dillon  */
11630dea3156SMatthew Dillon static int
hammer2_flush_recurse(hammer2_chain_t * child,void * data)1164da6f36f4SMatthew Dillon hammer2_flush_recurse(hammer2_chain_t *child, void *data)
116532b800e6SMatthew Dillon {
11660dea3156SMatthew Dillon 	hammer2_flush_info_t *info = data;
11670dea3156SMatthew Dillon 	hammer2_chain_t *parent = info->parent;
1168925e4ad1SMatthew Dillon 
11696aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
11706aaf5cb0SMatthew Dillon 	++info->scan_count;
11716aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_MODIFIED)
11726aaf5cb0SMatthew Dillon 		++info->scan_mod_count;
11736aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_UPDATE)
11746aaf5cb0SMatthew Dillon 		++info->scan_upd_count;
11756aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_ONFLUSH)
11766aaf5cb0SMatthew Dillon 		++info->scan_onf_count;
11776aaf5cb0SMatthew Dillon #endif
11786aaf5cb0SMatthew Dillon 
11790dea3156SMatthew Dillon 	/*
118010136ab6SMatthew Dillon 	 * (child can never be fchain or vchain so a special check isn't
118110136ab6SMatthew Dillon 	 *  needed).
1182da6f36f4SMatthew Dillon 	 *
1183a4dc31e0SMatthew Dillon 	 * We must ref the child before unlocking the spinlock.
1184a4dc31e0SMatthew Dillon 	 *
1185a4dc31e0SMatthew Dillon 	 * The caller has added a ref to the parent so we can temporarily
1186fae225dcSMatthew Dillon 	 * unlock it in order to lock the child.  However, if it no longer
1187fae225dcSMatthew Dillon 	 * winds up being the child of the parent we must skip this child.
118865cacacfSMatthew Dillon 	 *
118965cacacfSMatthew Dillon 	 * NOTE! chain locking errors are fatal.  They are never out-of-space
119065cacacfSMatthew Dillon 	 *	 errors.
1191a4dc31e0SMatthew Dillon 	 */
1192ea155208SMatthew Dillon 	hammer2_chain_ref(child);
119394491fa0SMatthew Dillon 	hammer2_spin_unex(&parent->core.spin);
11940dea3156SMatthew Dillon 
11956aaf5cb0SMatthew Dillon 	hammer2_chain_ref_hold(parent);
11960dea3156SMatthew Dillon 	hammer2_chain_unlock(parent);
11970dea3156SMatthew Dillon 	hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);
1198fae225dcSMatthew Dillon 	if (child->parent != parent) {
1199fae225dcSMatthew Dillon 		kprintf("LOST CHILD1 %p->%p (actual parent %p)\n",
1200fae225dcSMatthew Dillon 			parent, child, child->parent);
1201fae225dcSMatthew Dillon 		goto done;
1202fae225dcSMatthew Dillon 	}
120365cacacfSMatthew Dillon 	if (child->error) {
120465cacacfSMatthew Dillon 		kprintf("CHILD ERROR DURING FLUSH LOCK %p->%p\n",
120565cacacfSMatthew Dillon 			parent, child);
120665cacacfSMatthew Dillon 		info->error |= child->error;
120765cacacfSMatthew Dillon 		goto done;
120865cacacfSMatthew Dillon 	}
12090dea3156SMatthew Dillon 
121003faa7d5SMatthew Dillon 	/*
12118bbe5025SMatthew Dillon 	 * Must propagate the DESTROY flag downwards, otherwise the
12128bbe5025SMatthew Dillon 	 * parent could end up never being removed because it will
12138bbe5025SMatthew Dillon 	 * be requeued to the flusher if it survives this run due to
12148bbe5025SMatthew Dillon 	 * the flag.
12158bbe5025SMatthew Dillon 	 */
12168bbe5025SMatthew Dillon 	if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY))
12178bbe5025SMatthew Dillon 		atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROY);
12186aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
12196aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_DESTROY)
12206aaf5cb0SMatthew Dillon 		++info->scan_del_count;
12216aaf5cb0SMatthew Dillon #endif
1222ecfe89b8SMatthew Dillon 	/*
1223ecfe89b8SMatthew Dillon 	 * Special handling of the root inode.  Because the root inode
1224ecfe89b8SMatthew Dillon 	 * contains an index of all the inodes in the PFS in addition to
1225ecfe89b8SMatthew Dillon 	 * its normal directory entries, any flush that is not part of a
1226ecfe89b8SMatthew Dillon 	 * filesystem sync must only flush the directory entries, and not
1227ecfe89b8SMatthew Dillon 	 * anything else.
1228ecfe89b8SMatthew Dillon 	 *
1229ecfe89b8SMatthew Dillon 	 * The child might be an indirect block, but H2 guarantees that
1230ecfe89b8SMatthew Dillon 	 * the key-range will fully partition the inode index from the
1231ecfe89b8SMatthew Dillon 	 * directory entries so the case just works naturally.
1232ecfe89b8SMatthew Dillon 	 */
1233ecfe89b8SMatthew Dillon 	if ((parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) &&
1234ecfe89b8SMatthew Dillon 	    (child->flags & HAMMER2_CHAIN_DESTROY) == 0 &&
1235ecfe89b8SMatthew Dillon 	    parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
1236ecfe89b8SMatthew Dillon 	    (info->flags & HAMMER2_FLUSH_FSSYNC) == 0) {
1237ecfe89b8SMatthew Dillon 		if ((child->bref.key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1238ecfe89b8SMatthew Dillon 			if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) {
1239ecfe89b8SMatthew Dillon 				hammer2_chain_setflush(parent);
1240ecfe89b8SMatthew Dillon 			}
1241ecfe89b8SMatthew Dillon 			goto done;
1242ecfe89b8SMatthew Dillon 		}
1243ecfe89b8SMatthew Dillon 	}
12448bbe5025SMatthew Dillon 
12458bbe5025SMatthew Dillon 	/*
1246e513e77eSMatthew Dillon 	 * Recurse and collect deferral data.  We're in the media flush,
1247e513e77eSMatthew Dillon 	 * this can cross PFS boundaries.
124803faa7d5SMatthew Dillon 	 */
1249da6f36f4SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) {
12506aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
12516aaf5cb0SMatthew Dillon 		if (child->bref.type < 7)
12526aaf5cb0SMatthew Dillon 			++info->scan_btype[child->bref.type];
12536aaf5cb0SMatthew Dillon #endif
12540dea3156SMatthew Dillon 		++info->depth;
125553f84d31SMatthew Dillon 		hammer2_flush_core(info, child, info->flags);
12560dea3156SMatthew Dillon 		--info->depth;
12578138a154SMatthew Dillon 	}
12580dea3156SMatthew Dillon 
1259fae225dcSMatthew Dillon done:
1260a4dc31e0SMatthew Dillon 	/*
126165cacacfSMatthew Dillon 	 * Relock to continue the loop.
1262a4dc31e0SMatthew Dillon 	 */
1263a4dc31e0SMatthew Dillon 	hammer2_chain_unlock(child);
1264ea155208SMatthew Dillon 	hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
12656aaf5cb0SMatthew Dillon 	hammer2_chain_drop_unhold(parent);
126665cacacfSMatthew Dillon 	if (parent->error) {
126765cacacfSMatthew Dillon 		kprintf("PARENT ERROR DURING FLUSH LOCK %p->%p\n",
126865cacacfSMatthew Dillon 			parent, child);
126965cacacfSMatthew Dillon 		info->error |= parent->error;
127065cacacfSMatthew Dillon 	}
1271a4dc31e0SMatthew Dillon 	hammer2_chain_drop(child);
1272a4dc31e0SMatthew Dillon 	KKASSERT(info->parent == parent);
127394491fa0SMatthew Dillon 	hammer2_spin_ex(&parent->core.spin);
12740dea3156SMatthew Dillon 
12750dea3156SMatthew Dillon 	return (0);
12760dea3156SMatthew Dillon }
127712ff971cSMatthew Dillon 
127853f84d31SMatthew Dillon /*
127912ff971cSMatthew Dillon  * flush helper (backend threaded)
128012ff971cSMatthew Dillon  *
128140498d1cSMatthew Dillon  * Flushes chain topology for the specified inode.
128240498d1cSMatthew Dillon  *
1283ecfe89b8SMatthew Dillon  * HAMMER2_XOP_INODE_STOP	The flush recursion stops at inode boundaries.
1284ecfe89b8SMatthew Dillon  *				Inodes belonging to the same flush are flushed
1285ecfe89b8SMatthew Dillon  *				separately.
128640498d1cSMatthew Dillon  *
1287ecfe89b8SMatthew Dillon  * chain->parent can be NULL, usually due to destroy races or detached inodes.
128812ff971cSMatthew Dillon  *
128912ff971cSMatthew Dillon  * Primarily called from vfs_sync().
129012ff971cSMatthew Dillon  */
129112ff971cSMatthew Dillon void
hammer2_xop_inode_flush(hammer2_xop_t * arg,void * scratch __unused,int clindex)1292c4421f07SMatthew Dillon hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex)
129312ff971cSMatthew Dillon {
129412ff971cSMatthew Dillon 	hammer2_xop_flush_t *xop = &arg->xop_flush;
129512ff971cSMatthew Dillon 	hammer2_chain_t *chain;
12966f445d15SMatthew Dillon 	hammer2_inode_t *ip;
129712ff971cSMatthew Dillon 	hammer2_dev_t *hmp;
12986f445d15SMatthew Dillon 	hammer2_pfs_t *pmp;
12990b738157STomohiro Kusumi 	hammer2_devvp_t *e;
13000b738157STomohiro Kusumi 	struct vnode *devvp;
130165cacacfSMatthew Dillon 	int flush_error = 0;
130265cacacfSMatthew Dillon 	int fsync_error = 0;
130312ff971cSMatthew Dillon 	int total_error = 0;
130412ff971cSMatthew Dillon 	int j;
130540498d1cSMatthew Dillon 	int xflags;
130640498d1cSMatthew Dillon 	int ispfsroot = 0;
130740498d1cSMatthew Dillon 
130840498d1cSMatthew Dillon 	xflags = HAMMER2_FLUSH_TOP;
130940498d1cSMatthew Dillon 	if (xop->head.flags & HAMMER2_XOP_INODE_STOP)
131040498d1cSMatthew Dillon 		xflags |= HAMMER2_FLUSH_INODE_STOP;
1311ecfe89b8SMatthew Dillon 	if (xop->head.flags & HAMMER2_XOP_FSSYNC)
1312ecfe89b8SMatthew Dillon 		xflags |= HAMMER2_FLUSH_FSSYNC;
131312ff971cSMatthew Dillon 
131412ff971cSMatthew Dillon 	/*
131512ff971cSMatthew Dillon 	 * Flush core chains
131612ff971cSMatthew Dillon 	 */
13176f445d15SMatthew Dillon 	ip = xop->head.ip1;
13186f445d15SMatthew Dillon 	pmp = ip->pmp;
13196f445d15SMatthew Dillon 	chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
132012ff971cSMatthew Dillon 	if (chain) {
132112ff971cSMatthew Dillon 		hmp = chain->hmp;
1322ecfe89b8SMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_FLUSH_MASK) {
1323ecfe89b8SMatthew Dillon 			/*
1324ecfe89b8SMatthew Dillon 			 * Due to flush partitioning the chain topology
1325ecfe89b8SMatthew Dillon 			 * above the inode's chain may no longer be flagged.
1326ecfe89b8SMatthew Dillon 			 * When asked to flush an inode, remark the topology
1327ecfe89b8SMatthew Dillon 			 * leading to that inode.
1328ecfe89b8SMatthew Dillon 			 */
1329ecfe89b8SMatthew Dillon 			if (chain->parent)
1330ecfe89b8SMatthew Dillon 				hammer2_chain_setflush(chain->parent);
133140498d1cSMatthew Dillon 			hammer2_flush(chain, xflags);
1332ecfe89b8SMatthew Dillon 
13336f445d15SMatthew Dillon 			/* XXX cluster */
13346f445d15SMatthew Dillon 			if (ip == pmp->iroot && pmp != hmp->spmp) {
1335*34fb48c2SMatthew Dillon 				hammer2_spin_ex(&pmp->blockset_spin);
13366f445d15SMatthew Dillon 				pmp->pfs_iroot_blocksets[clindex] =
13376f445d15SMatthew Dillon 					chain->data->ipdata.u.blockset;
1338*34fb48c2SMatthew Dillon 				hammer2_spin_unex(&pmp->blockset_spin);
13396f445d15SMatthew Dillon 			}
13406f445d15SMatthew Dillon 
1341ecfe89b8SMatthew Dillon #if 0
1342ecfe89b8SMatthew Dillon 			/*
1343ecfe89b8SMatthew Dillon 			 * Propogate upwards but only cross an inode boundary
1344ecfe89b8SMatthew Dillon 			 * for inodes associated with the current filesystem
1345ecfe89b8SMatthew Dillon 			 * sync.
1346ecfe89b8SMatthew Dillon 			 */
1347ecfe89b8SMatthew Dillon 			if ((xop->head.flags & HAMMER2_XOP_PARENTONFLUSH) ||
1348ecfe89b8SMatthew Dillon 			    chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
134912ff971cSMatthew Dillon 				parent = chain->parent;
135040498d1cSMatthew Dillon 				if (parent)
135112ff971cSMatthew Dillon 					hammer2_chain_setflush(parent);
135212ff971cSMatthew Dillon 			}
1353ecfe89b8SMatthew Dillon #endif
1354ecfe89b8SMatthew Dillon 		}
135540498d1cSMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY)
135640498d1cSMatthew Dillon 			ispfsroot = 1;
135712ff971cSMatthew Dillon 		hammer2_chain_unlock(chain);
135812ff971cSMatthew Dillon 		hammer2_chain_drop(chain);
135912ff971cSMatthew Dillon 		chain = NULL;
136012ff971cSMatthew Dillon 	} else {
136112ff971cSMatthew Dillon 		hmp = NULL;
136212ff971cSMatthew Dillon 	}
136312ff971cSMatthew Dillon 
136412ff971cSMatthew Dillon 	/*
136565c894ffSMatthew Dillon 	 * Only flush the volume header if asked to, plus the inode must also
136665c894ffSMatthew Dillon 	 * be the PFS root.
136740498d1cSMatthew Dillon 	 */
136865c894ffSMatthew Dillon 	if ((xop->head.flags & HAMMER2_XOP_VOLHDR) == 0)
136965c894ffSMatthew Dillon 		goto skip;
137040498d1cSMatthew Dillon 	if (ispfsroot == 0)
137140498d1cSMatthew Dillon 		goto skip;
137240498d1cSMatthew Dillon 
137340498d1cSMatthew Dillon 	/*
137412ff971cSMatthew Dillon 	 * Flush volume roots.  Avoid replication, we only want to
137512ff971cSMatthew Dillon 	 * flush each hammer2_dev (hmp) once.
137612ff971cSMatthew Dillon 	 */
1377c4421f07SMatthew Dillon 	for (j = clindex - 1; j >= 0; --j) {
13786f445d15SMatthew Dillon 		if ((chain = ip->cluster.array[j].chain) != NULL) {
137912ff971cSMatthew Dillon 			if (chain->hmp == hmp) {
138012ff971cSMatthew Dillon 				chain = NULL;	/* safety */
138112ff971cSMatthew Dillon 				goto skip;
138212ff971cSMatthew Dillon 			}
138312ff971cSMatthew Dillon 		}
138412ff971cSMatthew Dillon 	}
138512ff971cSMatthew Dillon 	chain = NULL;	/* safety */
138612ff971cSMatthew Dillon 
138712ff971cSMatthew Dillon 	/*
138812ff971cSMatthew Dillon 	 * spmp transaction.  The super-root is never directly mounted so
138912ff971cSMatthew Dillon 	 * there shouldn't be any vnodes, let alone any dirty vnodes
139053f84d31SMatthew Dillon 	 * associated with it, so we shouldn't have to mess around with any
139153f84d31SMatthew Dillon 	 * vnode flushes here.
139212ff971cSMatthew Dillon 	 */
139312ff971cSMatthew Dillon 	hammer2_trans_init(hmp->spmp, HAMMER2_TRANS_ISFLUSH);
139412ff971cSMatthew Dillon 
139512ff971cSMatthew Dillon 	/*
13966f445d15SMatthew Dillon 	 * We must flush the superroot down to the PFS iroot.  Remember
13976f445d15SMatthew Dillon 	 * that hammer2_chain_setflush() stops at inode boundaries, so
13986f445d15SMatthew Dillon 	 * the pmp->iroot has been flushed and flagged down to the superroot,
13996f445d15SMatthew Dillon 	 * but the volume root (vchain) probably has not yet been flagged.
14006f445d15SMatthew Dillon 	 */
14016f445d15SMatthew Dillon 	if (hmp->spmp->iroot) {
14026f445d15SMatthew Dillon 		chain = hmp->spmp->iroot->cluster.array[0].chain;
14036f445d15SMatthew Dillon 		if (chain) {
14046f445d15SMatthew Dillon 			hammer2_chain_ref(chain);
14056f445d15SMatthew Dillon 			hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
14066f445d15SMatthew Dillon 			flush_error |=
14076f445d15SMatthew Dillon 				hammer2_flush(chain,
14086f445d15SMatthew Dillon 					      HAMMER2_FLUSH_TOP |
14096f445d15SMatthew Dillon 					      HAMMER2_FLUSH_INODE_STOP |
14106f445d15SMatthew Dillon 					      HAMMER2_FLUSH_FSSYNC);
14116f445d15SMatthew Dillon 			hammer2_chain_unlock(chain);
14126f445d15SMatthew Dillon 			hammer2_chain_drop(chain);
14136f445d15SMatthew Dillon 		}
14146f445d15SMatthew Dillon 	}
14156f445d15SMatthew Dillon 
14166f445d15SMatthew Dillon 	/*
141712ff971cSMatthew Dillon 	 * Media mounts have two 'roots', vchain for the topology
141812ff971cSMatthew Dillon 	 * and fchain for the free block table.  Flush both.
141912ff971cSMatthew Dillon 	 *
142012ff971cSMatthew Dillon 	 * Note that the topology and free block table are handled
142112ff971cSMatthew Dillon 	 * independently, so the free block table can wind up being
142212ff971cSMatthew Dillon 	 * ahead of the topology.  We depend on the bulk free scan
142312ff971cSMatthew Dillon 	 * code to deal with any loose ends.
142465cacacfSMatthew Dillon 	 *
142565cacacfSMatthew Dillon 	 * vchain and fchain do not error on-lock since their data does
142665cacacfSMatthew Dillon 	 * not have to be re-read from media.
142712ff971cSMatthew Dillon 	 */
142812ff971cSMatthew Dillon 	hammer2_chain_ref(&hmp->vchain);
142912ff971cSMatthew Dillon 	hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
143012ff971cSMatthew Dillon 	hammer2_chain_ref(&hmp->fchain);
143112ff971cSMatthew Dillon 	hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
143212ff971cSMatthew Dillon 	if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
143312ff971cSMatthew Dillon 		/*
143412ff971cSMatthew Dillon 		 * This will also modify vchain as a side effect,
143512ff971cSMatthew Dillon 		 * mark vchain as modified now.
143612ff971cSMatthew Dillon 		 */
143712ff971cSMatthew Dillon 		hammer2_voldata_modify(hmp);
143812ff971cSMatthew Dillon 		chain = &hmp->fchain;
143965cacacfSMatthew Dillon 		flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP);
144012ff971cSMatthew Dillon 		KKASSERT(chain == &hmp->fchain);
144112ff971cSMatthew Dillon 	}
144212ff971cSMatthew Dillon 	hammer2_chain_unlock(&hmp->fchain);
144312ff971cSMatthew Dillon 	hammer2_chain_unlock(&hmp->vchain);
144412ff971cSMatthew Dillon 	hammer2_chain_drop(&hmp->fchain);
144512ff971cSMatthew Dillon 	/* vchain dropped down below */
144612ff971cSMatthew Dillon 
144712ff971cSMatthew Dillon 	hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
144812ff971cSMatthew Dillon 	if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
144912ff971cSMatthew Dillon 		chain = &hmp->vchain;
145065cacacfSMatthew Dillon 		flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP);
145112ff971cSMatthew Dillon 		KKASSERT(chain == &hmp->vchain);
145212ff971cSMatthew Dillon 	}
145312ff971cSMatthew Dillon 	hammer2_chain_unlock(&hmp->vchain);
145412ff971cSMatthew Dillon 	hammer2_chain_drop(&hmp->vchain);
145512ff971cSMatthew Dillon 
145612ff971cSMatthew Dillon 	/*
145712ff971cSMatthew Dillon 	 * We can't safely flush the volume header until we have
145812ff971cSMatthew Dillon 	 * flushed any device buffers which have built up.
145912ff971cSMatthew Dillon 	 *
146012ff971cSMatthew Dillon 	 * XXX this isn't being incremental
146112ff971cSMatthew Dillon 	 */
14620b738157STomohiro Kusumi 	TAILQ_FOREACH(e, &hmp->devvpl, entry) {
14630b738157STomohiro Kusumi 		devvp = e->devvp;
14640b738157STomohiro Kusumi 		KKASSERT(devvp);
14650b738157STomohiro Kusumi 		vn_lock(devvp, LK_EXCLUSIVE | LK_RETRY);
14660b738157STomohiro Kusumi 		fsync_error = VOP_FSYNC(devvp, MNT_WAIT, 0);
14670b738157STomohiro Kusumi 		vn_unlock(devvp);
146865cacacfSMatthew Dillon 		if (fsync_error || flush_error) {
146965cacacfSMatthew Dillon 			kprintf("hammer2: sync error fsync=%d h2flush=0x%04x dev=%s\n",
14700b738157STomohiro Kusumi 				fsync_error, flush_error, e->path);
14710b738157STomohiro Kusumi 		}
147265cacacfSMatthew Dillon 	}
147312ff971cSMatthew Dillon 
147412ff971cSMatthew Dillon 	/*
147512ff971cSMatthew Dillon 	 * The flush code sets CHAIN_VOLUMESYNC to indicate that the
147612ff971cSMatthew Dillon 	 * volume header needs synchronization via hmp->volsync.
147712ff971cSMatthew Dillon 	 *
147812ff971cSMatthew Dillon 	 * XXX synchronize the flag & data with only this flush XXX
147912ff971cSMatthew Dillon 	 */
148065cacacfSMatthew Dillon 	if (fsync_error == 0 && flush_error == 0 &&
148112ff971cSMatthew Dillon 	    (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
148212ff971cSMatthew Dillon 		struct buf *bp;
148365cacacfSMatthew Dillon 		int vol_error = 0;
148412ff971cSMatthew Dillon 
148512ff971cSMatthew Dillon 		/*
148612ff971cSMatthew Dillon 		 * Synchronize the disk before flushing the volume
148712ff971cSMatthew Dillon 		 * header.
148812ff971cSMatthew Dillon 		 */
148912ff971cSMatthew Dillon 		bp = getpbuf(NULL);
149012ff971cSMatthew Dillon 		bp->b_bio1.bio_offset = 0;
149112ff971cSMatthew Dillon 		bp->b_bufsize = 0;
149212ff971cSMatthew Dillon 		bp->b_bcount = 0;
149312ff971cSMatthew Dillon 		bp->b_cmd = BUF_CMD_FLUSH;
149412ff971cSMatthew Dillon 		bp->b_bio1.bio_done = biodone_sync;
149512ff971cSMatthew Dillon 		bp->b_bio1.bio_flags |= BIO_SYNC;
149612ff971cSMatthew Dillon 		vn_strategy(hmp->devvp, &bp->b_bio1);
149765cacacfSMatthew Dillon 		fsync_error = biowait(&bp->b_bio1, "h2vol");
149812ff971cSMatthew Dillon 		relpbuf(bp, NULL);
149912ff971cSMatthew Dillon 
150012ff971cSMatthew Dillon 		/*
150112ff971cSMatthew Dillon 		 * Then we can safely flush the version of the
150212ff971cSMatthew Dillon 		 * volume header synchronized by the flush code.
150312ff971cSMatthew Dillon 		 */
150412ff971cSMatthew Dillon 		j = hmp->volhdrno + 1;
150519808ac9SMatthew Dillon 		if (j < 0)
150619808ac9SMatthew Dillon 			j = 0;
150712ff971cSMatthew Dillon 		if (j >= HAMMER2_NUM_VOLHDRS)
150812ff971cSMatthew Dillon 			j = 0;
150912ff971cSMatthew Dillon 		if (j * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
151012ff971cSMatthew Dillon 		    hmp->volsync.volu_size) {
151112ff971cSMatthew Dillon 			j = 0;
151212ff971cSMatthew Dillon 		}
15135d37f96dSMatthew Dillon 		if (hammer2_debug & 0x8000) {
15145d37f96dSMatthew Dillon 			/* debug only, avoid syslogd loop */
151512ff971cSMatthew Dillon 			kprintf("sync volhdr %d %jd\n",
151612ff971cSMatthew Dillon 				j, (intmax_t)hmp->volsync.volu_size);
15175d37f96dSMatthew Dillon 		}
151812ff971cSMatthew Dillon 		bp = getblk(hmp->devvp, j * HAMMER2_ZONE_BYTES64,
151973da1719STomohiro Kusumi 			    HAMMER2_VOLUME_BYTES, GETBLK_KVABIO, 0);
152012ff971cSMatthew Dillon 		atomic_clear_int(&hmp->vchain.flags,
152112ff971cSMatthew Dillon 				 HAMMER2_CHAIN_VOLUMESYNC);
152204b8e839SMatthew Dillon 		bkvasync(bp);
152373da1719STomohiro Kusumi 		bcopy(&hmp->volsync, bp->b_data, HAMMER2_VOLUME_BYTES);
152465cacacfSMatthew Dillon 		vol_error = bwrite(bp);
152512ff971cSMatthew Dillon 		hmp->volhdrno = j;
152665cacacfSMatthew Dillon 		if (vol_error)
152765cacacfSMatthew Dillon 			fsync_error = vol_error;
152812ff971cSMatthew Dillon 	}
152965cacacfSMatthew Dillon 	if (flush_error)
153065cacacfSMatthew Dillon 		total_error = flush_error;
153165cacacfSMatthew Dillon 	if (fsync_error)
153265cacacfSMatthew Dillon 		total_error = hammer2_errno_to_error(fsync_error);
153312ff971cSMatthew Dillon 
1534ecfe89b8SMatthew Dillon 	/* spmp trans */
1535ecfe89b8SMatthew Dillon 	hammer2_trans_done(hmp->spmp, HAMMER2_TRANS_ISFLUSH);
153612ff971cSMatthew Dillon skip:
1537c4421f07SMatthew Dillon 	hammer2_xop_feed(&xop->head, NULL, clindex, total_error);
153812ff971cSMatthew Dillon }
1539