xref: /dragonfly/sys/vfs/hammer2/hammer2_flush.c (revision d0755e6d)
132b800e6SMatthew Dillon /*
268b321c1SMatthew Dillon  * Copyright (c) 2011-2018 The DragonFly Project.  All rights reserved.
332b800e6SMatthew Dillon  *
432b800e6SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
532b800e6SMatthew Dillon  * by Matthew Dillon <dillon@dragonflybsd.org>
632b800e6SMatthew Dillon  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
732b800e6SMatthew Dillon  *
832b800e6SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
932b800e6SMatthew Dillon  * modification, are permitted provided that the following conditions
1032b800e6SMatthew Dillon  * are met:
1132b800e6SMatthew Dillon  *
1232b800e6SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
1332b800e6SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
1432b800e6SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
1532b800e6SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
1632b800e6SMatthew Dillon  *    the documentation and/or other materials provided with the
1732b800e6SMatthew Dillon  *    distribution.
1832b800e6SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
1932b800e6SMatthew Dillon  *    contributors may be used to endorse or promote products derived
2032b800e6SMatthew Dillon  *    from this software without specific, prior written permission.
2132b800e6SMatthew Dillon  *
2232b800e6SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
2332b800e6SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
2432b800e6SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
2532b800e6SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
2632b800e6SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
2732b800e6SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
2832b800e6SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
2932b800e6SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
3032b800e6SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
3132b800e6SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
3232b800e6SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
3332b800e6SMatthew Dillon  * SUCH DAMAGE.
3432b800e6SMatthew Dillon  */
3550456506SMatthew Dillon /*
3650456506SMatthew Dillon  *			TRANSACTION AND FLUSH HANDLING
3750456506SMatthew Dillon  *
3850456506SMatthew Dillon  * Deceptively simple but actually fairly difficult to implement properly is
3950456506SMatthew Dillon  * how I would describe it.
4050456506SMatthew Dillon  *
41da6f36f4SMatthew Dillon  * Flushing generally occurs bottom-up but requires a top-down scan to
42da6f36f4SMatthew Dillon  * locate chains with MODIFIED and/or UPDATE bits set.  The ONFLUSH flag
43da6f36f4SMatthew Dillon  * tells how to recurse downward to find these chains.
4450456506SMatthew Dillon  */
4550456506SMatthew Dillon 
4632b800e6SMatthew Dillon #include <sys/cdefs.h>
4732b800e6SMatthew Dillon #include <sys/param.h>
4832b800e6SMatthew Dillon #include <sys/systm.h>
4932b800e6SMatthew Dillon #include <sys/types.h>
5032b800e6SMatthew Dillon #include <sys/lock.h>
5132b800e6SMatthew Dillon #include <sys/uuid.h>
5232b800e6SMatthew Dillon 
5332b800e6SMatthew Dillon #include "hammer2.h"
5432b800e6SMatthew Dillon 
/*
 * File-local debug switch, 0 = disabled.
 * NOTE(review): no user of FLUSH_DEBUG is visible in the reviewed portion
 * of this file -- confirm it is still needed.
 */
#define FLUSH_DEBUG 0

/*
 * Recursion depth at which hammer2_flush_core() stops descending, so that
 * very deep chain topologies cannot exhaust the kernel stack.
 */
#define HAMMER2_FLUSH_DEPTH_LIMIT	60      /* stack recursion limit */
58a71db85dSMatthew Dillon 
59a71db85dSMatthew Dillon 
6032b800e6SMatthew Dillon /*
6132b800e6SMatthew Dillon  * Recursively flush the specified chain.  The chain is locked and
6232b800e6SMatthew Dillon  * referenced by the caller and will remain so on return.  The chain
6332b800e6SMatthew Dillon  * will remain referenced throughout but can temporarily lose its
6432b800e6SMatthew Dillon  * lock during the recursion to avoid unnecessarily stalling user
6532b800e6SMatthew Dillon  * processes.
6632b800e6SMatthew Dillon  */
6732b800e6SMatthew Dillon struct hammer2_flush_info {
680dea3156SMatthew Dillon 	hammer2_chain_t *parent;
6932b800e6SMatthew Dillon 	int		depth;
7065cacacfSMatthew Dillon 	int		error;			/* cumulative error */
7153f84d31SMatthew Dillon 	int		flags;
726aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
736aaf5cb0SMatthew Dillon 	long		scan_count;
746aaf5cb0SMatthew Dillon 	long		scan_mod_count;
756aaf5cb0SMatthew Dillon 	long		scan_upd_count;
766aaf5cb0SMatthew Dillon 	long		scan_onf_count;
776aaf5cb0SMatthew Dillon 	long		scan_del_count;
786aaf5cb0SMatthew Dillon 	long		scan_btype[7];
796aaf5cb0SMatthew Dillon #endif
80850687d2SMatthew Dillon 	hammer2_chain_t	*debug;
8132b800e6SMatthew Dillon };
8232b800e6SMatthew Dillon 
8332b800e6SMatthew Dillon typedef struct hammer2_flush_info hammer2_flush_info_t;
8432b800e6SMatthew Dillon 
85ecfe89b8SMatthew Dillon static int hammer2_flush_core(hammer2_flush_info_t *info,
8653f84d31SMatthew Dillon 				hammer2_chain_t *chain, int flags);
87da6f36f4SMatthew Dillon static int hammer2_flush_recurse(hammer2_chain_t *child, void *data);
8893f3933aSMatthew Dillon 
8932b800e6SMatthew Dillon /*
90c603b86bSMatthew Dillon  * Any per-pfs transaction initialization goes here.
9150456506SMatthew Dillon  */
9250456506SMatthew Dillon void
93c603b86bSMatthew Dillon hammer2_trans_manage_init(hammer2_pfs_t *pmp)
9450456506SMatthew Dillon {
9550456506SMatthew Dillon }
9650456506SMatthew Dillon 
9750456506SMatthew Dillon /*
98d34788efSMatthew Dillon  * Transaction support for any modifying operation.  Transactions are used
99d34788efSMatthew Dillon  * in the pmp layer by the frontend and in the spmp layer by the backend.
100c603b86bSMatthew Dillon  *
1013e8408dbSMatthew Dillon  * 0			- Normal transaction.  Interlocks against just the
1023e8408dbSMatthew Dillon  *			  COPYQ portion of an ISFLUSH transaction.
103c603b86bSMatthew Dillon  *
104ecfe89b8SMatthew Dillon  * TRANS_ISFLUSH	- Flush transaction.  Interlocks against other flush
105ecfe89b8SMatthew Dillon  *			  transactions.
106c603b86bSMatthew Dillon  *
1073e8408dbSMatthew Dillon  *			  When COPYQ is also specified, waits for the count
1083e8408dbSMatthew Dillon  *			  to drop to 1.
1093e8408dbSMatthew Dillon  *
110ecfe89b8SMatthew Dillon  * TRANS_BUFCACHE	- Buffer cache transaction.  No interlock.
111ecfe89b8SMatthew Dillon  *
112ecfe89b8SMatthew Dillon  * TRANS_SIDEQ		- Run the sideq (only tested in trans_done())
1130dea3156SMatthew Dillon  *
11410136ab6SMatthew Dillon  * Initializing a new transaction allocates a transaction ID.  Typically
11510136ab6SMatthew Dillon  * passed a pmp (hmp passed as NULL), indicating a cluster transaction.  Can
11610136ab6SMatthew Dillon  * be passed a NULL pmp and non-NULL hmp to indicate a transaction on a single
11710136ab6SMatthew Dillon  * media target.  The latter mode is used by the recovery code.
1180dea3156SMatthew Dillon  */
1190dea3156SMatthew Dillon void
120c603b86bSMatthew Dillon hammer2_trans_init(hammer2_pfs_t *pmp, uint32_t flags)
1210dea3156SMatthew Dillon {
122c603b86bSMatthew Dillon 	uint32_t oflags;
123c603b86bSMatthew Dillon 	uint32_t nflags;
124c603b86bSMatthew Dillon 	int dowait;
125d001f460SMatthew Dillon 
126c603b86bSMatthew Dillon 	for (;;) {
127c603b86bSMatthew Dillon 		oflags = pmp->trans.flags;
128c603b86bSMatthew Dillon 		cpu_ccfence();
129c603b86bSMatthew Dillon 		dowait = 0;
130d001f460SMatthew Dillon 
131d001f460SMatthew Dillon 		if (flags & HAMMER2_TRANS_ISFLUSH) {
132d001f460SMatthew Dillon 			/*
133ecfe89b8SMatthew Dillon 			 * Interlock against other flush transactions.
134355d67fcSMatthew Dillon 			 */
1355afbe9d8SMatthew Dillon 			if (oflags & HAMMER2_TRANS_ISFLUSH) {
1365afbe9d8SMatthew Dillon 				nflags = oflags | HAMMER2_TRANS_WAITING;
1375afbe9d8SMatthew Dillon 				dowait = 1;
1385afbe9d8SMatthew Dillon 			} else {
1395afbe9d8SMatthew Dillon 				nflags = (oflags | flags) + 1;
1405afbe9d8SMatthew Dillon 			}
141c603b86bSMatthew Dillon 		} else if (flags & HAMMER2_TRANS_BUFCACHE) {
142a7720be7SMatthew Dillon 			/*
14320852157SMatthew Dillon 			 * Requesting strategy transaction from buffer-cache,
14420852157SMatthew Dillon 			 * or a VM getpages/putpages through the buffer cache.
14520852157SMatthew Dillon 			 * We must allow such transactions in all situations
14620852157SMatthew Dillon 			 * to avoid deadlocks.
14720852157SMatthew Dillon 			 */
14820852157SMatthew Dillon 			nflags = (oflags | flags) + 1;
149a4dc31e0SMatthew Dillon 		} else {
150a4dc31e0SMatthew Dillon 			/*
151*d0755e6dSMatthew Dillon 			 * Normal transaction.  We do not interlock against
152*d0755e6dSMatthew Dillon 			 * BUFCACHE or ISFLUSH.
15368b321c1SMatthew Dillon 			 *
154ecfe89b8SMatthew Dillon 			 * Note that vnode locks may be held going into
155ecfe89b8SMatthew Dillon 			 * this call.
15668b321c1SMatthew Dillon 			 *
15768b321c1SMatthew Dillon 			 * NOTE: Remember that non-modifying operations
15868b321c1SMatthew Dillon 			 *	 such as read, stat, readdir, etc, do
15968b321c1SMatthew Dillon 			 *	 not use transactions.
160a4dc31e0SMatthew Dillon 			 */
161c603b86bSMatthew Dillon 			nflags = (oflags | flags) + 1;
162c603b86bSMatthew Dillon 		}
163c603b86bSMatthew Dillon 		if (dowait)
164c603b86bSMatthew Dillon 			tsleep_interlock(&pmp->trans.sync_wait, 0);
165c603b86bSMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
166c603b86bSMatthew Dillon 			if (dowait == 0)
167a4dc31e0SMatthew Dillon 				break;
168c603b86bSMatthew Dillon 			tsleep(&pmp->trans.sync_wait, PINTERLOCKED,
169c603b86bSMatthew Dillon 			       "h2trans", hz);
170ecfe89b8SMatthew Dillon 			/* retry */
171c603b86bSMatthew Dillon 		} else {
172c603b86bSMatthew Dillon 			cpu_pause();
173ecfe89b8SMatthew Dillon 			/* retry */
174a7720be7SMatthew Dillon 		}
175c603b86bSMatthew Dillon 		/* retry */
176c603b86bSMatthew Dillon 	}
1773e8408dbSMatthew Dillon 
178*d0755e6dSMatthew Dillon #if 0
1793e8408dbSMatthew Dillon 	/*
1803e8408dbSMatthew Dillon 	 * When entering a FLUSH transaction with COPYQ set, wait for the
1813e8408dbSMatthew Dillon 	 * transaction count to drop to 1 (our flush transaction only)
1823e8408dbSMatthew Dillon 	 * before proceeding.
1833e8408dbSMatthew Dillon 	 *
1843e8408dbSMatthew Dillon 	 * This waits for all non-flush transactions to complete and blocks
1853e8408dbSMatthew Dillon 	 * new non-flush transactions from starting until COPYQ is cleared.
1863e8408dbSMatthew Dillon 	 * (the flush will then proceed after clearing COPYQ).  This should
1873e8408dbSMatthew Dillon 	 * be a very short stall on modifying operations.
1883e8408dbSMatthew Dillon 	 */
1893e8408dbSMatthew Dillon 	while ((flags & HAMMER2_TRANS_ISFLUSH) &&
1903e8408dbSMatthew Dillon 	       (flags & HAMMER2_TRANS_COPYQ)) {
1913e8408dbSMatthew Dillon 		oflags = pmp->trans.flags;
1923e8408dbSMatthew Dillon 		cpu_ccfence();
1933e8408dbSMatthew Dillon 		if ((oflags & HAMMER2_TRANS_MASK) == 1)
1943e8408dbSMatthew Dillon 			break;
1953e8408dbSMatthew Dillon 		nflags = oflags | HAMMER2_TRANS_WAITING;
1963e8408dbSMatthew Dillon 		tsleep_interlock(&pmp->trans.sync_wait, 0);
1973e8408dbSMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
1983e8408dbSMatthew Dillon 			tsleep(&pmp->trans.sync_wait, PINTERLOCKED,
1993e8408dbSMatthew Dillon 			       "h2trans2", hz);
2003e8408dbSMatthew Dillon 		}
2013e8408dbSMatthew Dillon 	}
202*d0755e6dSMatthew Dillon #endif
203c603b86bSMatthew Dillon }
204a4dc31e0SMatthew Dillon 
205e2163f5bSMatthew Dillon /*
206e2163f5bSMatthew Dillon  * Start a sub-transaction, there is no 'subdone' function.  This will
20753f84d31SMatthew Dillon  * issue a new modify_tid (mtid) for the current transaction, which is a
20853f84d31SMatthew Dillon  * CLC (cluster level change) id and not a per-node id.
20953f84d31SMatthew Dillon  *
21053f84d31SMatthew Dillon  * This function must be called for each XOP when multiple XOPs are run in
21153f84d31SMatthew Dillon  * sequence within a transaction.
21253f84d31SMatthew Dillon  *
21353f84d31SMatthew Dillon  * Callers typically update the inode with the transaction mtid manually
21453f84d31SMatthew Dillon  * to enforce sequencing.
215e2163f5bSMatthew Dillon  */
216e2163f5bSMatthew Dillon hammer2_tid_t
217e2163f5bSMatthew Dillon hammer2_trans_sub(hammer2_pfs_t *pmp)
218e2163f5bSMatthew Dillon {
219e2163f5bSMatthew Dillon 	hammer2_tid_t mtid;
220e2163f5bSMatthew Dillon 
221e2163f5bSMatthew Dillon 	mtid = atomic_fetchadd_64(&pmp->modify_tid, 1);
222e2163f5bSMatthew Dillon 
223e2163f5bSMatthew Dillon 	return (mtid);
224e2163f5bSMatthew Dillon }
225e2163f5bSMatthew Dillon 
226c603b86bSMatthew Dillon void
227ecfe89b8SMatthew Dillon hammer2_trans_setflags(hammer2_pfs_t *pmp, uint32_t flags)
228ecfe89b8SMatthew Dillon {
229ecfe89b8SMatthew Dillon 	atomic_set_int(&pmp->trans.flags, flags);
230ecfe89b8SMatthew Dillon }
231ecfe89b8SMatthew Dillon 
2323e8408dbSMatthew Dillon /*
2333e8408dbSMatthew Dillon  * Typically used to clear trans flags asynchronously.  If TRANS_WAITING
2343e8408dbSMatthew Dillon  * is in the mask, and was previously set, this function will wake up
2353e8408dbSMatthew Dillon  * any waiters.
2363e8408dbSMatthew Dillon  */
237ecfe89b8SMatthew Dillon void
238ecfe89b8SMatthew Dillon hammer2_trans_clearflags(hammer2_pfs_t *pmp, uint32_t flags)
239ecfe89b8SMatthew Dillon {
240ecfe89b8SMatthew Dillon 	uint32_t oflags;
241ecfe89b8SMatthew Dillon 	uint32_t nflags;
242ecfe89b8SMatthew Dillon 
243ecfe89b8SMatthew Dillon 	for (;;) {
244ecfe89b8SMatthew Dillon 		oflags = pmp->trans.flags;
245ecfe89b8SMatthew Dillon 		cpu_ccfence();
246ecfe89b8SMatthew Dillon 		nflags = oflags & ~flags;
247ecfe89b8SMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
248ecfe89b8SMatthew Dillon 			if ((oflags ^ nflags) & HAMMER2_TRANS_WAITING)
249ecfe89b8SMatthew Dillon 				wakeup(&pmp->trans.sync_wait);
250ecfe89b8SMatthew Dillon 			break;
251ecfe89b8SMatthew Dillon 		}
252ecfe89b8SMatthew Dillon 		cpu_pause();
253ecfe89b8SMatthew Dillon 		/* retry */
254ecfe89b8SMatthew Dillon 	}
255ecfe89b8SMatthew Dillon }
256ecfe89b8SMatthew Dillon 
257ecfe89b8SMatthew Dillon void
258ecfe89b8SMatthew Dillon hammer2_trans_done(hammer2_pfs_t *pmp, uint32_t flags)
259c603b86bSMatthew Dillon {
260c603b86bSMatthew Dillon 	uint32_t oflags;
261c603b86bSMatthew Dillon 	uint32_t nflags;
262c603b86bSMatthew Dillon 
263257c2728SMatthew Dillon 	/*
264257c2728SMatthew Dillon 	 * Modifying ops on the front-end can cause dirty inodes to
265257c2728SMatthew Dillon 	 * build up in the sideq.  We don't flush these on inactive/reclaim
266257c2728SMatthew Dillon 	 * due to potential deadlocks, so we have to deal with them from
267257c2728SMatthew Dillon 	 * inside other nominal modifying front-end transactions.
268257c2728SMatthew Dillon 	 */
269ecfe89b8SMatthew Dillon 	if ((flags & HAMMER2_TRANS_SIDEQ) &&
270*d0755e6dSMatthew Dillon 	    pmp->sideq_count > hammer2_limit_dirty_inodes / 2 &&
271ecfe89b8SMatthew Dillon 	    pmp->sideq_count > (pmp->inum_count >> 3) &&
272ecfe89b8SMatthew Dillon 	    pmp->mp) {
2735afbe9d8SMatthew Dillon 		speedup_syncer(pmp->mp);
274ecfe89b8SMatthew Dillon 	}
275257c2728SMatthew Dillon 
276257c2728SMatthew Dillon 	/*
2773e8408dbSMatthew Dillon 	 * Clean-up the transaction.  Wakeup any waiters when finishing
2783e8408dbSMatthew Dillon 	 * a flush transaction or transitioning the non-flush transaction
2793e8408dbSMatthew Dillon 	 * count from 2->1 while a flush transaction is pending.
280257c2728SMatthew Dillon 	 */
281c603b86bSMatthew Dillon 	for (;;) {
282c603b86bSMatthew Dillon 		oflags = pmp->trans.flags;
283c603b86bSMatthew Dillon 		cpu_ccfence();
284c603b86bSMatthew Dillon 		KKASSERT(oflags & HAMMER2_TRANS_MASK);
285ecfe89b8SMatthew Dillon 
286ecfe89b8SMatthew Dillon 		nflags = (oflags - 1) & ~flags;
287ecfe89b8SMatthew Dillon 		if (flags & HAMMER2_TRANS_ISFLUSH) {
288ecfe89b8SMatthew Dillon 			nflags &= ~HAMMER2_TRANS_WAITING;
289c603b86bSMatthew Dillon 		}
2903e8408dbSMatthew Dillon 		if ((oflags & (HAMMER2_TRANS_ISFLUSH|HAMMER2_TRANS_MASK)) ==
2913e8408dbSMatthew Dillon 		    (HAMMER2_TRANS_ISFLUSH|2)) {
2923e8408dbSMatthew Dillon 			nflags &= ~HAMMER2_TRANS_WAITING;
2933e8408dbSMatthew Dillon 		}
294c603b86bSMatthew Dillon 		if (atomic_cmpset_int(&pmp->trans.flags, oflags, nflags)) {
295ecfe89b8SMatthew Dillon 			if ((oflags ^ nflags) & HAMMER2_TRANS_WAITING)
296c603b86bSMatthew Dillon 				wakeup(&pmp->trans.sync_wait);
297c603b86bSMatthew Dillon 			break;
298c603b86bSMatthew Dillon 		}
299ecfe89b8SMatthew Dillon 		cpu_pause();
300c603b86bSMatthew Dillon 		/* retry */
301044541cdSMatthew Dillon 	}
30250456506SMatthew Dillon }
30350456506SMatthew Dillon 
304c603b86bSMatthew Dillon /*
305c603b86bSMatthew Dillon  * Obtain new, unique inode number (not serialized by caller).
306c603b86bSMatthew Dillon  */
307c603b86bSMatthew Dillon hammer2_tid_t
308c603b86bSMatthew Dillon hammer2_trans_newinum(hammer2_pfs_t *pmp)
309c603b86bSMatthew Dillon {
310c603b86bSMatthew Dillon 	hammer2_tid_t tid;
311c603b86bSMatthew Dillon 
312e2163f5bSMatthew Dillon 	tid = atomic_fetchadd_64(&pmp->inode_tid, 1);
313c603b86bSMatthew Dillon 
314c603b86bSMatthew Dillon 	return tid;
315a7720be7SMatthew Dillon }
316a7720be7SMatthew Dillon 
317c603b86bSMatthew Dillon /*
31820852157SMatthew Dillon  * Assert that a strategy call is ok here.  Currently we allow strategy
31920852157SMatthew Dillon  * calls in all situations, including during flushes.  Previously:
32020852157SMatthew Dillon  *	(old) (1) In a normal transaction.
32120852157SMatthew Dillon  *	(old) (2) In a flush transaction only if PREFLUSH is also set.
322c603b86bSMatthew Dillon  */
3230dea3156SMatthew Dillon void
3249450e866SMatthew Dillon hammer2_trans_assert_strategy(hammer2_pfs_t *pmp)
325c7916d0bSMatthew Dillon {
32620852157SMatthew Dillon #if 0
327c603b86bSMatthew Dillon 	KKASSERT((pmp->trans.flags & HAMMER2_TRANS_ISFLUSH) == 0 ||
328c603b86bSMatthew Dillon 		 (pmp->trans.flags & HAMMER2_TRANS_PREFLUSH));
32920852157SMatthew Dillon #endif
330c7916d0bSMatthew Dillon }
331c7916d0bSMatthew Dillon 
332eedd52a3SMatthew Dillon /*
3330dea3156SMatthew Dillon  * Flush the chain and all modified sub-chains through the specified
33453f84d31SMatthew Dillon  * synchronization point, propagating blockref updates back up.  As
33553f84d31SMatthew Dillon  * part of this propagation, mirror_tid and inode/data usage statistics
33653f84d31SMatthew Dillon  * propagates back upward.
3370dea3156SMatthew Dillon  *
33865cacacfSMatthew Dillon  * Returns a HAMMER2 error code, 0 if no error.  Note that I/O errors from
33965cacacfSMatthew Dillon  * buffers dirtied during the flush operation can occur later.
34065cacacfSMatthew Dillon  *
34153f84d31SMatthew Dillon  * modify_tid (clc - cluster level change) is not propagated.
34253f84d31SMatthew Dillon  *
34353f84d31SMatthew Dillon  * update_tid (clc) is used for validation and is not propagated by this
34453f84d31SMatthew Dillon  * function.
3450dea3156SMatthew Dillon  *
34632b800e6SMatthew Dillon  * This routine can be called from several places but the most important
347c4421f07SMatthew Dillon  * is from VFS_SYNC (frontend) via hammer2_xop_inode_flush (backend).
34832b800e6SMatthew Dillon  *
349da6f36f4SMatthew Dillon  * chain is locked on call and will remain locked on return.  The chain's
350da6f36f4SMatthew Dillon  * UPDATE flag indicates that its parent's block table (which is not yet
3515c51ecaeSMatthew Dillon  * part of the flush) should be updated.
35240498d1cSMatthew Dillon  *
35340498d1cSMatthew Dillon  * flags:
35440498d1cSMatthew Dillon  *	HAMMER2_FLUSH_TOP	Indicates that this is the top of the flush.
35540498d1cSMatthew Dillon  *				Is cleared for the recursion.
35640498d1cSMatthew Dillon  *
35740498d1cSMatthew Dillon  *	HAMMER2_FLUSH_ALL	Recurse everything
35840498d1cSMatthew Dillon  *
35965c894ffSMatthew Dillon  *	HAMMER2_FLUSH_INODE_STOP
36065c894ffSMatthew Dillon  *				Stop at PFS inode or normal inode boundary
36132b800e6SMatthew Dillon  */
36265cacacfSMatthew Dillon int
36353f84d31SMatthew Dillon hammer2_flush(hammer2_chain_t *chain, int flags)
36432b800e6SMatthew Dillon {
36532b800e6SMatthew Dillon 	hammer2_flush_info_t info;
366eedd52a3SMatthew Dillon 	hammer2_dev_t *hmp;
367925e4ad1SMatthew Dillon 	int loops;
36832b800e6SMatthew Dillon 
36932b800e6SMatthew Dillon 	/*
37032b800e6SMatthew Dillon 	 * Execute the recursive flush and handle deferrals.
37132b800e6SMatthew Dillon 	 *
37232b800e6SMatthew Dillon 	 * Chains can be ridiculously long (thousands deep), so to
37332b800e6SMatthew Dillon 	 * avoid blowing out the kernel stack the recursive flush has a
37432b800e6SMatthew Dillon 	 * depth limit.  Elements at the limit are placed on a list
37532b800e6SMatthew Dillon 	 * for re-execution after the stack has been popped.
37632b800e6SMatthew Dillon 	 */
37732b800e6SMatthew Dillon 	bzero(&info, sizeof(info));
37853f84d31SMatthew Dillon 	info.flags = flags & ~HAMMER2_FLUSH_TOP;
37932b800e6SMatthew Dillon 
380da6f36f4SMatthew Dillon 	/*
381da6f36f4SMatthew Dillon 	 * Calculate parent (can be NULL), if not NULL the flush core
382da6f36f4SMatthew Dillon 	 * expects the parent to be referenced so it can easily lock/unlock
383da6f36f4SMatthew Dillon 	 * it without it getting ripped up.
384da6f36f4SMatthew Dillon 	 */
385da6f36f4SMatthew Dillon 	if ((info.parent = chain->parent) != NULL)
386da6f36f4SMatthew Dillon 		hammer2_chain_ref(info.parent);
387731b2a84SMatthew Dillon 
388a7720be7SMatthew Dillon 	/*
389a7720be7SMatthew Dillon 	 * Extra ref needed because flush_core expects it when replacing
390a7720be7SMatthew Dillon 	 * chain.
391a7720be7SMatthew Dillon 	 */
392a7720be7SMatthew Dillon 	hammer2_chain_ref(chain);
393eedd52a3SMatthew Dillon 	hmp = chain->hmp;
394925e4ad1SMatthew Dillon 	loops = 0;
395a7720be7SMatthew Dillon 
3960dea3156SMatthew Dillon 	for (;;) {
39732b800e6SMatthew Dillon 		/*
39840498d1cSMatthew Dillon 		 * [re]flush chain as the deep recursion may have generated
39940498d1cSMatthew Dillon 		 * additional modifications.
40032b800e6SMatthew Dillon 		 */
40140498d1cSMatthew Dillon 		if (info.parent != chain->parent) {
40268b321c1SMatthew Dillon 			if (hammer2_debug & 0x0040) {
40368b321c1SMatthew Dillon 				kprintf("LOST CHILD4 %p->%p "
40468b321c1SMatthew Dillon 					"(actual parent %p)\n",
40540498d1cSMatthew Dillon 					info.parent, chain, chain->parent);
40668b321c1SMatthew Dillon 			}
40740498d1cSMatthew Dillon 			hammer2_chain_drop(info.parent);
40840498d1cSMatthew Dillon 			info.parent = chain->parent;
40940498d1cSMatthew Dillon 			hammer2_chain_ref(info.parent);
41040498d1cSMatthew Dillon 		}
411ecfe89b8SMatthew Dillon 		if (hammer2_flush_core(&info, chain, flags) == 0)
41232b800e6SMatthew Dillon 			break;
413925e4ad1SMatthew Dillon 
414925e4ad1SMatthew Dillon 		if (++loops % 1000 == 0) {
4158138a154SMatthew Dillon 			kprintf("hammer2_flush: excessive loops on %p\n",
416925e4ad1SMatthew Dillon 				chain);
417925e4ad1SMatthew Dillon 			if (hammer2_debug & 0x100000)
418925e4ad1SMatthew Dillon 				Debugger("hell4");
419925e4ad1SMatthew Dillon 		}
42032b800e6SMatthew Dillon 	}
4216aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
4226aaf5cb0SMatthew Dillon 	if (info.scan_count >= 10)
4236aaf5cb0SMatthew Dillon 	kprintf("hammer2_flush: scan_count %ld (%ld,%ld,%ld,%ld) "
424ecfe89b8SMatthew Dillon 		"bt(%ld,%ld,%ld,%ld,%ld,%ld)\n",
4256aaf5cb0SMatthew Dillon 		info.scan_count,
4266aaf5cb0SMatthew Dillon 		info.scan_mod_count,
4276aaf5cb0SMatthew Dillon 		info.scan_upd_count,
4286aaf5cb0SMatthew Dillon 		info.scan_onf_count,
4296aaf5cb0SMatthew Dillon 		info.scan_del_count,
4306aaf5cb0SMatthew Dillon 		info.scan_btype[1],
4316aaf5cb0SMatthew Dillon 		info.scan_btype[2],
4326aaf5cb0SMatthew Dillon 		info.scan_btype[3],
4336aaf5cb0SMatthew Dillon 		info.scan_btype[4],
4346aaf5cb0SMatthew Dillon 		info.scan_btype[5],
435ecfe89b8SMatthew Dillon 		info.scan_btype[6]);
4366aaf5cb0SMatthew Dillon #endif
437a7720be7SMatthew Dillon 	hammer2_chain_drop(chain);
438da6f36f4SMatthew Dillon 	if (info.parent)
439da6f36f4SMatthew Dillon 		hammer2_chain_drop(info.parent);
44065cacacfSMatthew Dillon 	return (info.error);
44132b800e6SMatthew Dillon }
44232b800e6SMatthew Dillon 
443476d2aadSMatthew Dillon /*
444ea155208SMatthew Dillon  * This is the core of the chain flushing code.  The chain is locked by the
445a7720be7SMatthew Dillon  * caller and must also have an extra ref on it by the caller, and remains
446fae225dcSMatthew Dillon  * locked and will have an extra ref on return.  info.parent is referenced
447fae225dcSMatthew Dillon  * but not locked.
448fae225dcSMatthew Dillon  *
449fae225dcSMatthew Dillon  * Upon return, the caller can test the UPDATE bit on the chain to determine
450fae225dcSMatthew Dillon  * if the parent needs updating.
451a7720be7SMatthew Dillon  *
452ecfe89b8SMatthew Dillon  * If non-zero is returned, the chain's parent changed during the flush and
453ecfe89b8SMatthew Dillon  * the caller must retry the operation.
454ecfe89b8SMatthew Dillon  *
4558138a154SMatthew Dillon  * (1) Determine if this node is a candidate for the flush, return if it is
4568138a154SMatthew Dillon  *     not.  fchain and vchain are always candidates for the flush.
4570dea3156SMatthew Dillon  *
4588138a154SMatthew Dillon  * (2) If we recurse too deep the chain is entered onto the deferral list and
4598138a154SMatthew Dillon  *     the current flush stack is aborted until after the deferral list is
4608138a154SMatthew Dillon  *     run.
4618138a154SMatthew Dillon  *
4628138a154SMatthew Dillon  * (3) Recursively flush live children (rbtree).  This can create deferrals.
463da6f36f4SMatthew Dillon  *     A successful flush clears the MODIFIED and UPDATE bits on the children
464da6f36f4SMatthew Dillon  *     and typically causes the parent to be marked MODIFIED as the children
465da6f36f4SMatthew Dillon  *     update the parent's block table.  A parent might already be marked
466da6f36f4SMatthew Dillon  *     MODIFIED due to a deletion (whose blocktable update in the parent is
467da6f36f4SMatthew Dillon  *     handled by the frontend), or if the parent itself is modified by the
468da6f36f4SMatthew Dillon  *     frontend for other reasons.
4698138a154SMatthew Dillon  *
470da6f36f4SMatthew Dillon  * (4) Permanently disconnected sub-trees are cleaned up by the front-end.
471da6f36f4SMatthew Dillon  *     Deleted-but-open inodes can still be individually flushed via the
472da6f36f4SMatthew Dillon  *     filesystem syncer.
4738138a154SMatthew Dillon  *
474470dad14SMatthew Dillon  * (5) Delete parents on the way back up if they are normal indirect blocks
475470dad14SMatthew Dillon  *     and have no children.
476470dad14SMatthew Dillon  *
477470dad14SMatthew Dillon  * (6) Note that an unmodified child may still need the block table in its
478da6f36f4SMatthew Dillon  *     parent updated (e.g. rename/move).  The child will have UPDATE set
479da6f36f4SMatthew Dillon  *     in this case.
4808138a154SMatthew Dillon  *
48150456506SMatthew Dillon  *			WARNING ON BREF MODIFY_TID/MIRROR_TID
482925e4ad1SMatthew Dillon  *
483e513e77eSMatthew Dillon  * blockref.modify_tid is consistent only within a PFS, and will not be
484e513e77eSMatthew Dillon  * consistent during synchronization.  mirror_tid is consistent across the
485e513e77eSMatthew Dillon  * block device regardless of the PFS.
486476d2aadSMatthew Dillon  */
487ecfe89b8SMatthew Dillon static int
488da6f36f4SMatthew Dillon hammer2_flush_core(hammer2_flush_info_t *info, hammer2_chain_t *chain,
48953f84d31SMatthew Dillon 		   int flags)
49032b800e6SMatthew Dillon {
491da6f36f4SMatthew Dillon 	hammer2_chain_t *parent;
492506bd6d1SMatthew Dillon 	hammer2_dev_t *hmp;
49365cacacfSMatthew Dillon 	int save_error;
494ecfe89b8SMatthew Dillon 	int retry;
495ecfe89b8SMatthew Dillon 
496ecfe89b8SMatthew Dillon 	retry = 0;
497da6f36f4SMatthew Dillon 
498da6f36f4SMatthew Dillon 	/*
499da6f36f4SMatthew Dillon 	 * (1) Optimize downward recursion to locate nodes needing action.
500da6f36f4SMatthew Dillon 	 *     Nothing to do if none of these flags are set.
501da6f36f4SMatthew Dillon 	 */
502850687d2SMatthew Dillon 	if ((chain->flags & HAMMER2_CHAIN_FLUSH_MASK) == 0) {
503850687d2SMatthew Dillon 		if (hammer2_debug & 0x200) {
504850687d2SMatthew Dillon 			if (info->debug == NULL)
505850687d2SMatthew Dillon 				info->debug = chain;
506850687d2SMatthew Dillon 		} else {
507ecfe89b8SMatthew Dillon 			return 0;
508850687d2SMatthew Dillon 		}
509850687d2SMatthew Dillon 	}
51032b800e6SMatthew Dillon 
511a5913bdfSMatthew Dillon 	hmp = chain->hmp;
51240498d1cSMatthew Dillon 
51340498d1cSMatthew Dillon 	/*
51440498d1cSMatthew Dillon 	 * NOTE: parent can be NULL, usually due to destroy races.
51540498d1cSMatthew Dillon 	 */
51640498d1cSMatthew Dillon 	parent = info->parent;
517fae225dcSMatthew Dillon 	KKASSERT(chain->parent == parent);
518925e4ad1SMatthew Dillon 
5190924b3f8SMatthew Dillon 	/*
520da6f36f4SMatthew Dillon 	 * Downward search recursion
52140498d1cSMatthew Dillon 	 *
522*d0755e6dSMatthew Dillon 	 * We must be careful on cold stops, which often occur on inode
523*d0755e6dSMatthew Dillon 	 * boundaries due to the way hammer2_vfs_sync() sequences the flush.
524*d0755e6dSMatthew Dillon 	 * Be sure to issue an appropriate chain_setflush()
525ea155208SMatthew Dillon 	 */
526ecfe89b8SMatthew Dillon 	if ((chain->flags & HAMMER2_CHAIN_PFSBOUNDARY) &&
52753f84d31SMatthew Dillon 	    (flags & HAMMER2_FLUSH_ALL) == 0 &&
528c42feed6SMatthew Dillon 	    (flags & HAMMER2_FLUSH_TOP) == 0 &&
529c42feed6SMatthew Dillon 	    chain->pmp && chain->pmp->mp) {
5309450e866SMatthew Dillon 		/*
531fae225dcSMatthew Dillon 		 * If FLUSH_ALL is not specified the caller does not want
532c42feed6SMatthew Dillon 		 * to recurse through PFS roots that have been mounted.
533c42feed6SMatthew Dillon 		 *
534c42feed6SMatthew Dillon 		 * (If the PFS has not been mounted there may not be
535c42feed6SMatthew Dillon 		 *  anything monitoring its chains and its up to us
536c42feed6SMatthew Dillon 		 *  to flush it).
537c42feed6SMatthew Dillon 		 *
538c42feed6SMatthew Dillon 		 * The typical sequence is to flush dirty PFS's starting at
539c42feed6SMatthew Dillon 		 * their root downward, then flush the device root (vchain).
540c42feed6SMatthew Dillon 		 * It is this second flush that typically leaves out the
541c42feed6SMatthew Dillon 		 * ALL flag.
5429450e866SMatthew Dillon 		 *
543fae225dcSMatthew Dillon 		 * However we must still process the PFSROOT chains for block
5449450e866SMatthew Dillon 		 * table updates in their parent (which IS part of our flush).
5459450e866SMatthew Dillon 		 *
546fae225dcSMatthew Dillon 		 * NOTE: The volume root, vchain, does not set PFSBOUNDARY.
547fae225dcSMatthew Dillon 		 *
548fae225dcSMatthew Dillon 		 * NOTE: We must re-set ONFLUSH in the parent to retain if
549fae225dcSMatthew Dillon 		 *	 this chain (that we are skipping) requires work.
5509450e866SMatthew Dillon 		 */
551fae225dcSMatthew Dillon 		if (chain->flags & (HAMMER2_CHAIN_ONFLUSH |
552fae225dcSMatthew Dillon 				    HAMMER2_CHAIN_DESTROY |
553fae225dcSMatthew Dillon 				    HAMMER2_CHAIN_MODIFIED)) {
554fae225dcSMatthew Dillon 			hammer2_chain_setflush(parent);
555fae225dcSMatthew Dillon 		}
556ecfe89b8SMatthew Dillon 		goto done;
55740498d1cSMatthew Dillon 	} else if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
55840498d1cSMatthew Dillon 		   (flags & HAMMER2_FLUSH_INODE_STOP) &&
55940498d1cSMatthew Dillon 		   (flags & HAMMER2_FLUSH_ALL) == 0 &&
56040498d1cSMatthew Dillon 		   (flags & HAMMER2_FLUSH_TOP) == 0 &&
56140498d1cSMatthew Dillon 		   chain->pmp && chain->pmp->mp) {
56240498d1cSMatthew Dillon 		/*
563ecfe89b8SMatthew Dillon 		 * When FLUSH_INODE_STOP is specified we are being asked not
564ecfe89b8SMatthew Dillon 		 * to include any inode changes for inodes we encounter,
565ecfe89b8SMatthew Dillon 		 * with the exception of the inode that the flush began with.
566ecfe89b8SMatthew Dillon 		 * So: INODE, INODE_STOP, and TOP==0 basically.
567*d0755e6dSMatthew Dillon 		 *
568*d0755e6dSMatthew Dillon 		 * Dirty inodes are flushed based on the hammer2_inode
569*d0755e6dSMatthew Dillon 		 * in-memory structure, issuing a chain_setflush() here
570*d0755e6dSMatthew Dillon 		 * will only cause unnecessary traversals of the topology.
571ecfe89b8SMatthew Dillon 		 */
572ecfe89b8SMatthew Dillon 		goto done;
573ecfe89b8SMatthew Dillon #if 0
574ecfe89b8SMatthew Dillon 		/*
57540498d1cSMatthew Dillon 		 * If FLUSH_INODE_STOP is specified and both ALL and TOP
57640498d1cSMatthew Dillon 		 * are clear, we must not flush the chain.  The chain should
57740498d1cSMatthew Dillon 		 * have already been flushed and any further ONFLUSH/UPDATE
57840498d1cSMatthew Dillon 		 * setting will be related to the next flush.
57940498d1cSMatthew Dillon 		 *
58040498d1cSMatthew Dillon 		 * This feature allows us to flush inodes independently of
58140498d1cSMatthew Dillon 		 * each other and meta-data above the inodes separately.
58240498d1cSMatthew Dillon 		 */
58340498d1cSMatthew Dillon 		if (chain->flags & (HAMMER2_CHAIN_ONFLUSH |
58440498d1cSMatthew Dillon 				    HAMMER2_CHAIN_DESTROY |
58540498d1cSMatthew Dillon 				    HAMMER2_CHAIN_MODIFIED)) {
58640498d1cSMatthew Dillon 			if (parent)
58740498d1cSMatthew Dillon 				hammer2_chain_setflush(parent);
58840498d1cSMatthew Dillon 		}
589ecfe89b8SMatthew Dillon #endif
59053f84d31SMatthew Dillon 	} else if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT) {
59153f84d31SMatthew Dillon 		/*
59253f84d31SMatthew Dillon 		 * Recursion depth reached.
59353f84d31SMatthew Dillon 		 */
594ecfe89b8SMatthew Dillon 		panic("hammer2: flush depth limit");
5958bbe5025SMatthew Dillon 	} else if (chain->flags & (HAMMER2_CHAIN_ONFLUSH |
5968bbe5025SMatthew Dillon 				   HAMMER2_CHAIN_DESTROY)) {
5978138a154SMatthew Dillon 		/*
598da6f36f4SMatthew Dillon 		 * Downward recursion search (actual flush occurs bottom-up).
59965cacacfSMatthew Dillon 		 * pre-clear ONFLUSH.  It can get set again due to races or
60065cacacfSMatthew Dillon 		 * flush errors, which we want so the scan finds us again in
60165cacacfSMatthew Dillon 		 * the next flush.
6028bbe5025SMatthew Dillon 		 *
6038bbe5025SMatthew Dillon 		 * We must also recurse if DESTROY is set so we can finally
6048bbe5025SMatthew Dillon 		 * get rid of the related children, otherwise the node will
6058bbe5025SMatthew Dillon 		 * just get re-flushed on lastdrop.
606fae225dcSMatthew Dillon 		 *
607fae225dcSMatthew Dillon 		 * WARNING!  The recursion will unlock/relock info->parent
608fae225dcSMatthew Dillon 		 *	     (which is 'chain'), potentially allowing it
609fae225dcSMatthew Dillon 		 *	     to be ripped up.
6108138a154SMatthew Dillon 		 */
611a964af6fSMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
61265cacacfSMatthew Dillon 		save_error = info->error;
61365cacacfSMatthew Dillon 		info->error = 0;
6148138a154SMatthew Dillon 		info->parent = chain;
615a964af6fSMatthew Dillon 
616a964af6fSMatthew Dillon 		/*
617a964af6fSMatthew Dillon 		 * We may have to do this twice to catch any indirect
618ecfe89b8SMatthew Dillon 		 * block maintenance that occurs.
619a964af6fSMatthew Dillon 		 */
62094491fa0SMatthew Dillon 		hammer2_spin_ex(&chain->core.spin);
621da6f36f4SMatthew Dillon 		RB_SCAN(hammer2_chain_tree, &chain->core.rbtree,
622da6f36f4SMatthew Dillon 			NULL, hammer2_flush_recurse, info);
623a964af6fSMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_ONFLUSH) {
624a964af6fSMatthew Dillon 			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_ONFLUSH);
625a964af6fSMatthew Dillon 			RB_SCAN(hammer2_chain_tree, &chain->core.rbtree,
626a964af6fSMatthew Dillon 				NULL, hammer2_flush_recurse, info);
62719808ac9SMatthew Dillon 		}
628a964af6fSMatthew Dillon 		hammer2_spin_unex(&chain->core.spin);
629da6f36f4SMatthew Dillon 		info->parent = parent;
63065cacacfSMatthew Dillon 
63165cacacfSMatthew Dillon 		/*
63265cacacfSMatthew Dillon 		 * Re-set the flush bits if the flush was incomplete or
63365cacacfSMatthew Dillon 		 * an error occurred.  If an error occurs it is typically
63465cacacfSMatthew Dillon 		 * an allocation error.  Errors do not cause deferrals.
63565cacacfSMatthew Dillon 		 */
63665cacacfSMatthew Dillon 		if (info->error)
63765cacacfSMatthew Dillon 			hammer2_chain_setflush(chain);
63865cacacfSMatthew Dillon 		info->error |= save_error;
639fae225dcSMatthew Dillon 
640fae225dcSMatthew Dillon 		/*
641fae225dcSMatthew Dillon 		 * If we lost the parent->chain association we have to
642fae225dcSMatthew Dillon 		 * stop processing this chain because it is no longer
643fae225dcSMatthew Dillon 		 * in this recursion.  If it moved, it will be handled
644fae225dcSMatthew Dillon 		 * by the ONFLUSH flag elsewhere.
645fae225dcSMatthew Dillon 		 */
646fae225dcSMatthew Dillon 		if (chain->parent != parent) {
647fae225dcSMatthew Dillon 			kprintf("LOST CHILD2 %p->%p (actual parent %p)\n",
648fae225dcSMatthew Dillon 				parent, chain, chain->parent);
649fae225dcSMatthew Dillon 			goto done;
650fae225dcSMatthew Dillon 		}
6518138a154SMatthew Dillon 	}
6520924b3f8SMatthew Dillon 
65332b800e6SMatthew Dillon 	/*
654da6f36f4SMatthew Dillon 	 * Now we are in the bottom-up part of the recursion.
655da6f36f4SMatthew Dillon 	 *
656ecfe89b8SMatthew Dillon 	 * We continue to try to update the chain on lower-level errors, but
657ecfe89b8SMatthew Dillon 	 * the flush code may decide not to flush the volume root.
65865cacacfSMatthew Dillon 	 *
65965cacacfSMatthew Dillon 	 * XXX should we continue to try to update the chain if an error
66065cacacfSMatthew Dillon 	 *     occurred?
6618138a154SMatthew Dillon 	 */
6628138a154SMatthew Dillon 
6638138a154SMatthew Dillon 	/*
664fae225dcSMatthew Dillon 	 * Both parent and chain must be locked in order to flush chain,
665fae225dcSMatthew Dillon 	 * in order to properly update the parent under certain conditions.
666fae225dcSMatthew Dillon 	 *
667fae225dcSMatthew Dillon 	 * In addition, we can't safely unlock/relock the chain once we
668fae225dcSMatthew Dillon 	 * start flushing the chain itself, which we would have to do later
669fae225dcSMatthew Dillon 	 * on in order to lock the parent if we didn't do that now.
670fae225dcSMatthew Dillon 	 */
6716aaf5cb0SMatthew Dillon 	hammer2_chain_ref_hold(chain);
672fae225dcSMatthew Dillon 	hammer2_chain_unlock(chain);
673fae225dcSMatthew Dillon 	if (parent)
674fae225dcSMatthew Dillon 		hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
675fae225dcSMatthew Dillon 	hammer2_chain_lock(chain, HAMMER2_RESOLVE_MAYBE);
6766aaf5cb0SMatthew Dillon 	hammer2_chain_drop_unhold(chain);
67765cacacfSMatthew Dillon 
67865cacacfSMatthew Dillon 	/*
67965cacacfSMatthew Dillon 	 * Can't process if we can't access their content.
68065cacacfSMatthew Dillon 	 */
68165cacacfSMatthew Dillon 	if ((parent && parent->error) || chain->error) {
68265cacacfSMatthew Dillon 		kprintf("hammer2: chain error during flush\n");
68365cacacfSMatthew Dillon 		info->error |= chain->error;
68465cacacfSMatthew Dillon 		if (parent) {
68565cacacfSMatthew Dillon 			info->error |= parent->error;
68665cacacfSMatthew Dillon 			hammer2_chain_unlock(parent);
68765cacacfSMatthew Dillon 		}
68865cacacfSMatthew Dillon 		goto done;
68965cacacfSMatthew Dillon 	}
69065cacacfSMatthew Dillon 
691fae225dcSMatthew Dillon 	if (chain->parent != parent) {
69268b321c1SMatthew Dillon 		if (hammer2_debug & 0x0040) {
693fae225dcSMatthew Dillon 			kprintf("LOST CHILD3 %p->%p (actual parent %p)\n",
694fae225dcSMatthew Dillon 				parent, chain, chain->parent);
69568b321c1SMatthew Dillon 		}
696fae225dcSMatthew Dillon 		KKASSERT(parent != NULL);
697fae225dcSMatthew Dillon 		hammer2_chain_unlock(parent);
698ecfe89b8SMatthew Dillon 		retry = 1;
699fae225dcSMatthew Dillon 		goto done;
700fae225dcSMatthew Dillon 	}
701fae225dcSMatthew Dillon 
702fae225dcSMatthew Dillon 	/*
703da6f36f4SMatthew Dillon 	 * Propagate the DESTROY flag downwards.  This dummies up the flush
704da6f36f4SMatthew Dillon 	 * code and tries to invalidate related buffer cache buffers to
705da6f36f4SMatthew Dillon 	 * avoid the disk write.
706623d43d4SMatthew Dillon 	 */
707da6f36f4SMatthew Dillon 	if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY))
708da6f36f4SMatthew Dillon 		atomic_set_int(&chain->flags, HAMMER2_CHAIN_DESTROY);
709623d43d4SMatthew Dillon 
710623d43d4SMatthew Dillon 	/*
711e513e77eSMatthew Dillon 	 * Dispose of the modified bit.
712e513e77eSMatthew Dillon 	 *
7133f4ec3cfSMatthew Dillon 	 * If parent is present, the UPDATE bit should already be set.
714e513e77eSMatthew Dillon 	 * UPDATE should already be set.
715e513e77eSMatthew Dillon 	 * bref.mirror_tid should already be set.
71632b800e6SMatthew Dillon 	 */
71765cacacfSMatthew Dillon 	if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
718da6f36f4SMatthew Dillon 		KKASSERT((chain->flags & HAMMER2_CHAIN_UPDATE) ||
7193f4ec3cfSMatthew Dillon 			 chain->parent == NULL);
7200dea3156SMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED);
721f9f4459eSMatthew Dillon 		atomic_add_long(&hammer2_count_modified_chains, -1);
7228db69c9fSMatthew Dillon 
7238db69c9fSMatthew Dillon 		/*
724e513e77eSMatthew Dillon 		 * Manage threads waiting for excessive dirty memory to
725e513e77eSMatthew Dillon 		 * be retired.
7268db69c9fSMatthew Dillon 		 */
727e513e77eSMatthew Dillon 		if (chain->pmp)
728e513e77eSMatthew Dillon 			hammer2_pfs_memory_wakeup(chain->pmp);
7298138a154SMatthew Dillon 
7303f4ec3cfSMatthew Dillon #if 0
7313f4ec3cfSMatthew Dillon 		if ((chain->flags & HAMMER2_CHAIN_UPDATE) == 0 &&
7323f4ec3cfSMatthew Dillon 		    chain != &hmp->vchain &&
7333f4ec3cfSMatthew Dillon 		    chain != &hmp->fchain) {
7348138a154SMatthew Dillon 			/*
7353f4ec3cfSMatthew Dillon 			 * Set UPDATE bit indicating that the parent block
7363f4ec3cfSMatthew Dillon 			 * table requires updating.
7378138a154SMatthew Dillon 			 */
738da6f36f4SMatthew Dillon 			atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
7390dea3156SMatthew Dillon 		}
7403f4ec3cfSMatthew Dillon #endif
7410dea3156SMatthew Dillon 
7420dea3156SMatthew Dillon 		/*
743a71db85dSMatthew Dillon 		 * Issue the flush.  This is indirect via the DIO.
7440dea3156SMatthew Dillon 		 *
745a71db85dSMatthew Dillon 		 * NOTE: A DELETED node that reaches this point must be
746a71db85dSMatthew Dillon 		 *	 flushed for synchronization point consistency.
747a71db85dSMatthew Dillon 		 *
748a71db85dSMatthew Dillon 		 * NOTE: Even though MODIFIED was already set, the related DIO
749a71db85dSMatthew Dillon 		 *	 might not be dirty due to a system buffer cache
750a71db85dSMatthew Dillon 		 *	 flush and must be set dirty if we are going to make
751a71db85dSMatthew Dillon 		 *	 further modifications to the buffer.  Chains with
752a71db85dSMatthew Dillon 		 *	 embedded data don't need this.
7530dea3156SMatthew Dillon 		 */
754a7720be7SMatthew Dillon 		if (hammer2_debug & 0x1000) {
7557fece146SMatthew Dillon 			kprintf("Flush %p.%d %016jx/%d data=%016jx\n",
756a7720be7SMatthew Dillon 				chain, chain->bref.type,
757c603b86bSMatthew Dillon 				(uintmax_t)chain->bref.key,
758c603b86bSMatthew Dillon 				chain->bref.keybits,
759c603b86bSMatthew Dillon 				(uintmax_t)chain->bref.data_off);
760a7720be7SMatthew Dillon 		}
761a7720be7SMatthew Dillon 		if (hammer2_debug & 0x2000) {
762a7720be7SMatthew Dillon 			Debugger("Flush hell");
763a7720be7SMatthew Dillon 		}
76410136ab6SMatthew Dillon 
76532b800e6SMatthew Dillon 		/*
766da6f36f4SMatthew Dillon 		 * Update chain CRCs for flush.
76732b800e6SMatthew Dillon 		 *
768da6f36f4SMatthew Dillon 		 * NOTE: Volume headers are NOT flushed here as they require
769da6f36f4SMatthew Dillon 		 *	 special processing.
77032b800e6SMatthew Dillon 		 */
77132b800e6SMatthew Dillon 		switch(chain->bref.type) {
7721a7cfe5aSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP:
773a71db85dSMatthew Dillon 			/*
774e513e77eSMatthew Dillon 			 * Update the volume header's freemap_tid to the
775e513e77eSMatthew Dillon 			 * freemap's flushing mirror_tid.
776e513e77eSMatthew Dillon 			 *
777a71db85dSMatthew Dillon 			 * (note: embedded data, do not call setdirty)
778a71db85dSMatthew Dillon 			 */
77950456506SMatthew Dillon 			KKASSERT(hmp->vchain.flags & HAMMER2_CHAIN_MODIFIED);
780e513e77eSMatthew Dillon 			KKASSERT(chain == &hmp->fchain);
781e513e77eSMatthew Dillon 			hmp->voldata.freemap_tid = chain->bref.mirror_tid;
7825d37f96dSMatthew Dillon 			if (hammer2_debug & 0x8000) {
7835d37f96dSMatthew Dillon 				/* debug only, avoid syslogd loop */
784e513e77eSMatthew Dillon 				kprintf("sync freemap mirror_tid %08jx\n",
785e513e77eSMatthew Dillon 					(intmax_t)chain->bref.mirror_tid);
7865d37f96dSMatthew Dillon 			}
787e513e77eSMatthew Dillon 
788e513e77eSMatthew Dillon 			/*
789e513e77eSMatthew Dillon 			 * The freemap can be flushed independently of the
790e513e77eSMatthew Dillon 			 * main topology, but for the case where it is
791e513e77eSMatthew Dillon 			 * flushed in the same transaction, and flushed
792e513e77eSMatthew Dillon 			 * before vchain (a case we want to allow for
793e513e77eSMatthew Dillon 			 * performance reasons), make sure modifications
794e513e77eSMatthew Dillon 			 * made during the flush under vchain use a new
795e513e77eSMatthew Dillon 			 * transaction id.
796e513e77eSMatthew Dillon 			 *
797e513e77eSMatthew Dillon 			 * Otherwise the mount recovery code will get confused.
798e513e77eSMatthew Dillon 			 */
799e513e77eSMatthew Dillon 			++hmp->voldata.mirror_tid;
8001a7cfe5aSMatthew Dillon 			break;
80132b800e6SMatthew Dillon 		case HAMMER2_BREF_TYPE_VOLUME:
80232b800e6SMatthew Dillon 			/*
803e513e77eSMatthew Dillon 			 * The free block table is flushed by
804e513e77eSMatthew Dillon 			 * hammer2_vfs_sync() before it flushes vchain.
805e513e77eSMatthew Dillon 			 * We must still hold fchain locked while copying
806e513e77eSMatthew Dillon 			 * voldata to volsync, however.
807a71db85dSMatthew Dillon 			 *
80865cacacfSMatthew Dillon 			 * These do not error per se since their data does
80965cacacfSMatthew Dillon 			 * not need to be re-read from media on lock.
81065cacacfSMatthew Dillon 			 *
811a71db85dSMatthew Dillon 			 * (note: embedded data, do not call setdirty)
8121a7cfe5aSMatthew Dillon 			 */
813da6f36f4SMatthew Dillon 			hammer2_chain_lock(&hmp->fchain,
814da6f36f4SMatthew Dillon 					   HAMMER2_RESOLVE_ALWAYS);
815a6cf1052SMatthew Dillon 			hammer2_voldata_lock(hmp);
8165d37f96dSMatthew Dillon 			if (hammer2_debug & 0x8000) {
8175d37f96dSMatthew Dillon 				/* debug only, avoid syslogd loop */
818e513e77eSMatthew Dillon 				kprintf("sync volume  mirror_tid %08jx\n",
819da6f36f4SMatthew Dillon 					(intmax_t)chain->bref.mirror_tid);
8205d37f96dSMatthew Dillon 			}
8211a7cfe5aSMatthew Dillon 
8221a7cfe5aSMatthew Dillon 			/*
823e513e77eSMatthew Dillon 			 * Update the volume header's mirror_tid to the
824e513e77eSMatthew Dillon 			 * main topology's flushing mirror_tid.  It is
825e513e77eSMatthew Dillon 			 * possible that voldata.mirror_tid is already
826e513e77eSMatthew Dillon 			 * beyond bref.mirror_tid due to the bump we made
827e513e77eSMatthew Dillon 			 * above in BREF_TYPE_FREEMAP.
828e513e77eSMatthew Dillon 			 */
829e513e77eSMatthew Dillon 			if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) {
830e513e77eSMatthew Dillon 				hmp->voldata.mirror_tid =
831e513e77eSMatthew Dillon 					chain->bref.mirror_tid;
832e513e77eSMatthew Dillon 			}
833e513e77eSMatthew Dillon 
834e513e77eSMatthew Dillon 			/*
835da6f36f4SMatthew Dillon 			 * The volume header is flushed manually by the
836da6f36f4SMatthew Dillon 			 * syncer, not here.  All we do here is adjust the
837da6f36f4SMatthew Dillon 			 * crc's.
83832b800e6SMatthew Dillon 			 */
83932b800e6SMatthew Dillon 			KKASSERT(chain->data != NULL);
840fdf62707SMatthew Dillon 			KKASSERT(chain->dio == NULL);
84132b800e6SMatthew Dillon 
84232b800e6SMatthew Dillon 			hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
84332b800e6SMatthew Dillon 				hammer2_icrc32(
84432b800e6SMatthew Dillon 					(char *)&hmp->voldata +
84532b800e6SMatthew Dillon 					 HAMMER2_VOLUME_ICRC1_OFF,
84632b800e6SMatthew Dillon 					HAMMER2_VOLUME_ICRC1_SIZE);
84732b800e6SMatthew Dillon 			hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
84832b800e6SMatthew Dillon 				hammer2_icrc32(
84932b800e6SMatthew Dillon 					(char *)&hmp->voldata +
85032b800e6SMatthew Dillon 					 HAMMER2_VOLUME_ICRC0_OFF,
85132b800e6SMatthew Dillon 					HAMMER2_VOLUME_ICRC0_SIZE);
85232b800e6SMatthew Dillon 			hmp->voldata.icrc_volheader =
85332b800e6SMatthew Dillon 				hammer2_icrc32(
85432b800e6SMatthew Dillon 					(char *)&hmp->voldata +
85532b800e6SMatthew Dillon 					 HAMMER2_VOLUME_ICRCVH_OFF,
85632b800e6SMatthew Dillon 					HAMMER2_VOLUME_ICRCVH_SIZE);
857e513e77eSMatthew Dillon 
8585d37f96dSMatthew Dillon 			if (hammer2_debug & 0x8000) {
8595d37f96dSMatthew Dillon 				/* debug only, avoid syslogd loop */
860e513e77eSMatthew Dillon 				kprintf("syncvolhdr %016jx %016jx\n",
861e513e77eSMatthew Dillon 					hmp->voldata.mirror_tid,
862e513e77eSMatthew Dillon 					hmp->vchain.bref.mirror_tid);
8635d37f96dSMatthew Dillon 			}
86432b800e6SMatthew Dillon 			hmp->volsync = hmp->voldata;
8650dea3156SMatthew Dillon 			atomic_set_int(&chain->flags, HAMMER2_CHAIN_VOLUMESYNC);
86650456506SMatthew Dillon 			hammer2_voldata_unlock(hmp);
867a6cf1052SMatthew Dillon 			hammer2_chain_unlock(&hmp->fchain);
86832b800e6SMatthew Dillon 			break;
86932b800e6SMatthew Dillon 		case HAMMER2_BREF_TYPE_DATA:
87032b800e6SMatthew Dillon 			/*
871da6f36f4SMatthew Dillon 			 * Data elements have already been flushed via the
872da6f36f4SMatthew Dillon 			 * logical file buffer cache.  Their hash was set in
873a71db85dSMatthew Dillon 			 * the bref by the vop_write code.  Do not re-dirty.
87432b800e6SMatthew Dillon 			 *
875da6f36f4SMatthew Dillon 			 * Make sure any device buffer(s) have been flushed
876da6f36f4SMatthew Dillon 			 * out here (there aren't usually any to flush) XXX.
87732b800e6SMatthew Dillon 			 */
87832b800e6SMatthew Dillon 			break;
879512beabdSMatthew Dillon 		case HAMMER2_BREF_TYPE_INDIRECT:
8801a7cfe5aSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
88191caa51cSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
882da6f36f4SMatthew Dillon 			/*
883da6f36f4SMatthew Dillon 			 * Buffer I/O will be cleaned up when the volume is
884da6f36f4SMatthew Dillon 			 * flushed (but the kernel is free to flush it before
885da6f36f4SMatthew Dillon 			 * then, as well).
886da6f36f4SMatthew Dillon 			 */
88750456506SMatthew Dillon 			KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
888a71db85dSMatthew Dillon 			hammer2_chain_setcheck(chain, chain->data);
88950456506SMatthew Dillon 			break;
890da0cdd33SMatthew Dillon 		case HAMMER2_BREF_TYPE_DIRENT:
891da0cdd33SMatthew Dillon 			/*
892da0cdd33SMatthew Dillon 			 * A directory entry can use the check area to store
893da0cdd33SMatthew Dillon 			 * the filename for filenames <= 64 bytes, don't blow
894da0cdd33SMatthew Dillon 			 * it up!
895da0cdd33SMatthew Dillon 			 */
896da0cdd33SMatthew Dillon 			KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
897da0cdd33SMatthew Dillon 			if (chain->bytes)
898da0cdd33SMatthew Dillon 				hammer2_chain_setcheck(chain, chain->data);
899da0cdd33SMatthew Dillon 			break;
90091caa51cSMatthew Dillon 		case HAMMER2_BREF_TYPE_INODE:
901a71db85dSMatthew Dillon 			/*
902a71db85dSMatthew Dillon 			 * NOTE: We must call io_setdirty() to make any late
903a71db85dSMatthew Dillon 			 *	 changes to the inode data, the system might
904a71db85dSMatthew Dillon 			 *	 have already flushed the buffer.
905a71db85dSMatthew Dillon 			 */
906b0f58de8SMatthew Dillon 			if (chain->data->ipdata.meta.op_flags &
907da6f36f4SMatthew Dillon 			    HAMMER2_OPFLAG_PFSROOT) {
908837bd39bSMatthew Dillon 				/*
909da6f36f4SMatthew Dillon 				 * non-NULL pmp if mounted as a PFS.  We must
91018e8ab5fSMatthew Dillon 				 * sync fields cached in the pmp? XXX
911837bd39bSMatthew Dillon 				 */
912837bd39bSMatthew Dillon 				hammer2_inode_data_t *ipdata;
913837bd39bSMatthew Dillon 
914a71db85dSMatthew Dillon 				hammer2_io_setdirty(chain->dio);
915837bd39bSMatthew Dillon 				ipdata = &chain->data->ipdata;
916e513e77eSMatthew Dillon 				if (chain->pmp) {
917b0f58de8SMatthew Dillon 					ipdata->meta.pfs_inum =
918e513e77eSMatthew Dillon 						chain->pmp->inode_tid;
919e513e77eSMatthew Dillon 				}
92050456506SMatthew Dillon 			} else {
92150456506SMatthew Dillon 				/* can't be mounted as a PFS */
92250456506SMatthew Dillon 			}
923b3659de2SMatthew Dillon 
924512beabdSMatthew Dillon 			KKASSERT((chain->flags & HAMMER2_CHAIN_EMBEDDED) == 0);
925a71db85dSMatthew Dillon 			hammer2_chain_setcheck(chain, chain->data);
9261a7cfe5aSMatthew Dillon 			break;
92732b800e6SMatthew Dillon 		default:
92891caa51cSMatthew Dillon 			KKASSERT(chain->flags & HAMMER2_CHAIN_EMBEDDED);
929da6f36f4SMatthew Dillon 			panic("hammer2_flush_core: unsupported "
930da6f36f4SMatthew Dillon 			      "embedded bref %d",
93191caa51cSMatthew Dillon 			      chain->bref.type);
93291caa51cSMatthew Dillon 			/* NOT REACHED */
93332b800e6SMatthew Dillon 		}
93432b800e6SMatthew Dillon 
93532b800e6SMatthew Dillon 		/*
9363d4f397aSMatthew Dillon 		 * If the chain was destroyed try to avoid unnecessary I/O
9373d4f397aSMatthew Dillon 		 * that might not have yet occurred.  Remove the data range
9383d4f397aSMatthew Dillon 		 * from dedup candidacy and attempt to invalidate the
9393d4f397aSMatthew Dillon 		 * potentially dirty portion of the I/O buffer.
940da6f36f4SMatthew Dillon 		 */
9417767d389SMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_DESTROY) {
9420b8efeb7SMatthew Dillon 			hammer2_io_dedup_delete(hmp,
9430b8efeb7SMatthew Dillon 						chain->bref.type,
9443d4f397aSMatthew Dillon 						chain->bref.data_off,
9453d4f397aSMatthew Dillon 						chain->bytes);
9463d4f397aSMatthew Dillon #if 0
9477767d389SMatthew Dillon 			hammer2_io_t *dio;
9487767d389SMatthew Dillon 			if (chain->dio) {
9493d4f397aSMatthew Dillon 				hammer2_io_inval(chain->dio,
9507d565a4fSMatthew Dillon 						 chain->bref.data_off,
9517d565a4fSMatthew Dillon 						 chain->bytes);
9527767d389SMatthew Dillon 			} else if ((dio = hammer2_io_getquick(hmp,
9537767d389SMatthew Dillon 						  chain->bref.data_off,
9543d4f397aSMatthew Dillon 						  chain->bytes,
9553d4f397aSMatthew Dillon 						  1)) != NULL) {
9563d4f397aSMatthew Dillon 				hammer2_io_inval(dio,
9577767d389SMatthew Dillon 						 chain->bref.data_off,
9587767d389SMatthew Dillon 						 chain->bytes);
9597767d389SMatthew Dillon 				hammer2_io_putblk(&dio);
9607767d389SMatthew Dillon 			}
9613d4f397aSMatthew Dillon #endif
962da6f36f4SMatthew Dillon 		}
963da6f36f4SMatthew Dillon 	}
964da6f36f4SMatthew Dillon 
965da6f36f4SMatthew Dillon 	/*
966da6f36f4SMatthew Dillon 	 * If UPDATE is set the parent block table may need to be updated.
96765cacacfSMatthew Dillon 	 * This can fail if the hammer2_chain_modify() fails.
968da6f36f4SMatthew Dillon 	 *
969da6f36f4SMatthew Dillon 	 * NOTE: UPDATE may be set on vchain or fchain in which case
970ecfe89b8SMatthew Dillon 	 *	 parent could be NULL, or on an inode that has not yet
971ecfe89b8SMatthew Dillon 	 *	 been inserted into the radix tree.  It's easiest to allow
972ecfe89b8SMatthew Dillon 	 *	 the case and test for NULL.  parent can also wind up being
973ecfe89b8SMatthew Dillon 	 *	 NULL due to a deletion so we need to handle the case anyway.
974ecfe89b8SMatthew Dillon 	 *
975ecfe89b8SMatthew Dillon 	 * NOTE: UPDATE can be set when chains are renamed into or out of
976ecfe89b8SMatthew Dillon 	 *	 an indirect block, without the chain itself being flagged
977ecfe89b8SMatthew Dillon 	 *	 MODIFIED.
978da6f36f4SMatthew Dillon 	 *
979da6f36f4SMatthew Dillon 	 * If no parent exists we can just clear the UPDATE bit.  If the
980da6f36f4SMatthew Dillon 	 * chain gets reattached later on the bit will simply get set
981da6f36f4SMatthew Dillon 	 * again.
982da6f36f4SMatthew Dillon 	 */
9833f4ec3cfSMatthew Dillon 	if ((chain->flags & HAMMER2_CHAIN_UPDATE) && parent == NULL)
984da6f36f4SMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
985da6f36f4SMatthew Dillon 
986da6f36f4SMatthew Dillon 	/*
987ecfe89b8SMatthew Dillon 	 * When flushing an inode outside of a FLUSH_FSSYNC we must NOT
988ecfe89b8SMatthew Dillon 	 * update the parent block table to point at the flushed inode.
989ecfe89b8SMatthew Dillon 	 * The block table should only ever be updated by the filesystem
990ecfe89b8SMatthew Dillon 	 * sync code.  If we do, inode<->inode dependencies (such as
991ecfe89b8SMatthew Dillon 	 * directory entries vs inode nlink count) can wind up not being
992ecfe89b8SMatthew Dillon 	 * flushed together and result in a broken topology if a crash/reboot
993ecfe89b8SMatthew Dillon 	 * occurs at the wrong time.
994ecfe89b8SMatthew Dillon 	 */
995ecfe89b8SMatthew Dillon 	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE &&
9966f445d15SMatthew Dillon 	    (flags & HAMMER2_FLUSH_INODE_STOP) &&
997ecfe89b8SMatthew Dillon 	    (flags & HAMMER2_FLUSH_FSSYNC) == 0 &&
998ecfe89b8SMatthew Dillon 	    (flags & HAMMER2_FLUSH_ALL) == 0 &&
999ecfe89b8SMatthew Dillon 	    chain->pmp && chain->pmp->mp) {
10006f445d15SMatthew Dillon #ifdef HAMMER2_DEBUG_SYNC
10016f445d15SMatthew Dillon 		kprintf("inum %ld do not update parent, non-fssync\n",
10026f445d15SMatthew Dillon 			(long)chain->bref.key);
10036f445d15SMatthew Dillon #endif
1004ecfe89b8SMatthew Dillon 		goto skipupdate;
1005ecfe89b8SMatthew Dillon 	}
10066f445d15SMatthew Dillon #ifdef HAMMER2_DEBUG_SYNC
10076f445d15SMatthew Dillon 	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
10086f445d15SMatthew Dillon 		kprintf("inum %ld update parent\n", (long)chain->bref.key);
10096f445d15SMatthew Dillon #endif
1010ecfe89b8SMatthew Dillon 
1011ecfe89b8SMatthew Dillon 	/*
1012ecfe89b8SMatthew Dillon 	 * The chain may need its blockrefs updated in the parent, normal
1013ecfe89b8SMatthew Dillon 	 * path.
1014da6f36f4SMatthew Dillon 	 */
1015da6f36f4SMatthew Dillon 	if (chain->flags & HAMMER2_CHAIN_UPDATE) {
1016da6f36f4SMatthew Dillon 		hammer2_blockref_t *base;
1017da6f36f4SMatthew Dillon 		int count;
1018da6f36f4SMatthew Dillon 
1019da6f36f4SMatthew Dillon 		/*
1020a6cf1052SMatthew Dillon 		 * Clear UPDATE flag, mark parent modified, update its
1021a6cf1052SMatthew Dillon 		 * modify_tid if necessary, and adjust the parent blockmap.
1022da6f36f4SMatthew Dillon 		 */
1023da6f36f4SMatthew Dillon 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
1024a6cf1052SMatthew Dillon 
1025eedd52a3SMatthew Dillon 		/*
1026eedd52a3SMatthew Dillon 		 * (optional code)
1027eedd52a3SMatthew Dillon 		 *
1028eedd52a3SMatthew Dillon 		 * Avoid actually modifying and updating the parent if it
1029eedd52a3SMatthew Dillon 		 * was flagged for destruction.  This can greatly reduce
1030eedd52a3SMatthew Dillon 		 * disk I/O in large tree removals because the
1031eedd52a3SMatthew Dillon 		 * hammer2_io_setinval() call in the upward recursion
1032eedd52a3SMatthew Dillon 		 * (see MODIFIED code above) can only handle a few cases.
1033eedd52a3SMatthew Dillon 		 */
1034eedd52a3SMatthew Dillon 		if (parent->flags & HAMMER2_CHAIN_DESTROY) {
1035eedd52a3SMatthew Dillon 			if (parent->bref.modify_tid < chain->bref.modify_tid) {
1036eedd52a3SMatthew Dillon 				parent->bref.modify_tid =
1037eedd52a3SMatthew Dillon 					chain->bref.modify_tid;
1038eedd52a3SMatthew Dillon 			}
1039eedd52a3SMatthew Dillon 			atomic_clear_int(&chain->flags, HAMMER2_CHAIN_BMAPPED |
1040eedd52a3SMatthew Dillon 							HAMMER2_CHAIN_BMAPUPD);
1041eedd52a3SMatthew Dillon 			goto skipupdate;
1042eedd52a3SMatthew Dillon 		}
1043eedd52a3SMatthew Dillon 
1044eedd52a3SMatthew Dillon 		/*
1045470dad14SMatthew Dillon 		 * The flusher is responsible for deleting empty indirect
1046470dad14SMatthew Dillon 		 * blocks at this point.  If we don't do this, no major harm
1047470dad14SMatthew Dillon 		 * will be done but the empty indirect blocks will stay in
1048850d3f60SMatthew Dillon 		 * the topology and make it messy and inefficient.
104930b0abf3SMatthew Dillon 		 *
1050850d3f60SMatthew Dillon 		 * The flusher is also responsible for collapsing the
1051850d3f60SMatthew Dillon 		 * content of an indirect block into its parent whenever
1052850d3f60SMatthew Dillon 		 * possible (with some hysteresis).  Not doing this will also
1053850d3f60SMatthew Dillon 		 * not harm the topology, but would make it messy and
1054850d3f60SMatthew Dillon 		 * inefficient.
1055470dad14SMatthew Dillon 		 */
1056850d3f60SMatthew Dillon 		if (chain->bref.type == HAMMER2_BREF_TYPE_INDIRECT) {
1057850d3f60SMatthew Dillon 			if (hammer2_chain_indirect_maintenance(parent, chain))
1058470dad14SMatthew Dillon 				goto skipupdate;
1059470dad14SMatthew Dillon 		}
1060470dad14SMatthew Dillon 
1061470dad14SMatthew Dillon 		/*
1062eedd52a3SMatthew Dillon 		 * We are updating the parent's blockmap, the parent must
106365cacacfSMatthew Dillon 		 * be set modified.  If this fails we re-set the UPDATE flag
106465cacacfSMatthew Dillon 		 * in the child.
106565cacacfSMatthew Dillon 		 *
106665cacacfSMatthew Dillon 		 * NOTE! A modification error can be ENOSPC.  We still want
106765cacacfSMatthew Dillon 		 *	 to flush modified chains recursively, not break out,
106865cacacfSMatthew Dillon 		 *	 so we just skip the update in this situation and
106965cacacfSMatthew Dillon 		 *	 continue.  That is, we still need to try to clean
107065cacacfSMatthew Dillon 		 *	 out dirty chains and buffers.
107165cacacfSMatthew Dillon 		 *
107265cacacfSMatthew Dillon 		 *	 This may not help bulkfree though. XXX
1073eedd52a3SMatthew Dillon 		 */
107465cacacfSMatthew Dillon 		save_error = hammer2_chain_modify(parent, 0, 0, 0);
107565cacacfSMatthew Dillon 		if (save_error) {
107665cacacfSMatthew Dillon 			info->error |= save_error;
107765cacacfSMatthew Dillon 			kprintf("hammer2_flush: %016jx.%02x error=%08x\n",
107865cacacfSMatthew Dillon 				parent->bref.data_off, parent->bref.type,
107965cacacfSMatthew Dillon 				save_error);
108065cacacfSMatthew Dillon 			atomic_set_int(&chain->flags, HAMMER2_CHAIN_UPDATE);
108165cacacfSMatthew Dillon 			goto skipupdate;
108265cacacfSMatthew Dillon 		}
1083a6cf1052SMatthew Dillon 		if (parent->bref.modify_tid < chain->bref.modify_tid)
1084a6cf1052SMatthew Dillon 			parent->bref.modify_tid = chain->bref.modify_tid;
1085da6f36f4SMatthew Dillon 
1086da6f36f4SMatthew Dillon 		/*
1087da6f36f4SMatthew Dillon 		 * Calculate blockmap pointer
1088da6f36f4SMatthew Dillon 		 */
1089da6f36f4SMatthew Dillon 		switch(parent->bref.type) {
1090da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_INODE:
1091da6f36f4SMatthew Dillon 			/*
1092da6f36f4SMatthew Dillon 			 * Access the inode's block array.  However, there is
1093da6f36f4SMatthew Dillon 			 * no block array if the inode is flagged DIRECTDATA.
1094da6f36f4SMatthew Dillon 			 */
1095da6f36f4SMatthew Dillon 			if (parent->data &&
1096b0f58de8SMatthew Dillon 			    (parent->data->ipdata.meta.op_flags &
1097da6f36f4SMatthew Dillon 			     HAMMER2_OPFLAG_DIRECTDATA) == 0) {
1098da6f36f4SMatthew Dillon 				base = &parent->data->
1099da6f36f4SMatthew Dillon 					ipdata.u.blockset.blockref[0];
1100da6f36f4SMatthew Dillon 			} else {
1101da6f36f4SMatthew Dillon 				base = NULL;
1102da6f36f4SMatthew Dillon 			}
1103da6f36f4SMatthew Dillon 			count = HAMMER2_SET_COUNT;
1104da6f36f4SMatthew Dillon 			break;
1105da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_INDIRECT:
1106da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
1107da6f36f4SMatthew Dillon 			if (parent->data)
1108da6f36f4SMatthew Dillon 				base = &parent->data->npdata[0];
1109da6f36f4SMatthew Dillon 			else
1110da6f36f4SMatthew Dillon 				base = NULL;
1111da6f36f4SMatthew Dillon 			count = parent->bytes / sizeof(hammer2_blockref_t);
1112da6f36f4SMatthew Dillon 			break;
1113da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_VOLUME:
1114da6f36f4SMatthew Dillon 			base = &chain->hmp->voldata.sroot_blockset.blockref[0];
1115da6f36f4SMatthew Dillon 			count = HAMMER2_SET_COUNT;
1116da6f36f4SMatthew Dillon 			break;
1117da6f36f4SMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP:
1118da6f36f4SMatthew Dillon 			base = &parent->data->npdata[0];
1119da6f36f4SMatthew Dillon 			count = HAMMER2_SET_COUNT;
1120da6f36f4SMatthew Dillon 			break;
1121da6f36f4SMatthew Dillon 		default:
1122da6f36f4SMatthew Dillon 			base = NULL;
1123da6f36f4SMatthew Dillon 			count = 0;
1124da6f36f4SMatthew Dillon 			panic("hammer2_flush_core: "
1125da6f36f4SMatthew Dillon 			      "unrecognized blockref type: %d",
1126da6f36f4SMatthew Dillon 			      parent->bref.type);
1127da6f36f4SMatthew Dillon 		}
1128da6f36f4SMatthew Dillon 
1129da6f36f4SMatthew Dillon 		/*
1130da6f36f4SMatthew Dillon 		 * Blocktable updates
1131b3659de2SMatthew Dillon 		 *
1132b3659de2SMatthew Dillon 		 * We synchronize pending statistics at this time.  Delta
1133b3659de2SMatthew Dillon 		 * adjustments designated for the current and upper level
1134b3659de2SMatthew Dillon 		 * are synchronized.
1135da6f36f4SMatthew Dillon 		 */
1136da6f36f4SMatthew Dillon 		if (base && (chain->flags & HAMMER2_CHAIN_BMAPUPD)) {
1137da6f36f4SMatthew Dillon 			if (chain->flags & HAMMER2_CHAIN_BMAPPED) {
11380cc33e20SMatthew Dillon 				hammer2_spin_ex(&parent->core.spin);
1139ecfe89b8SMatthew Dillon 				hammer2_base_delete(parent, base, count, chain,
1140ecfe89b8SMatthew Dillon 						    NULL);
11410cc33e20SMatthew Dillon 				hammer2_spin_unex(&parent->core.spin);
1142b3659de2SMatthew Dillon 				/* base_delete clears both bits */
1143b3659de2SMatthew Dillon 			} else {
1144b3659de2SMatthew Dillon 				atomic_clear_int(&chain->flags,
1145b3659de2SMatthew Dillon 						 HAMMER2_CHAIN_BMAPUPD);
1146da6f36f4SMatthew Dillon 			}
1147da6f36f4SMatthew Dillon 		}
1148da6f36f4SMatthew Dillon 		if (base && (chain->flags & HAMMER2_CHAIN_BMAPPED) == 0) {
11490cc33e20SMatthew Dillon 			hammer2_spin_ex(&parent->core.spin);
1150850d3f60SMatthew Dillon 			hammer2_base_insert(parent, base, count,
1151850d3f60SMatthew Dillon 					    chain, &chain->bref);
11520cc33e20SMatthew Dillon 			hammer2_spin_unex(&parent->core.spin);
1153b3659de2SMatthew Dillon 			/* base_insert sets BMAPPED */
1154da6f36f4SMatthew Dillon 		}
1155da6f36f4SMatthew Dillon 	}
1156eedd52a3SMatthew Dillon skipupdate:
1157fae225dcSMatthew Dillon 	if (parent)
1158fae225dcSMatthew Dillon 		hammer2_chain_unlock(parent);
1159da6f36f4SMatthew Dillon 
1160da6f36f4SMatthew Dillon 	/*
11618138a154SMatthew Dillon 	 * Final cleanup after flush
11628138a154SMatthew Dillon 	 */
11638138a154SMatthew Dillon done:
1164e513e77eSMatthew Dillon 	KKASSERT(chain->refs > 0);
1165850687d2SMatthew Dillon 	if (hammer2_debug & 0x200) {
1166850687d2SMatthew Dillon 		if (info->debug == chain)
1167850687d2SMatthew Dillon 			info->debug = NULL;
1168850687d2SMatthew Dillon 	}
1169ecfe89b8SMatthew Dillon 	return retry;
11708138a154SMatthew Dillon }
11718138a154SMatthew Dillon 
11728138a154SMatthew Dillon /*
1173da6f36f4SMatthew Dillon  * Flush recursion helper, called from flush_core, calls flush_core.
11740dea3156SMatthew Dillon  *
11758138a154SMatthew Dillon  * Flushes the children of the caller's chain (info->parent), restricted
11768138a154SMatthew Dillon  * by sync_tid.  Set info->domodify if the child's blockref must propagate
11778138a154SMatthew Dillon  * back up to the parent.
11780dea3156SMatthew Dillon  *
117965cacacfSMatthew Dillon  * This function may set info->error as a side effect.
118065cacacfSMatthew Dillon  *
11818138a154SMatthew Dillon  * Ripouts can move child from rbtree to dbtree or dbq but the caller's
11828138a154SMatthew Dillon  * flush scan order prevents any chains from being lost.  A child can be
1183da6f36f4SMatthew Dillon  * executes more than once.
1184ea155208SMatthew Dillon  *
11858138a154SMatthew Dillon  * WARNING! If we do not call hammer2_flush_core() we must update
11868138a154SMatthew Dillon  *	    bref.mirror_tid ourselves to indicate that the flush has
11878138a154SMatthew Dillon  *	    processed the child.
1188925e4ad1SMatthew Dillon  *
11898138a154SMatthew Dillon  * WARNING! parent->core spinlock is held on entry and return.
119032b800e6SMatthew Dillon  */
11910dea3156SMatthew Dillon static int
1192da6f36f4SMatthew Dillon hammer2_flush_recurse(hammer2_chain_t *child, void *data)
119332b800e6SMatthew Dillon {
11940dea3156SMatthew Dillon 	hammer2_flush_info_t *info = data;
11950dea3156SMatthew Dillon 	hammer2_chain_t *parent = info->parent;
1196925e4ad1SMatthew Dillon 
11976aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
11986aaf5cb0SMatthew Dillon 	++info->scan_count;
11996aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_MODIFIED)
12006aaf5cb0SMatthew Dillon 		++info->scan_mod_count;
12016aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_UPDATE)
12026aaf5cb0SMatthew Dillon 		++info->scan_upd_count;
12036aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_ONFLUSH)
12046aaf5cb0SMatthew Dillon 		++info->scan_onf_count;
12056aaf5cb0SMatthew Dillon #endif
12066aaf5cb0SMatthew Dillon 
12070dea3156SMatthew Dillon 	/*
120810136ab6SMatthew Dillon 	 * (child can never be fchain or vchain so a special check isn't
120910136ab6SMatthew Dillon 	 *  needed).
1210da6f36f4SMatthew Dillon 	 *
1211a4dc31e0SMatthew Dillon 	 * We must ref the child before unlocking the spinlock.
1212a4dc31e0SMatthew Dillon 	 *
1213a4dc31e0SMatthew Dillon 	 * The caller has added a ref to the parent so we can temporarily
1214fae225dcSMatthew Dillon 	 * unlock it in order to lock the child.  However, if it no longer
1215fae225dcSMatthew Dillon 	 * winds up being the child of the parent we must skip this child.
121665cacacfSMatthew Dillon 	 *
121765cacacfSMatthew Dillon 	 * NOTE! chain locking errors are fatal.  They are never out-of-space
121865cacacfSMatthew Dillon 	 *	 errors.
1219a4dc31e0SMatthew Dillon 	 */
1220ea155208SMatthew Dillon 	hammer2_chain_ref(child);
122194491fa0SMatthew Dillon 	hammer2_spin_unex(&parent->core.spin);
12220dea3156SMatthew Dillon 
12236aaf5cb0SMatthew Dillon 	hammer2_chain_ref_hold(parent);
12240dea3156SMatthew Dillon 	hammer2_chain_unlock(parent);
12250dea3156SMatthew Dillon 	hammer2_chain_lock(child, HAMMER2_RESOLVE_MAYBE);
1226fae225dcSMatthew Dillon 	if (child->parent != parent) {
1227fae225dcSMatthew Dillon 		kprintf("LOST CHILD1 %p->%p (actual parent %p)\n",
1228fae225dcSMatthew Dillon 			parent, child, child->parent);
1229fae225dcSMatthew Dillon 		goto done;
1230fae225dcSMatthew Dillon 	}
123165cacacfSMatthew Dillon 	if (child->error) {
123265cacacfSMatthew Dillon 		kprintf("CHILD ERROR DURING FLUSH LOCK %p->%p\n",
123365cacacfSMatthew Dillon 			parent, child);
123465cacacfSMatthew Dillon 		info->error |= child->error;
123565cacacfSMatthew Dillon 		goto done;
123665cacacfSMatthew Dillon 	}
12370dea3156SMatthew Dillon 
123803faa7d5SMatthew Dillon 	/*
12398bbe5025SMatthew Dillon 	 * Must propagate the DESTROY flag downwards, otherwise the
12408bbe5025SMatthew Dillon 	 * parent could end up never being removed because it will
12418bbe5025SMatthew Dillon 	 * be requeued to the flusher if it survives this run due to
12428bbe5025SMatthew Dillon 	 * the flag.
12438bbe5025SMatthew Dillon 	 */
12448bbe5025SMatthew Dillon 	if (parent && (parent->flags & HAMMER2_CHAIN_DESTROY))
12458bbe5025SMatthew Dillon 		atomic_set_int(&child->flags, HAMMER2_CHAIN_DESTROY);
12466aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
12476aaf5cb0SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_DESTROY)
12486aaf5cb0SMatthew Dillon 		++info->scan_del_count;
12496aaf5cb0SMatthew Dillon #endif
1250ecfe89b8SMatthew Dillon 	/*
1251ecfe89b8SMatthew Dillon 	 * Special handling of the root inode.  Because the root inode
1252ecfe89b8SMatthew Dillon 	 * contains an index of all the inodes in the PFS in addition to
1253ecfe89b8SMatthew Dillon 	 * its normal directory entries, any flush that is not part of a
1254ecfe89b8SMatthew Dillon 	 * filesystem sync must only flush the directory entries, and not
1255ecfe89b8SMatthew Dillon 	 * anything else.
1256ecfe89b8SMatthew Dillon 	 *
1257ecfe89b8SMatthew Dillon 	 * The child might be an indirect block, but H2 guarantees that
1258ecfe89b8SMatthew Dillon 	 * the key-range will fully partition the inode index from the
1259ecfe89b8SMatthew Dillon 	 * directory entries so the case just works naturally.
1260ecfe89b8SMatthew Dillon 	 */
1261ecfe89b8SMatthew Dillon 	if ((parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) &&
1262ecfe89b8SMatthew Dillon 	    (child->flags & HAMMER2_CHAIN_DESTROY) == 0 &&
1263ecfe89b8SMatthew Dillon 	    parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
1264ecfe89b8SMatthew Dillon 	    (info->flags & HAMMER2_FLUSH_FSSYNC) == 0) {
1265ecfe89b8SMatthew Dillon 		if ((child->bref.key & HAMMER2_DIRHASH_VISIBLE) == 0) {
1266ecfe89b8SMatthew Dillon 			if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) {
1267ecfe89b8SMatthew Dillon 				hammer2_chain_setflush(parent);
1268ecfe89b8SMatthew Dillon 			}
12696f445d15SMatthew Dillon 			kprintf("inum %ld do not dive root inode\n",
12706f445d15SMatthew Dillon 				(long)parent->bref.key);
1271ecfe89b8SMatthew Dillon 			goto done;
1272ecfe89b8SMatthew Dillon 		}
1273ecfe89b8SMatthew Dillon 	}
12748bbe5025SMatthew Dillon 
12758bbe5025SMatthew Dillon 	/*
1276e513e77eSMatthew Dillon 	 * Recurse and collect deferral data.  We're in the media flush,
1277e513e77eSMatthew Dillon 	 * this can cross PFS boundaries.
127803faa7d5SMatthew Dillon 	 */
1279da6f36f4SMatthew Dillon 	if (child->flags & HAMMER2_CHAIN_FLUSH_MASK) {
12806aaf5cb0SMatthew Dillon #ifdef HAMMER2_SCAN_DEBUG
12816aaf5cb0SMatthew Dillon 		if (child->bref.type < 7)
12826aaf5cb0SMatthew Dillon 			++info->scan_btype[child->bref.type];
12836aaf5cb0SMatthew Dillon #endif
12840dea3156SMatthew Dillon 		++info->depth;
128553f84d31SMatthew Dillon 		hammer2_flush_core(info, child, info->flags);
12860dea3156SMatthew Dillon 		--info->depth;
1287850687d2SMatthew Dillon 	} else if (hammer2_debug & 0x200) {
1288850687d2SMatthew Dillon 		if (info->debug == NULL)
1289850687d2SMatthew Dillon 			info->debug = child;
1290850687d2SMatthew Dillon 		++info->depth;
129153f84d31SMatthew Dillon 		hammer2_flush_core(info, child, info->flags);
1292850687d2SMatthew Dillon 		--info->depth;
1293850687d2SMatthew Dillon 		if (info->debug == child)
1294850687d2SMatthew Dillon 			info->debug = NULL;
12958138a154SMatthew Dillon 	}
12960dea3156SMatthew Dillon 
1297fae225dcSMatthew Dillon done:
1298a4dc31e0SMatthew Dillon 	/*
129965cacacfSMatthew Dillon 	 * Relock to continue the loop.
1300a4dc31e0SMatthew Dillon 	 */
1301a4dc31e0SMatthew Dillon 	hammer2_chain_unlock(child);
1302ea155208SMatthew Dillon 	hammer2_chain_lock(parent, HAMMER2_RESOLVE_MAYBE);
13036aaf5cb0SMatthew Dillon 	hammer2_chain_drop_unhold(parent);
130465cacacfSMatthew Dillon 	if (parent->error) {
130565cacacfSMatthew Dillon 		kprintf("PARENT ERROR DURING FLUSH LOCK %p->%p\n",
130665cacacfSMatthew Dillon 			parent, child);
130765cacacfSMatthew Dillon 		info->error |= parent->error;
130865cacacfSMatthew Dillon 	}
1309a4dc31e0SMatthew Dillon 	hammer2_chain_drop(child);
1310a4dc31e0SMatthew Dillon 	KKASSERT(info->parent == parent);
131194491fa0SMatthew Dillon 	hammer2_spin_ex(&parent->core.spin);
13120dea3156SMatthew Dillon 
13130dea3156SMatthew Dillon 	return (0);
13140dea3156SMatthew Dillon }
131512ff971cSMatthew Dillon 
131653f84d31SMatthew Dillon /*
131712ff971cSMatthew Dillon  * flush helper (backend threaded)
131812ff971cSMatthew Dillon  *
131940498d1cSMatthew Dillon  * Flushes chain topology for the specified inode.
132040498d1cSMatthew Dillon  *
1321ecfe89b8SMatthew Dillon  * HAMMER2_XOP_INODE_STOP	The flush recursion stops at inode boundaries.
1322ecfe89b8SMatthew Dillon  *				Inodes belonging to the same flush are flushed
1323ecfe89b8SMatthew Dillon  *				separately.
132440498d1cSMatthew Dillon  *
1325ecfe89b8SMatthew Dillon  * chain->parent can be NULL, usually due to destroy races or detached inodes.
132612ff971cSMatthew Dillon  *
132712ff971cSMatthew Dillon  * Primarily called from vfs_sync().
132812ff971cSMatthew Dillon  */
132912ff971cSMatthew Dillon void
1330c4421f07SMatthew Dillon hammer2_xop_inode_flush(hammer2_xop_t *arg, void *scratch __unused, int clindex)
133112ff971cSMatthew Dillon {
133212ff971cSMatthew Dillon 	hammer2_xop_flush_t *xop = &arg->xop_flush;
133312ff971cSMatthew Dillon 	hammer2_chain_t *chain;
13346f445d15SMatthew Dillon 	hammer2_inode_t *ip;
133512ff971cSMatthew Dillon 	hammer2_dev_t *hmp;
13366f445d15SMatthew Dillon 	hammer2_pfs_t *pmp;
133765cacacfSMatthew Dillon 	int flush_error = 0;
133865cacacfSMatthew Dillon 	int fsync_error = 0;
133912ff971cSMatthew Dillon 	int total_error = 0;
134012ff971cSMatthew Dillon 	int j;
134140498d1cSMatthew Dillon 	int xflags;
134240498d1cSMatthew Dillon 	int ispfsroot = 0;
134340498d1cSMatthew Dillon 
134440498d1cSMatthew Dillon 	xflags = HAMMER2_FLUSH_TOP;
134540498d1cSMatthew Dillon 	if (xop->head.flags & HAMMER2_XOP_INODE_STOP)
134640498d1cSMatthew Dillon 		xflags |= HAMMER2_FLUSH_INODE_STOP;
1347ecfe89b8SMatthew Dillon 	if (xop->head.flags & HAMMER2_XOP_FSSYNC)
1348ecfe89b8SMatthew Dillon 		xflags |= HAMMER2_FLUSH_FSSYNC;
134912ff971cSMatthew Dillon 
135012ff971cSMatthew Dillon 	/*
135112ff971cSMatthew Dillon 	 * Flush core chains
135212ff971cSMatthew Dillon 	 */
13536f445d15SMatthew Dillon 	ip = xop->head.ip1;
13546f445d15SMatthew Dillon 	pmp = ip->pmp;
13556f445d15SMatthew Dillon 	chain = hammer2_inode_chain(ip, clindex, HAMMER2_RESOLVE_ALWAYS);
135612ff971cSMatthew Dillon 	if (chain) {
135712ff971cSMatthew Dillon 		hmp = chain->hmp;
1358ecfe89b8SMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_FLUSH_MASK) {
1359ecfe89b8SMatthew Dillon 			/*
1360ecfe89b8SMatthew Dillon 			 * Due to flush partitioning the chain topology
1361ecfe89b8SMatthew Dillon 			 * above the inode's chain may no longer be flagged.
1362ecfe89b8SMatthew Dillon 			 * When asked to flush an inode, remark the topology
1363ecfe89b8SMatthew Dillon 			 * leading to that inode.
1364ecfe89b8SMatthew Dillon 			 */
1365ecfe89b8SMatthew Dillon 			if (chain->parent)
1366ecfe89b8SMatthew Dillon 				hammer2_chain_setflush(chain->parent);
136740498d1cSMatthew Dillon 			hammer2_flush(chain, xflags);
1368ecfe89b8SMatthew Dillon 
13696f445d15SMatthew Dillon 			/* XXX cluster */
13706f445d15SMatthew Dillon 			if (ip == pmp->iroot && pmp != hmp->spmp) {
13716f445d15SMatthew Dillon 				hammer2_spin_ex(&pmp->inum_spin);
13726f445d15SMatthew Dillon 				pmp->pfs_iroot_blocksets[clindex] =
13736f445d15SMatthew Dillon 					chain->data->ipdata.u.blockset;
13746f445d15SMatthew Dillon 				hammer2_spin_unex(&pmp->inum_spin);
13756f445d15SMatthew Dillon 			}
13766f445d15SMatthew Dillon 
1377ecfe89b8SMatthew Dillon #if 0
1378ecfe89b8SMatthew Dillon 			/*
1379ecfe89b8SMatthew Dillon 			 * Propogate upwards but only cross an inode boundary
1380ecfe89b8SMatthew Dillon 			 * for inodes associated with the current filesystem
1381ecfe89b8SMatthew Dillon 			 * sync.
1382ecfe89b8SMatthew Dillon 			 */
1383ecfe89b8SMatthew Dillon 			if ((xop->head.flags & HAMMER2_XOP_PARENTONFLUSH) ||
1384ecfe89b8SMatthew Dillon 			    chain->bref.type != HAMMER2_BREF_TYPE_INODE) {
138512ff971cSMatthew Dillon 				parent = chain->parent;
138640498d1cSMatthew Dillon 				if (parent)
138712ff971cSMatthew Dillon 					hammer2_chain_setflush(parent);
138812ff971cSMatthew Dillon 			}
1389ecfe89b8SMatthew Dillon #endif
1390ecfe89b8SMatthew Dillon 		}
139140498d1cSMatthew Dillon 		if (chain->flags & HAMMER2_CHAIN_PFSBOUNDARY)
139240498d1cSMatthew Dillon 			ispfsroot = 1;
139312ff971cSMatthew Dillon 		hammer2_chain_unlock(chain);
139412ff971cSMatthew Dillon 		hammer2_chain_drop(chain);
139512ff971cSMatthew Dillon 		chain = NULL;
139612ff971cSMatthew Dillon 	} else {
139712ff971cSMatthew Dillon 		hmp = NULL;
139812ff971cSMatthew Dillon 	}
139912ff971cSMatthew Dillon 
140012ff971cSMatthew Dillon 	/*
140165c894ffSMatthew Dillon 	 * Only flush the volume header if asked to, plus the inode must also
140265c894ffSMatthew Dillon 	 * be the PFS root.
140340498d1cSMatthew Dillon 	 */
140465c894ffSMatthew Dillon 	if ((xop->head.flags & HAMMER2_XOP_VOLHDR) == 0)
140565c894ffSMatthew Dillon 		goto skip;
140640498d1cSMatthew Dillon 	if (ispfsroot == 0)
140740498d1cSMatthew Dillon 		goto skip;
140840498d1cSMatthew Dillon 
140940498d1cSMatthew Dillon 	/*
141012ff971cSMatthew Dillon 	 * Flush volume roots.  Avoid replication, we only want to
141112ff971cSMatthew Dillon 	 * flush each hammer2_dev (hmp) once.
141212ff971cSMatthew Dillon 	 */
1413c4421f07SMatthew Dillon 	for (j = clindex - 1; j >= 0; --j) {
14146f445d15SMatthew Dillon 		if ((chain = ip->cluster.array[j].chain) != NULL) {
141512ff971cSMatthew Dillon 			if (chain->hmp == hmp) {
141612ff971cSMatthew Dillon 				chain = NULL;	/* safety */
141712ff971cSMatthew Dillon 				goto skip;
141812ff971cSMatthew Dillon 			}
141912ff971cSMatthew Dillon 		}
142012ff971cSMatthew Dillon 	}
142112ff971cSMatthew Dillon 	chain = NULL;	/* safety */
142212ff971cSMatthew Dillon 
142312ff971cSMatthew Dillon 	/*
142412ff971cSMatthew Dillon 	 * spmp transaction.  The super-root is never directly mounted so
142512ff971cSMatthew Dillon 	 * there shouldn't be any vnodes, let alone any dirty vnodes
142653f84d31SMatthew Dillon 	 * associated with it, so we shouldn't have to mess around with any
142753f84d31SMatthew Dillon 	 * vnode flushes here.
142812ff971cSMatthew Dillon 	 */
142912ff971cSMatthew Dillon 	hammer2_trans_init(hmp->spmp, HAMMER2_TRANS_ISFLUSH);
143012ff971cSMatthew Dillon 
143112ff971cSMatthew Dillon 	/*
14326f445d15SMatthew Dillon 	 * We must flush the superroot down to the PFS iroot.  Remember
14336f445d15SMatthew Dillon 	 * that hammer2_chain_setflush() stops at inode boundaries, so
14346f445d15SMatthew Dillon 	 * the pmp->iroot has been flushed and flagged down to the superroot,
14356f445d15SMatthew Dillon 	 * but the volume root (vchain) probably has not yet been flagged.
14366f445d15SMatthew Dillon 	 */
14376f445d15SMatthew Dillon 	if (hmp->spmp->iroot) {
14386f445d15SMatthew Dillon 		chain = hmp->spmp->iroot->cluster.array[0].chain;
14396f445d15SMatthew Dillon 		if (chain) {
14406f445d15SMatthew Dillon 			hammer2_chain_ref(chain);
14416f445d15SMatthew Dillon 			hammer2_chain_lock(chain, HAMMER2_RESOLVE_ALWAYS);
14426f445d15SMatthew Dillon 			flush_error |=
14436f445d15SMatthew Dillon 				hammer2_flush(chain,
14446f445d15SMatthew Dillon 					      HAMMER2_FLUSH_TOP |
14456f445d15SMatthew Dillon 					      HAMMER2_FLUSH_INODE_STOP |
14466f445d15SMatthew Dillon 					      HAMMER2_FLUSH_FSSYNC);
14476f445d15SMatthew Dillon 			hammer2_chain_unlock(chain);
14486f445d15SMatthew Dillon 			hammer2_chain_drop(chain);
14496f445d15SMatthew Dillon 		}
14506f445d15SMatthew Dillon 	}
14516f445d15SMatthew Dillon 
14526f445d15SMatthew Dillon 	/*
145312ff971cSMatthew Dillon 	 * Media mounts have two 'roots', vchain for the topology
145412ff971cSMatthew Dillon 	 * and fchain for the free block table.  Flush both.
145512ff971cSMatthew Dillon 	 *
145612ff971cSMatthew Dillon 	 * Note that the topology and free block table are handled
145712ff971cSMatthew Dillon 	 * independently, so the free block table can wind up being
145812ff971cSMatthew Dillon 	 * ahead of the topology.  We depend on the bulk free scan
145912ff971cSMatthew Dillon 	 * code to deal with any loose ends.
146065cacacfSMatthew Dillon 	 *
146165cacacfSMatthew Dillon 	 * vchain and fchain do not error on-lock since their data does
146265cacacfSMatthew Dillon 	 * not have to be re-read from media.
146312ff971cSMatthew Dillon 	 */
146412ff971cSMatthew Dillon 	hammer2_chain_ref(&hmp->vchain);
146512ff971cSMatthew Dillon 	hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
146612ff971cSMatthew Dillon 	hammer2_chain_ref(&hmp->fchain);
146712ff971cSMatthew Dillon 	hammer2_chain_lock(&hmp->fchain, HAMMER2_RESOLVE_ALWAYS);
146812ff971cSMatthew Dillon 	if (hmp->fchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
146912ff971cSMatthew Dillon 		/*
147012ff971cSMatthew Dillon 		 * This will also modify vchain as a side effect,
147112ff971cSMatthew Dillon 		 * mark vchain as modified now.
147212ff971cSMatthew Dillon 		 */
147312ff971cSMatthew Dillon 		hammer2_voldata_modify(hmp);
147412ff971cSMatthew Dillon 		chain = &hmp->fchain;
147565cacacfSMatthew Dillon 		flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP);
147612ff971cSMatthew Dillon 		KKASSERT(chain == &hmp->fchain);
147712ff971cSMatthew Dillon 	}
147812ff971cSMatthew Dillon 	hammer2_chain_unlock(&hmp->fchain);
147912ff971cSMatthew Dillon 	hammer2_chain_unlock(&hmp->vchain);
148012ff971cSMatthew Dillon 	hammer2_chain_drop(&hmp->fchain);
148112ff971cSMatthew Dillon 	/* vchain dropped down below */
148212ff971cSMatthew Dillon 
148312ff971cSMatthew Dillon 	hammer2_chain_lock(&hmp->vchain, HAMMER2_RESOLVE_ALWAYS);
148412ff971cSMatthew Dillon 	if (hmp->vchain.flags & HAMMER2_CHAIN_FLUSH_MASK) {
148512ff971cSMatthew Dillon 		chain = &hmp->vchain;
148665cacacfSMatthew Dillon 		flush_error |= hammer2_flush(chain, HAMMER2_FLUSH_TOP);
148712ff971cSMatthew Dillon 		KKASSERT(chain == &hmp->vchain);
148812ff971cSMatthew Dillon 	}
148912ff971cSMatthew Dillon 	hammer2_chain_unlock(&hmp->vchain);
149012ff971cSMatthew Dillon 	hammer2_chain_drop(&hmp->vchain);
149112ff971cSMatthew Dillon 
149212ff971cSMatthew Dillon 	/*
149312ff971cSMatthew Dillon 	 * We can't safely flush the volume header until we have
149412ff971cSMatthew Dillon 	 * flushed any device buffers which have built up.
149512ff971cSMatthew Dillon 	 *
149612ff971cSMatthew Dillon 	 * XXX this isn't being incremental
149712ff971cSMatthew Dillon 	 */
149812ff971cSMatthew Dillon 	vn_lock(hmp->devvp, LK_EXCLUSIVE | LK_RETRY);
149965cacacfSMatthew Dillon 	fsync_error = VOP_FSYNC(hmp->devvp, MNT_WAIT, 0);
150012ff971cSMatthew Dillon 	vn_unlock(hmp->devvp);
150165cacacfSMatthew Dillon 	if (fsync_error || flush_error) {
150265cacacfSMatthew Dillon 		kprintf("hammer2: sync error fsync=%d h2flush=0x%04x dev=%s\n",
150365cacacfSMatthew Dillon 			fsync_error, flush_error, hmp->devrepname);
150465cacacfSMatthew Dillon 	}
150512ff971cSMatthew Dillon 
150612ff971cSMatthew Dillon 	/*
150712ff971cSMatthew Dillon 	 * The flush code sets CHAIN_VOLUMESYNC to indicate that the
150812ff971cSMatthew Dillon 	 * volume header needs synchronization via hmp->volsync.
150912ff971cSMatthew Dillon 	 *
151012ff971cSMatthew Dillon 	 * XXX synchronize the flag & data with only this flush XXX
151112ff971cSMatthew Dillon 	 */
151265cacacfSMatthew Dillon 	if (fsync_error == 0 && flush_error == 0 &&
151312ff971cSMatthew Dillon 	    (hmp->vchain.flags & HAMMER2_CHAIN_VOLUMESYNC)) {
151412ff971cSMatthew Dillon 		struct buf *bp;
151565cacacfSMatthew Dillon 		int vol_error = 0;
151612ff971cSMatthew Dillon 
151712ff971cSMatthew Dillon 		/*
151812ff971cSMatthew Dillon 		 * Synchronize the disk before flushing the volume
151912ff971cSMatthew Dillon 		 * header.
152012ff971cSMatthew Dillon 		 */
152112ff971cSMatthew Dillon 		bp = getpbuf(NULL);
152212ff971cSMatthew Dillon 		bp->b_bio1.bio_offset = 0;
152312ff971cSMatthew Dillon 		bp->b_bufsize = 0;
152412ff971cSMatthew Dillon 		bp->b_bcount = 0;
152512ff971cSMatthew Dillon 		bp->b_cmd = BUF_CMD_FLUSH;
152612ff971cSMatthew Dillon 		bp->b_bio1.bio_done = biodone_sync;
152712ff971cSMatthew Dillon 		bp->b_bio1.bio_flags |= BIO_SYNC;
152812ff971cSMatthew Dillon 		vn_strategy(hmp->devvp, &bp->b_bio1);
152965cacacfSMatthew Dillon 		fsync_error = biowait(&bp->b_bio1, "h2vol");
153012ff971cSMatthew Dillon 		relpbuf(bp, NULL);
153112ff971cSMatthew Dillon 
153212ff971cSMatthew Dillon 		/*
153312ff971cSMatthew Dillon 		 * Then we can safely flush the version of the
153412ff971cSMatthew Dillon 		 * volume header synchronized by the flush code.
153512ff971cSMatthew Dillon 		 */
153612ff971cSMatthew Dillon 		j = hmp->volhdrno + 1;
153719808ac9SMatthew Dillon 		if (j < 0)
153819808ac9SMatthew Dillon 			j = 0;
153912ff971cSMatthew Dillon 		if (j >= HAMMER2_NUM_VOLHDRS)
154012ff971cSMatthew Dillon 			j = 0;
154112ff971cSMatthew Dillon 		if (j * HAMMER2_ZONE_BYTES64 + HAMMER2_SEGSIZE >
154212ff971cSMatthew Dillon 		    hmp->volsync.volu_size) {
154312ff971cSMatthew Dillon 			j = 0;
154412ff971cSMatthew Dillon 		}
15455d37f96dSMatthew Dillon 		if (hammer2_debug & 0x8000) {
15465d37f96dSMatthew Dillon 			/* debug only, avoid syslogd loop */
154712ff971cSMatthew Dillon 			kprintf("sync volhdr %d %jd\n",
154812ff971cSMatthew Dillon 				j, (intmax_t)hmp->volsync.volu_size);
15495d37f96dSMatthew Dillon 		}
155012ff971cSMatthew Dillon 		bp = getblk(hmp->devvp, j * HAMMER2_ZONE_BYTES64,
155104b8e839SMatthew Dillon 			    HAMMER2_PBUFSIZE, GETBLK_KVABIO, 0);
155212ff971cSMatthew Dillon 		atomic_clear_int(&hmp->vchain.flags,
155312ff971cSMatthew Dillon 				 HAMMER2_CHAIN_VOLUMESYNC);
155404b8e839SMatthew Dillon 		bkvasync(bp);
155512ff971cSMatthew Dillon 		bcopy(&hmp->volsync, bp->b_data, HAMMER2_PBUFSIZE);
155665cacacfSMatthew Dillon 		vol_error = bwrite(bp);
155712ff971cSMatthew Dillon 		hmp->volhdrno = j;
155865cacacfSMatthew Dillon 		if (vol_error)
155965cacacfSMatthew Dillon 			fsync_error = vol_error;
156012ff971cSMatthew Dillon 	}
156165cacacfSMatthew Dillon 	if (flush_error)
156265cacacfSMatthew Dillon 		total_error = flush_error;
156365cacacfSMatthew Dillon 	if (fsync_error)
156465cacacfSMatthew Dillon 		total_error = hammer2_errno_to_error(fsync_error);
156512ff971cSMatthew Dillon 
1566ecfe89b8SMatthew Dillon 	/* spmp trans */
1567ecfe89b8SMatthew Dillon 	hammer2_trans_done(hmp->spmp, HAMMER2_TRANS_ISFLUSH);
156812ff971cSMatthew Dillon skip:
1569c4421f07SMatthew Dillon 	hammer2_xop_feed(&xop->head, NULL, clindex, total_error);
157012ff971cSMatthew Dillon }
1571