xref: /dragonfly/sys/vfs/hammer2/hammer2_flush.c (revision 20c2db9a)
1 /*
2  * Copyright (c) 2011-2012 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 
36 #include <sys/cdefs.h>
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/types.h>
40 #include <sys/lock.h>
41 #include <sys/uuid.h>
42 
43 #include "hammer2.h"
44 
45 /*
46  * Recursively flush the specified chain.  The chain is locked and
47  * referenced by the caller and will remain so on return.  The chain
48  * will remain referenced throughout but can temporarily lose its
49  * lock during the recursion to avoid unnecessarily stalling user
50  * processes.
51  */
52 struct hammer2_flush_info {
53 	struct flush_deferral_list flush_list;
54 	int		depth;
55 	hammer2_tid_t	modify_tid;
56 };
57 
58 typedef struct hammer2_flush_info hammer2_flush_info_t;
59 
60 static void hammer2_chain_flush_pass1(hammer2_mount_t *hmp,
61 			hammer2_chain_t *chain, hammer2_flush_info_t *info);
62 static void hammer2_saved_child_cleanup(hammer2_mount_t *hmp,
63 			hammer2_chain_t *parent, hammer2_chain_t *child);
64 
65 /*
66  * Stand-alone flush.  If the chain is unable to completely flush we have
67  * to be sure that SUBMODIFIED propagates up the parent chain.  We must not
68  * clear the MOVED bit after flushing in this situation or our desynchronized
69  * bref will not properly update in the parent.
70  *
71  * This routine can be called from several places but the most important
72  * is from the hammer2_vop_reclaim() function.  We want to try to completely
73  * clean out the inode structure to prevent disconnected inodes from
74  * building up and blowing out the kmalloc pool.
75  *
76  * If modify_tid is 0 (usual case), a new modify_tid is allocated and
77  * applied to the flush.  The depth-limit handling code is the only
78  * code which passes a non-zero modify_tid to hammer2_chain_flush().
79  *
80  * chain is locked on call and will remain locked on return.
81  */
82 void
83 hammer2_chain_flush(hammer2_mount_t *hmp, hammer2_chain_t *chain,
84 		    hammer2_tid_t modify_tid)
85 {
86 	hammer2_chain_t *parent;
87 	hammer2_chain_t *scan;
88 	hammer2_blockref_t *base;
89 	hammer2_flush_info_t info;
90 	int count;
91 	int reflush;
92 
93 	/*
94 	 * Execute the recursive flush and handle deferrals.
95 	 *
96 	 * Chains can be ridiculously long (thousands deep), so to
97 	 * avoid blowing out the kernel stack the recursive flush has a
98 	 * depth limit.  Elements at the limit are placed on a list
99 	 * for re-execution after the stack has been popped.
100 	 */
101 	bzero(&info, sizeof(info));
102 	TAILQ_INIT(&info.flush_list);
103 
104 	if (modify_tid == 0) {
105 		hammer2_voldata_lock(hmp);
106 		info.modify_tid = hmp->voldata.alloc_tid++;
107 		atomic_set_int(&hmp->vchain.flags, HAMMER2_CHAIN_MODIFIED_AUX);
108 		hammer2_voldata_unlock(hmp);
109 	} else {
110 		info.modify_tid = modify_tid;
111 	}
112 	reflush = 1;
113 
114 	while (reflush) {
115 		/*
116 		 * Primary recursion
117 		 */
118 		hammer2_chain_flush_pass1(hmp, chain, &info);
119 		reflush = 0;
120 
121 		while ((scan = TAILQ_FIRST(&info.flush_list)) != NULL) {
122 			/*
123 			 * Secondary recursion.  Note that a reference is
124 			 * retained from the element's presence on the
125 			 * deferral list.
126 			 */
127 			KKASSERT(scan->flags & HAMMER2_CHAIN_DEFERRED);
128 			TAILQ_REMOVE(&info.flush_list, scan, flush_node);
129 			atomic_clear_int(&scan->flags, HAMMER2_CHAIN_DEFERRED);
130 
131 			/*
132 			 * Now that we've popped back up we can do a secondary
133 			 * recursion on the deferred elements.
134 			 */
135 			if (hammer2_debug & 0x0040)
136 				kprintf("defered flush %p\n", scan);
137 			hammer2_chain_lock(hmp, scan, HAMMER2_RESOLVE_MAYBE);
138 			hammer2_chain_flush(hmp, scan, info.modify_tid);
139 			hammer2_chain_unlock(hmp, scan);
140 
141 			/*
142 			 * Only flag a reflush if SUBMODIFIED is no longer
143 			 * set.  If SUBMODIFIED is set the element will just
144 			 * wind up on our flush_list again.
145 			 */
146 			if ((scan->flags & (HAMMER2_CHAIN_SUBMODIFIED |
147 					    HAMMER2_CHAIN_MODIFIED |
148 					    HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
149 				reflush = 1;
150 			}
151 			hammer2_chain_drop(hmp, scan);
152 		}
153 		if ((hammer2_debug & 0x0040) && reflush)
154 			kprintf("reflush %p\n", chain);
155 	}
156 
157 	/*
158 	 * The SUBMODIFIED bit must propagate upward if the chain could not
159 	 * be completely flushed.
160 	 */
161 	if (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
162 			    HAMMER2_CHAIN_MODIFIED |
163 			    HAMMER2_CHAIN_MODIFIED_AUX |
164 			    HAMMER2_CHAIN_MOVED)) {
165 		hammer2_chain_parent_setsubmod(hmp, chain);
166 	}
167 
168 	/*
169 	 * If the only thing left is a simple bref update try to
170 	 * pro-actively update the parent, otherwise return early.
171 	 */
172 	parent = chain->parent;
173 	if (parent == NULL) {
174 		return;
175 	}
176 	if (chain->bref.type != HAMMER2_BREF_TYPE_INODE ||
177 	    (chain->flags & (HAMMER2_CHAIN_SUBMODIFIED |
178 			     HAMMER2_CHAIN_MODIFIED |
179 			     HAMMER2_CHAIN_MODIFIED_AUX |
180 			     HAMMER2_CHAIN_MOVED)) != HAMMER2_CHAIN_MOVED) {
181 		return;
182 	}
183 
184 	/*
185 	 * We are locking backwards so allow the lock to fail.
186 	 */
187 	if (ccms_thread_lock_nonblock(&parent->cst, CCMS_STATE_EXCLUSIVE))
188 		return;
189 
190 	/*
191 	 * We are updating brefs but we have to call chain_modify()
192 	 * because our caller is not being run from a recursive flush.
193 	 *
194 	 * This will also chain up the parent list and set the SUBMODIFIED
195 	 * flag.
196 	 *
197 	 * We do not want to set HAMMER2_CHAIN_MODIFY_TID here because the
198 	 * modification is only related to updating a bref in the parent.
199 	 *
200 	 * When updating the blockset embedded in the volume header we must
201 	 * also update voldata.mirror_tid.
202 	 */
203 	hammer2_chain_lock(hmp, parent, HAMMER2_RESOLVE_MAYBE);
204 	hammer2_chain_modify(hmp, parent, HAMMER2_MODIFY_NO_MODIFY_TID);
205 
206 	switch(parent->bref.type) {
207 	case HAMMER2_BREF_TYPE_INODE:
208 		base = &parent->data->ipdata.u.blockset.
209 			blockref[0];
210 		count = HAMMER2_SET_COUNT;
211 		break;
212 	case HAMMER2_BREF_TYPE_INDIRECT:
213 		base = &parent->data->npdata.blockref[0];
214 		count = parent->bytes /
215 			sizeof(hammer2_blockref_t);
216 		break;
217 	case HAMMER2_BREF_TYPE_VOLUME:
218 		base = &hmp->voldata.sroot_blockset.blockref[0];
219 		count = HAMMER2_SET_COUNT;
220 		if (chain->flags & HAMMER2_CHAIN_MOVED) {
221 			if (hmp->voldata.mirror_tid < chain->bref.mirror_tid) {
222 				hmp->voldata.mirror_tid =
223 					chain->bref.mirror_tid;
224 			}
225 		}
226 		break;
227 	default:
228 		base = NULL;
229 		panic("hammer2_chain_flush: "
230 		      "unrecognized blockref type: %d",
231 		      parent->bref.type);
232 	}
233 
234 	/*
235 	 * Update the blockref in the parent.  We do not have to set
236 	 * MOVED in the parent because the parent has been marked modified,
237 	 * so the flush sequence will pick up the bref change.
238 	 *
239 	 * We do have to propagate mirror_tid upward.
240 	 */
241 	KKASSERT(chain->index >= 0 &&
242 		 chain->index < count);
243 	KKASSERT(chain->parent == parent);
244 	if (chain->flags & HAMMER2_CHAIN_MOVED) {
245 		base[chain->index] = chain->bref_flush;
246 		if (parent->bref.mirror_tid < chain->bref_flush.mirror_tid)
247 			parent->bref.mirror_tid = chain->bref_flush.mirror_tid;
248 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MOVED);
249 		hammer2_chain_drop(hmp, chain);
250 	} else if (bcmp(&base[chain->index], &chain->bref_flush,
251 		   sizeof(chain->bref)) != 0) {
252 		panic("hammer2: unflagged bref update(2)");
253 	}
254 	ccms_thread_unlock(&parent->cst);		/* release manual op */
255 	hammer2_chain_unlock(hmp, parent);
256 }
257 
258 /*
259  * chain is locked by the caller and remains locked on return.
260  */
261 static void
262 hammer2_chain_flush_pass1(hammer2_mount_t *hmp, hammer2_chain_t *chain,
263 			  hammer2_flush_info_t *info)
264 {
265 	hammer2_blockref_t *bref;
266 	hammer2_off_t pbase;
267 	size_t bbytes;
268 	size_t boff;
269 	char *bdata;
270 	struct buf *bp;
271 	int error;
272 	int wasmodified;
273 
274 	/*
275 	 * If we hit the stack recursion depth limit defer the operation.
276 	 * The controller of the info structure will execute the deferral
277 	 * list and then retry.
278 	 *
279 	 * This is only applicable if SUBMODIFIED is set.  After a reflush
280 	 * SUBMODIFIED will probably be cleared and we want to drop through
281 	 * to finish processing the current element so our direct parent
282 	 * can process the results.
283 	 */
284 	if (info->depth == HAMMER2_FLUSH_DEPTH_LIMIT &&
285 	    (chain->flags & HAMMER2_CHAIN_SUBMODIFIED)) {
286 		if ((chain->flags & HAMMER2_CHAIN_DEFERRED) == 0) {
287 			hammer2_chain_ref(hmp, chain);
288 			TAILQ_INSERT_TAIL(&info->flush_list,
289 					  chain, flush_node);
290 			atomic_set_int(&chain->flags, HAMMER2_CHAIN_DEFERRED);
291 		}
292 		return;
293 	}
294 
295 	if (hammer2_debug & 0x0008)
296 		kprintf("%*.*sCHAIN type=%d@%08jx %p/%d %04x {\n",
297 			info->depth, info->depth, "",
298 			chain->bref.type, chain->bref.data_off,
299 			chain, chain->refs, chain->flags);
300 
301 	/*
302 	 * If SUBMODIFIED is set we recurse the flush and adjust the
303 	 * blockrefs accordingly.
304 	 *
305 	 * NOTE: Looping on SUBMODIFIED can prevent a flush from ever
306 	 *	 finishing in the face of filesystem activity.
307 	 */
308 	if (chain->flags & HAMMER2_CHAIN_SUBMODIFIED) {
309 		hammer2_chain_t *child;
310 		hammer2_chain_t *saved;
311 		hammer2_blockref_t *base;
312 		int count;
313 
314 		/*
315 		 * Clear SUBMODIFIED to catch races.  Note that if any
316 		 * child has to be flushed SUBMODIFIED will wind up being
317 		 * set again (for next time), but this does not stop us from
318 		 * synchronizing block updates which occurred.
319 		 *
320 		 * We don't want to set our chain to MODIFIED gratuitously.
321 		 *
322 		 * We need an extra ref on chain because we are going to
323 		 * release its lock temporarily in our child loop.
324 		 */
325 		/* XXX SUBMODIFIED not interlocked, can race */
326 		atomic_clear_int(&chain->flags, HAMMER2_CHAIN_SUBMODIFIED);
327 		hammer2_chain_ref(hmp, chain);
328 
329 		/*
330 		 * Flush the children and update the blockrefs in the chain.
331 		 * Be careful of ripouts during the loop.
332 		 *
333 		 * The flushing counter prevents ripouts on lastdrop and
334 		 * also prevents moves (causes renames to sleep/retry).
335 		 * Be very careful with it.
336 		 */
337 		RB_FOREACH(child, hammer2_chain_tree, &chain->rbhead) {
338 			KASSERT(child->parent == chain,
339 				("hammer2_flush: child->parent mismatch %p/%p",
340 				 child->parent, chain));
341 
342 			/*
343 			 * We only recurse if SUBMODIFIED (internal node)
344 			 * or MODIFIED (internal node or leaf) is set.
345 			 * However, we must still track whether any MOVED
346 			 * entries are present to determine if the chain's
347 			 * blockref's need updating or not.
348 			 */
349 			if ((child->flags & (HAMMER2_CHAIN_SUBMODIFIED |
350 					     HAMMER2_CHAIN_MODIFIED |
351 					    HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
352 				continue;
353 			}
354 
355 			/*
356 			 * flushing can only be adjusted while its parent
357 			 * is locked, and prevent the destruction/removal
358 			 * of the child from the parent's B-Tree.  This allows
359 			 * us to temporarily unlock the parent.
360 			 *
361 			 * To unwind, we must hold the parent locked before
362 			 * decrementing flushing to prevent child corruption
363 			 * during our loop.
364 			 */
365 			atomic_add_int(&child->flushing, 1);
366 			hammer2_chain_unlock(hmp, chain);
367 			hammer2_chain_lock(hmp, child, HAMMER2_RESOLVE_MAYBE);
368 			KASSERT(child->parent == chain,
369 				("hammer2_flush: child->parent mismatch %p/%p",
370 				 child->parent, chain));
371 			if ((child->flags & (HAMMER2_CHAIN_SUBMODIFIED |
372 					     HAMMER2_CHAIN_MODIFIED |
373 					    HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
374 				hammer2_chain_unlock(hmp, child);
375 				hammer2_chain_lock(hmp, chain,
376 						   HAMMER2_RESOLVE_ALWAYS);
377 				KKASSERT(child->parent == chain);
378 				atomic_add_int(&child->flushing, -1);
379 				continue;
380 			}
381 
382 			/*
383 			 * Propagate the DESTROYED flag if found set, then
384 			 * recurse the flush.
385 			 */
386 			if ((chain->flags & HAMMER2_CHAIN_DESTROYED) &&
387 			    (child->flags & HAMMER2_CHAIN_DESTROYED) == 0) {
388 				atomic_set_int(&child->flags,
389 					       HAMMER2_CHAIN_DESTROYED |
390 					       HAMMER2_CHAIN_SUBMODIFIED);
391 			}
392 			++info->depth;
393 			hammer2_chain_flush_pass1(hmp, child, info);
394 			--info->depth;
395 			hammer2_chain_unlock(hmp, child);
396 
397 			/*
398 			 * Always resolve when relocking the parent.
399 			 */
400 			hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_ALWAYS);
401 			KASSERT(child->parent == chain,
402 				("hammer2_flush: child->parent mismatch %p/%p",
403 				 child->parent, chain));
404 			atomic_add_int(&child->flushing, -1);
405 		}
406 
407 		/*
408 		 * Now synchronize any block updates and handle any
409 		 * chains marked DELETED.
410 		 *
411 		 * The flushing counter prevents ripouts on lastdrop and
412 		 * also prevents moves (causes renames to sleep/retry).
413 		 * Be very careful with it.
414 		 */
415 		saved = NULL;
416 		RB_FOREACH(child, hammer2_chain_tree, &chain->rbhead) {
417 			if ((child->flags & (HAMMER2_CHAIN_MOVED |
418 					     HAMMER2_CHAIN_DELETED)) == 0) {
419 				continue;
420 			}
421 			atomic_add_int(&child->flushing, 1);
422 			if (saved) {
423 				hammer2_saved_child_cleanup(hmp, chain, saved);
424 				saved = NULL;
425 			}
426 			saved = child;
427 			hammer2_chain_lock(hmp, child, HAMMER2_RESOLVE_NEVER);
428 			KKASSERT(child->parent == chain);
429 			if ((child->flags & (HAMMER2_CHAIN_MOVED |
430 					     HAMMER2_CHAIN_DELETED)) == 0) {
431 				hammer2_chain_unlock(hmp, child);
432 				continue;
433 			}
434 			if (child->flags & HAMMER2_CHAIN_MOVED) {
435 				hammer2_chain_modify(hmp, chain,
436 					     HAMMER2_MODIFY_NO_MODIFY_TID);
437 			}
438 
439 			switch(chain->bref.type) {
440 			case HAMMER2_BREF_TYPE_INODE:
441 				KKASSERT((chain->data->ipdata.op_flags &
442 					  HAMMER2_OPFLAG_DIRECTDATA) == 0);
443 				base = &chain->data->ipdata.u.blockset.
444 					blockref[0];
445 				count = HAMMER2_SET_COUNT;
446 				break;
447 			case HAMMER2_BREF_TYPE_INDIRECT:
448 				if (chain->data) {
449 					base = &chain->data->npdata.blockref[0];
450 				} else {
451 					base = NULL;
452 					KKASSERT(child->flags &
453 						 HAMMER2_CHAIN_DELETED);
454 				}
455 				count = chain->bytes /
456 					sizeof(hammer2_blockref_t);
457 				break;
458 			case HAMMER2_BREF_TYPE_VOLUME:
459 				base = &hmp->voldata.sroot_blockset.blockref[0];
460 				count = HAMMER2_SET_COUNT;
461 				break;
462 			default:
463 				base = NULL;
464 				panic("hammer2_chain_get: "
465 				      "unrecognized blockref type: %d",
466 				      chain->bref.type);
467 			}
468 
469 			KKASSERT(child->index >= 0);
470 
471 			if (chain->bref.mirror_tid <
472 			    child->bref_flush.mirror_tid) {
473 				chain->bref.mirror_tid =
474 					child->bref_flush.mirror_tid;
475 			}
476 			if (chain->bref.type == HAMMER2_BREF_TYPE_VOLUME &&
477 			    hmp->voldata.mirror_tid <
478 			    child->bref_flush.mirror_tid) {
479 				hmp->voldata.mirror_tid =
480 					child->bref_flush.mirror_tid;
481 			}
482 			if (child->flags & HAMMER2_CHAIN_DELETED) {
483 				bzero(&child->bref_flush,
484 				      sizeof(child->bref_flush));
485 			}
486 			if (base)
487 				base[child->index] = child->bref_flush;
488 			if (child->flags & HAMMER2_CHAIN_MOVED) {
489 				atomic_clear_int(&child->flags,
490 						 HAMMER2_CHAIN_MOVED);
491 				hammer2_chain_drop(hmp, child); /* flag */
492 			}
493 			hammer2_chain_unlock(hmp, child);
494 		}
495 		if (saved) {
496 			hammer2_saved_child_cleanup(hmp, chain, saved);
497 			saved = NULL;
498 		}
499 		hammer2_chain_drop(hmp, chain);
500 	}
501 
502 	/*
503 	 * If destroying the object we unconditonally clear the MODIFIED
504 	 * and MOVED bits, and we destroy the buffer without writing it
505 	 * out.
506 	 *
507 	 * We don't bother updating the hash/crc or the chain bref.
508 	 *
509 	 * NOTE: The destroy'd object's bref has already been updated.
510 	 *	 so we can clear MOVED without propagating mirror_tid
511 	 *	 or modify_tid upward.
512 	 *
513 	 * XXX allocations for unflushed data can be returned to the
514 	 *     free pool.
515 	 */
516 	if (chain->flags & HAMMER2_CHAIN_DESTROYED) {
517 		if (chain->flags & HAMMER2_CHAIN_MODIFIED) {
518 			if (chain->bp) {
519 				chain->bp->b_flags |= B_INVAL|B_RELBUF;
520 			}
521 			atomic_clear_int(&chain->flags,
522 					 HAMMER2_CHAIN_MODIFIED |
523 					 HAMMER2_CHAIN_MODIFY_TID);
524 			hammer2_chain_drop(hmp, chain);
525 		}
526 		if (chain->flags & HAMMER2_CHAIN_MODIFIED_AUX) {
527 			atomic_clear_int(&chain->flags,
528 					 HAMMER2_CHAIN_MODIFIED_AUX);
529 		}
530 		if (chain->flags & HAMMER2_CHAIN_MOVED) {
531 			atomic_clear_int(&chain->flags,
532 					 HAMMER2_CHAIN_MOVED);
533 			hammer2_chain_drop(hmp, chain);
534 		}
535 		return;
536 	}
537 
538 	/*
539 	 * Flush this chain entry only if it is marked modified.
540 	 */
541 	if ((chain->flags & (HAMMER2_CHAIN_MODIFIED |
542 			     HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
543 		goto done;
544 	}
545 
546 #if 0
547 	/*
548 	 * Synchronize cumulative data and inode count adjustments to
549 	 * the inode and propagate the deltas upward to the parent.
550 	 *
551 	 * XXX removed atm
552 	 */
553 	if (chain->bref.type == HAMMER2_BREF_TYPE_INODE) {
554 		hammer2_inode_t *ip;
555 
556 		ip = chain->u.ip;
557 		ip->ip_data.inode_count += ip->delta_icount;
558 		ip->ip_data.data_count += ip->delta_dcount;
559 		if (ip->pip) {
560 			ip->pip->delta_icount += ip->delta_icount;
561 			ip->pip->delta_dcount += ip->delta_dcount;
562 		}
563 		ip->delta_icount = 0;
564 		ip->delta_dcount = 0;
565 	}
566 #endif
567 
568 	/*
569 	 * Flush if MODIFIED or MODIFIED_AUX is set.  MODIFIED_AUX is only
570 	 * used by the volume header (&hmp->vchain).
571 	 */
572 	if ((chain->flags & (HAMMER2_CHAIN_MODIFIED |
573 			     HAMMER2_CHAIN_MODIFIED_AUX)) == 0) {
574 		goto done;
575 	}
576 	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED_AUX);
577 
578 	/*
579 	 * Clear MODIFIED and set HAMMER2_CHAIN_MOVED.  The caller
580 	 * will re-test the MOVED bit.  We must also update the mirror_tid
581 	 * and modify_tid fields as appropriate.
582 	 *
583 	 * bits own a single chain ref and the MOVED bit owns its own
584 	 * chain ref.
585 	 */
586 	chain->bref.mirror_tid = info->modify_tid;
587 	if (chain->flags & HAMMER2_CHAIN_MODIFY_TID)
588 		chain->bref.modify_tid = info->modify_tid;
589 	wasmodified = (chain->flags & HAMMER2_CHAIN_MODIFIED) != 0;
590 	atomic_clear_int(&chain->flags, HAMMER2_CHAIN_MODIFIED |
591 					HAMMER2_CHAIN_MODIFY_TID);
592 
593 	if (chain->flags & HAMMER2_CHAIN_MOVED) {
594 		/*
595 		 * Drop the ref from the MODIFIED bit we cleared.
596 		 */
597 		if (wasmodified)
598 			hammer2_chain_drop(hmp, chain);
599 	} else {
600 		/*
601 		 * If we were MODIFIED we inherit the ref from clearing
602 		 * that bit, otherwise we need another ref.
603 		 */
604 		if (wasmodified == 0)
605 			hammer2_chain_ref(hmp, chain);
606 		atomic_set_int(&chain->flags, HAMMER2_CHAIN_MOVED);
607 	}
608 	chain->bref_flush = chain->bref;
609 
610 	/*
611 	 * If this is part of a recursive flush we can go ahead and write
612 	 * out the buffer cache buffer and pass a new bref back up the chain.
613 	 *
614 	 * This will never be a volume header.
615 	 */
616 	switch(chain->bref.type) {
617 	case HAMMER2_BREF_TYPE_VOLUME:
618 		/*
619 		 * The volume header is flushed manually by the syncer, not
620 		 * here.
621 		 */
622 		KKASSERT(chain->data != NULL);
623 		KKASSERT(chain->bp == NULL);
624 		kprintf("volume header mirror_tid %jd\n",
625 			hmp->voldata.mirror_tid);
626 
627 		hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT1]=
628 			hammer2_icrc32(
629 				(char *)&hmp->voldata +
630 				 HAMMER2_VOLUME_ICRC1_OFF,
631 				HAMMER2_VOLUME_ICRC1_SIZE);
632 		hmp->voldata.icrc_sects[HAMMER2_VOL_ICRC_SECT0]=
633 			hammer2_icrc32(
634 				(char *)&hmp->voldata +
635 				 HAMMER2_VOLUME_ICRC0_OFF,
636 				HAMMER2_VOLUME_ICRC0_SIZE);
637 		hmp->voldata.icrc_volheader =
638 			hammer2_icrc32(
639 				(char *)&hmp->voldata +
640 				 HAMMER2_VOLUME_ICRCVH_OFF,
641 				HAMMER2_VOLUME_ICRCVH_SIZE);
642 		hmp->volsync = hmp->voldata;
643 		break;
644 	case HAMMER2_BREF_TYPE_DATA:
645 		/*
646 		 * Data elements have already been flushed via the logical
647 		 * file buffer cache.  Their hash was set in the bref by
648 		 * the vop_write code.
649 		 *
650 		 * Make sure the buffer(s) have been flushed out here.
651 		 */
652 		bbytes = chain->bytes;
653 		pbase = chain->bref.data_off & ~(hammer2_off_t)(bbytes - 1);
654 		boff = chain->bref.data_off & HAMMER2_OFF_MASK & (bbytes - 1);
655 
656 		bp = getblk(hmp->devvp, pbase, bbytes, GETBLK_NOWAIT, 0);
657 		if (bp) {
658 			if ((bp->b_flags & (B_CACHE | B_DIRTY)) ==
659 			    (B_CACHE | B_DIRTY)) {
660 				kprintf("x");
661 				cluster_awrite(bp);
662 			} else {
663 				bp->b_flags |= B_RELBUF;
664 				brelse(bp);
665 			}
666 		}
667 		break;
668 	case HAMMER2_BREF_TYPE_INDIRECT:
669 		/*
670 		 * Indirect blocks may be in an INITIAL state.  Use the
671 		 * chain_lock() call to ensure that the buffer has been
672 		 * instantiated (even though it is already locked the buffer
673 		 * might not have been instantiated).
674 		 *
675 		 * Only write the buffer out if it is dirty, it is possible
676 		 * the operating system had already written out the buffer.
677 		 */
678 		hammer2_chain_lock(hmp, chain, HAMMER2_RESOLVE_ALWAYS);
679 		KKASSERT(chain->bp != NULL);
680 
681 		bp = chain->bp;
682 		if ((chain->flags & HAMMER2_CHAIN_DIRTYBP) ||
683 		    (bp->b_flags & B_DIRTY)) {
684 			bdwrite(chain->bp);
685 		} else {
686 			brelse(chain->bp);
687 		}
688 		chain->bp = NULL;
689 		chain->data = NULL;
690 		hammer2_chain_unlock(hmp, chain);
691 		break;
692 	default:
693 		/*
694 		 * Embedded elements have to be flushed out.
695 		 */
696 		KKASSERT(chain->data != NULL);
697 		KKASSERT(chain->bp == NULL);
698 		bref = &chain->bref;
699 
700 		KKASSERT((bref->data_off & HAMMER2_OFF_MASK) != 0);
701 		KKASSERT(HAMMER2_DEC_CHECK(chain->bref.methods) ==
702 			 HAMMER2_CHECK_ISCSI32);
703 
704 		if (chain->bp == NULL) {
705 			/*
706 			 * The data is embedded, we have to acquire the
707 			 * buffer cache buffer and copy the data into it.
708 			 */
709 			if ((bbytes = chain->bytes) < HAMMER2_MINIOSIZE)
710 				bbytes = HAMMER2_MINIOSIZE;
711 			pbase = bref->data_off & ~(hammer2_off_t)(bbytes - 1);
712 			boff = bref->data_off & HAMMER2_OFF_MASK & (bbytes - 1);
713 
714 			/*
715 			 * The getblk() optimization can only be used if the
716 			 * physical block size matches the request.
717 			 */
718 			if (chain->bytes == bbytes) {
719 				bp = getblk(hmp->devvp, pbase, bbytes, 0, 0);
720 				error = 0;
721 			} else {
722 				error = bread(hmp->devvp, pbase, bbytes, &bp);
723 				KKASSERT(error == 0);
724 			}
725 			bdata = (char *)bp->b_data + boff;
726 
727 			/*
728 			 * Copy the data to the buffer, mark the buffer
729 			 * dirty, and convert the chain to unmodified.
730 			 */
731 			bcopy(chain->data, bdata, chain->bytes);
732 			bp->b_flags |= B_CLUSTEROK;
733 			bdwrite(bp);
734 			bp = NULL;
735 			chain->bref.check.iscsi32.value =
736 				hammer2_icrc32(chain->data, chain->bytes);
737 			if (chain->bref.type == HAMMER2_BREF_TYPE_INODE)
738 				++hammer2_iod_meta_write;
739 			else
740 				++hammer2_iod_indr_write;
741 		} else {
742 			chain->bref.check.iscsi32.value =
743 				hammer2_icrc32(chain->data, chain->bytes);
744 		}
745 	}
746 done:
747 	if (hammer2_debug & 0x0008) {
748 		kprintf("%*.*s} %p/%d %04x ",
749 			info->depth, info->depth, "",
750 			chain, chain->refs, chain->flags);
751 	}
752 }
753 
#if 0
/*
 * PASS2 placeholder - not yet implemented and compiled out.
 * (Open question from the author: should it only ever be called with
 * the root chain?)
 */
static void
hammer2_chain_flush_pass2(hammer2_mount_t *hmp, hammer2_chain_t *chain)
{
	/* intentionally empty */
}
#endif
763 
764 static
765 void
766 hammer2_saved_child_cleanup(hammer2_mount_t *hmp,
767 			    hammer2_chain_t *parent, hammer2_chain_t *child)
768 {
769 	atomic_add_int(&child->flushing, -1);
770 	if (child->flushing == 0 && (child->flags & HAMMER2_CHAIN_DELETED)) {
771 		kprintf("hammer2: fixup deferred deleted child\n");
772 		hammer2_chain_lock(hmp, child, HAMMER2_RESOLVE_MAYBE);
773 		hammer2_chain_delete(hmp, parent, child, 0);
774 		hammer2_chain_unlock(hmp, child);
775 	}
776 }
777