13f01ebaaSMatthew Dillon /*
2acbbd0efSMatthew Dillon  * Copyright (c) 2013-2019 The DragonFly Project.  All rights reserved.
33f01ebaaSMatthew Dillon  *
43f01ebaaSMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
53f01ebaaSMatthew Dillon  * by Matthew Dillon <dillon@dragonflybsd.org>
63f01ebaaSMatthew Dillon  *
73f01ebaaSMatthew Dillon  * Redistribution and use in source and binary forms, with or without
83f01ebaaSMatthew Dillon  * modification, are permitted provided that the following conditions
93f01ebaaSMatthew Dillon  * are met:
103f01ebaaSMatthew Dillon  *
113f01ebaaSMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
123f01ebaaSMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
133f01ebaaSMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
143f01ebaaSMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
153f01ebaaSMatthew Dillon  *    the documentation and/or other materials provided with the
163f01ebaaSMatthew Dillon  *    distribution.
173f01ebaaSMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
183f01ebaaSMatthew Dillon  *    contributors may be used to endorse or promote products derived
193f01ebaaSMatthew Dillon  *    from this software without specific, prior written permission.
203f01ebaaSMatthew Dillon  *
213f01ebaaSMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
223f01ebaaSMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
233f01ebaaSMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
243f01ebaaSMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
253f01ebaaSMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
263f01ebaaSMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
273f01ebaaSMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
283f01ebaaSMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
293f01ebaaSMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
303f01ebaaSMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
313f01ebaaSMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
323f01ebaaSMatthew Dillon  * SUCH DAMAGE.
333f01ebaaSMatthew Dillon  */
343f01ebaaSMatthew Dillon #include <sys/param.h>
353f01ebaaSMatthew Dillon #include <sys/systm.h>
363f01ebaaSMatthew Dillon #include <sys/kernel.h>
373f01ebaaSMatthew Dillon #include <sys/proc.h>
383f01ebaaSMatthew Dillon #include <sys/mount.h>
393f01ebaaSMatthew Dillon #include <vm/vm_kern.h>
403f01ebaaSMatthew Dillon #include <vm/vm_extern.h>
413f01ebaaSMatthew Dillon 
423f01ebaaSMatthew Dillon #include "hammer2.h"
433f01ebaaSMatthew Dillon 
4465cacacfSMatthew Dillon /*
453f01ebaaSMatthew Dillon  * breadth-first search
463f01ebaaSMatthew Dillon  */
473f01ebaaSMatthew Dillon typedef struct hammer2_chain_save {
483f01ebaaSMatthew Dillon 	TAILQ_ENTRY(hammer2_chain_save)	entry;
49125966e8SMatthew Dillon 	hammer2_chain_t	*chain;
503f01ebaaSMatthew Dillon } hammer2_chain_save_t;
513f01ebaaSMatthew Dillon 
523f01ebaaSMatthew Dillon TAILQ_HEAD(hammer2_chain_save_list, hammer2_chain_save);
533f01ebaaSMatthew Dillon typedef struct hammer2_chain_save_list hammer2_chain_save_list_t;
543f01ebaaSMatthew Dillon 
55125966e8SMatthew Dillon typedef struct hammer2_bulkfree_info {
56125966e8SMatthew Dillon 	hammer2_dev_t		*hmp;
57125966e8SMatthew Dillon 	kmem_anon_desc_t	kp;
58125966e8SMatthew Dillon 	hammer2_off_t		sbase;		/* sub-loop iteration */
59125966e8SMatthew Dillon 	hammer2_off_t		sstop;
60125966e8SMatthew Dillon 	hammer2_bmap_data_t	*bmap;
61125966e8SMatthew Dillon 	int			depth;
62b5b45574SMatthew Dillon 	long			count_10_00;	/* staged->free	     */
63b5b45574SMatthew Dillon 	long			count_11_10;	/* allocated->staged */
64b5b45574SMatthew Dillon 	long			count_00_11;	/* (should not happen) */
65b5b45574SMatthew Dillon 	long			count_01_11;	/* (should not happen) */
66b5b45574SMatthew Dillon 	long			count_10_11;	/* staged->allocated */
67125966e8SMatthew Dillon 	long			count_l0cleans;
68125966e8SMatthew Dillon 	long			count_linadjusts;
69125966e8SMatthew Dillon 	long			count_inodes_scanned;
70c152fb8dSMatthew Dillon 	long			count_dirents_scanned;
71125966e8SMatthew Dillon 	long			count_dedup_factor;
72c152fb8dSMatthew Dillon 	long			count_bytes_scanned;
73c152fb8dSMatthew Dillon 	long			count_chains_scanned;
74c152fb8dSMatthew Dillon 	long			count_chains_reported;
754ff0b408SMatthew Dillon 	long			bulkfree_calls;
764ff0b408SMatthew Dillon 	int			bulkfree_ticks;
771dddac0aSMatthew Dillon 	int			list_alert;
78125966e8SMatthew Dillon 	hammer2_off_t		adj_free;
79125966e8SMatthew Dillon 	hammer2_tid_t		mtid;
80125966e8SMatthew Dillon 	time_t			save_time;
81125966e8SMatthew Dillon 	hammer2_chain_save_list_t list;
821dddac0aSMatthew Dillon 	long			list_count;
83784d105bSMatthew Dillon 	long			list_count_max;
84f7a9ce16SMatthew Dillon 	hammer2_chain_save_t	*backout;	/* ins pt while backing out */
85125966e8SMatthew Dillon 	hammer2_dedup_t		*dedup;
86125966e8SMatthew Dillon 	int			pri;
87125966e8SMatthew Dillon } hammer2_bulkfree_info_t;
88125966e8SMatthew Dillon 
89125966e8SMatthew Dillon static int h2_bulkfree_test(hammer2_bulkfree_info_t *info,
90acbbd0efSMatthew Dillon 			hammer2_blockref_t *bref, int pri, int saved_error);
91691854bdSMatthew Dillon static uint32_t bigmask_get(hammer2_bmap_data_t *bmap);
92691854bdSMatthew Dillon static int bigmask_good(hammer2_bmap_data_t *bmap, uint32_t live_bigmask);
93125966e8SMatthew Dillon 
943f01ebaaSMatthew Dillon /*
953f01ebaaSMatthew Dillon  * General bulk scan function with callback.  Called with a referenced
96125966e8SMatthew Dillon  * but UNLOCKED parent.  The parent is returned in the same state.
973f01ebaaSMatthew Dillon  */
98125966e8SMatthew Dillon static
993f01ebaaSMatthew Dillon int
hammer2_bulkfree_scan(hammer2_chain_t * parent,int (* func)(hammer2_bulkfree_info_t * info,hammer2_blockref_t * bref),hammer2_bulkfree_info_t * info)10076fc35dbSTomohiro Kusumi hammer2_bulkfree_scan(hammer2_chain_t *parent,
101125966e8SMatthew Dillon 		  int (*func)(hammer2_bulkfree_info_t *info,
102125966e8SMatthew Dillon 			      hammer2_blockref_t *bref),
103125966e8SMatthew Dillon 		  hammer2_bulkfree_info_t *info)
1043f01ebaaSMatthew Dillon {
105125966e8SMatthew Dillon 	hammer2_blockref_t bref;
1063f01ebaaSMatthew Dillon 	hammer2_chain_t *chain;
107acbbd0efSMatthew Dillon 	hammer2_chain_save_t *tail;
108acbbd0efSMatthew Dillon 	hammer2_chain_save_t *save;
109125966e8SMatthew Dillon 	int first = 1;
1101e2c8208SMatthew Dillon 	int rup_error;
1111e2c8208SMatthew Dillon 	int error;
112acbbd0efSMatthew Dillon 	int e2;
1133f01ebaaSMatthew Dillon 
114125966e8SMatthew Dillon 	++info->pri;
1153f01ebaaSMatthew Dillon 
116125966e8SMatthew Dillon 	chain = NULL;
1171e2c8208SMatthew Dillon 	rup_error = 0;
1181e2c8208SMatthew Dillon 	error = 0;
1193f01ebaaSMatthew Dillon 
120acbbd0efSMatthew Dillon 	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
121acbbd0efSMatthew Dillon 				   HAMMER2_RESOLVE_SHARED);
122784d105bSMatthew Dillon 
123784d105bSMatthew Dillon 	/*
124784d105bSMatthew Dillon 	 * End of scan if parent is a PFS
125784d105bSMatthew Dillon 	 */
126784d105bSMatthew Dillon 	tail = TAILQ_FIRST(&info->list);
127acbbd0efSMatthew Dillon 
128acbbd0efSMatthew Dillon 	/*
129acbbd0efSMatthew Dillon 	 * The parent was previously retrieved NODATA and thus has not
130acbbd0efSMatthew Dillon 	 * tested the CRC.  Now that we have locked it normally, check
131acbbd0efSMatthew Dillon 	 * for a CRC problem and skip it if we found one.  The bulk scan
132acbbd0efSMatthew Dillon 	 * cannot safely traverse invalid block tables (we could end up
133acbbd0efSMatthew Dillon 	 * in an endless loop or cause a panic).
134acbbd0efSMatthew Dillon 	 */
135acbbd0efSMatthew Dillon 	if (parent->error & HAMMER2_ERROR_CHECK) {
136acbbd0efSMatthew Dillon 		error = parent->error;
137acbbd0efSMatthew Dillon 		goto done;
138acbbd0efSMatthew Dillon 	}
139acbbd0efSMatthew Dillon 
140acbbd0efSMatthew Dillon 	/*
141acbbd0efSMatthew Dillon 	 * Report which PFS is being scanned
142acbbd0efSMatthew Dillon 	 */
143acbbd0efSMatthew Dillon 	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
144acbbd0efSMatthew Dillon 	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT)) {
145acbbd0efSMatthew Dillon 		kprintf("hammer2_bulkfree: Scanning %s\n",
146acbbd0efSMatthew Dillon 			parent->data->ipdata.filename);
147acbbd0efSMatthew Dillon 	}
148acbbd0efSMatthew Dillon 
1493f01ebaaSMatthew Dillon 	/*
1503f01ebaaSMatthew Dillon 	 * Generally loop on the contents if we have not been flagged
1513f01ebaaSMatthew Dillon 	 * for abort.
152125966e8SMatthew Dillon 	 *
153125966e8SMatthew Dillon 	 * Remember that these chains are completely isolated from
154125966e8SMatthew Dillon 	 * the frontend, so we can release locks temporarily without
155125966e8SMatthew Dillon 	 * imploding.
1563f01ebaaSMatthew Dillon 	 */
1571e2c8208SMatthew Dillon 	for (;;) {
1581e2c8208SMatthew Dillon 		error |= hammer2_chain_scan(parent, &chain, &bref, &first,
1593f01ebaaSMatthew Dillon 					    HAMMER2_LOOKUP_NODATA |
1601e2c8208SMatthew Dillon 					    HAMMER2_LOOKUP_SHARED);
1611e2c8208SMatthew Dillon 
1621e2c8208SMatthew Dillon 		/*
1631e2c8208SMatthew Dillon 		 * Handle EOF or other error at current level.  This stops
1641e2c8208SMatthew Dillon 		 * the bulkfree scan.
1651e2c8208SMatthew Dillon 		 */
166acbbd0efSMatthew Dillon 		if (error & ~HAMMER2_ERROR_CHECK)
1671e2c8208SMatthew Dillon 			break;
1681e2c8208SMatthew Dillon 
1694ff0b408SMatthew Dillon 		/*
1704ff0b408SMatthew Dillon 		 * Account for dirents before thre data_off test, since most
1714ff0b408SMatthew Dillon 		 * dirents do not need a data reference.
1724ff0b408SMatthew Dillon 		 */
173c152fb8dSMatthew Dillon 		if (bref.type == HAMMER2_BREF_TYPE_DIRENT)
174c152fb8dSMatthew Dillon 			++info->count_dirents_scanned;
175c152fb8dSMatthew Dillon 
176c152fb8dSMatthew Dillon 		/*
177c152fb8dSMatthew Dillon 		 * Ignore brefs without data (typically dirents)
178c152fb8dSMatthew Dillon 		 */
179c152fb8dSMatthew Dillon 		if ((bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
180c152fb8dSMatthew Dillon 			continue;
181c152fb8dSMatthew Dillon 
182125966e8SMatthew Dillon 		/*
183125966e8SMatthew Dillon 		 * Process bref, chain is only non-NULL if the bref
184125966e8SMatthew Dillon 		 * might be recursable (its possible that we sometimes get
185125966e8SMatthew Dillon 		 * a non-NULL chain where the bref cannot be recursed).
186acbbd0efSMatthew Dillon 		 *
187acbbd0efSMatthew Dillon 		 * If we already ran down this tree we do not have to do it
188acbbd0efSMatthew Dillon 		 * again, but we must still recover any cumulative error
189acbbd0efSMatthew Dillon 		 * recorded from the time we did.
190125966e8SMatthew Dillon 		 */
191125966e8SMatthew Dillon 		++info->pri;
192acbbd0efSMatthew Dillon 		e2 = h2_bulkfree_test(info, &bref, 1, 0);
193acbbd0efSMatthew Dillon 		if (e2) {
194acbbd0efSMatthew Dillon 			error |= e2 & ~HAMMER2_ERROR_EOF;
195125966e8SMatthew Dillon 			continue;
196acbbd0efSMatthew Dillon 		}
197125966e8SMatthew Dillon 
198c152fb8dSMatthew Dillon 		if (bref.type == HAMMER2_BREF_TYPE_INODE)
199c152fb8dSMatthew Dillon 			++info->count_inodes_scanned;
200c152fb8dSMatthew Dillon 
2011e2c8208SMatthew Dillon 		error |= func(info, &bref);
202acbbd0efSMatthew Dillon 		if (error & ~HAMMER2_ERROR_CHECK)
203125966e8SMatthew Dillon 			break;
204125966e8SMatthew Dillon 
205125966e8SMatthew Dillon 		/*
206125966e8SMatthew Dillon 		 * A non-null chain is always returned if it is
207125966e8SMatthew Dillon 		 * recursive, otherwise a non-null chain might be
208125966e8SMatthew Dillon 		 * returned but usually is not when not recursive.
209125966e8SMatthew Dillon 		 */
210125966e8SMatthew Dillon 		if (chain == NULL)
211125966e8SMatthew Dillon 			continue;
212125966e8SMatthew Dillon 
213c152fb8dSMatthew Dillon 		info->count_bytes_scanned += chain->bytes;
214c152fb8dSMatthew Dillon 		++info->count_chains_scanned;
215c152fb8dSMatthew Dillon 
216c152fb8dSMatthew Dillon 		if (info->count_chains_scanned >=
217944ddad0SMatthew Dillon 		    info->count_chains_reported + 1000000 ||
218944ddad0SMatthew Dillon 		    (info->count_chains_scanned < 1000000 &&
219944ddad0SMatthew Dillon 		     info->count_chains_scanned >=
220944ddad0SMatthew Dillon 		     info->count_chains_reported + 100000)) {
221c152fb8dSMatthew Dillon 			kprintf(" chains %-7ld inodes %-7ld "
222c152fb8dSMatthew Dillon 				"dirents %-7ld bytes %5ldMB\n",
223c152fb8dSMatthew Dillon 				info->count_chains_scanned,
224c152fb8dSMatthew Dillon 				info->count_inodes_scanned,
225c152fb8dSMatthew Dillon 				info->count_dirents_scanned,
226c152fb8dSMatthew Dillon 				info->count_bytes_scanned / 1000000);
227944ddad0SMatthew Dillon 			info->count_chains_reported =
228944ddad0SMatthew Dillon 				info->count_chains_scanned;
229c152fb8dSMatthew Dillon 		}
230c152fb8dSMatthew Dillon 
231125966e8SMatthew Dillon 		/*
232125966e8SMatthew Dillon 		 * Else check type and setup depth-first scan.
233125966e8SMatthew Dillon 		 *
234125966e8SMatthew Dillon 		 * Account for bytes actually read.
235125966e8SMatthew Dillon 		 */
2363f01ebaaSMatthew Dillon 		switch(chain->bref.type) {
2373f01ebaaSMatthew Dillon 		case HAMMER2_BREF_TYPE_INODE:
2383f01ebaaSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
2393f01ebaaSMatthew Dillon 		case HAMMER2_BREF_TYPE_INDIRECT:
2403f01ebaaSMatthew Dillon 		case HAMMER2_BREF_TYPE_VOLUME:
2413f01ebaaSMatthew Dillon 		case HAMMER2_BREF_TYPE_FREEMAP:
242125966e8SMatthew Dillon 			++info->depth;
243acbbd0efSMatthew Dillon 			if (chain->error & HAMMER2_ERROR_CHECK) {
244acbbd0efSMatthew Dillon 				/*
245acbbd0efSMatthew Dillon 				 * Cannot safely recurse chains with crc
246acbbd0efSMatthew Dillon 				 * errors, even in emergency mode.
247acbbd0efSMatthew Dillon 				 */
248acbbd0efSMatthew Dillon 				/* NOP */
2491dddac0aSMatthew Dillon 			} else if (info->depth > 16 ||
250f7a9ce16SMatthew Dillon 				   info->backout ||
251784d105bSMatthew Dillon 				   (info->depth > hammer2_limit_saved_depth &&
252784d105bSMatthew Dillon 				   info->list_count >=
253784d105bSMatthew Dillon 				    (hammer2_limit_saved_chains >> 2)))
2541dddac0aSMatthew Dillon 			{
255784d105bSMatthew Dillon 				/*
256784d105bSMatthew Dillon 				 * We must defer the recursion if it runs
257784d105bSMatthew Dillon 				 * too deep or if too many saved chains are
258784d105bSMatthew Dillon 				 * allocated.
259784d105bSMatthew Dillon 				 *
260784d105bSMatthew Dillon 				 * In the case of too many saved chains, we
261784d105bSMatthew Dillon 				 * have to stop recursing ASAP to avoid an
262784d105bSMatthew Dillon 				 * explosion of memory use since each radix
263784d105bSMatthew Dillon 				 * level can hold 512 elements.
264f7a9ce16SMatthew Dillon 				 *
265f7a9ce16SMatthew Dillon 				 * If we had to defer at a deeper level
266f7a9ce16SMatthew Dillon 				 * backout is non-NULL.  We must backout
267f7a9ce16SMatthew Dillon 				 * completely before resuming.
268784d105bSMatthew Dillon 				 */
269784d105bSMatthew Dillon 				if (info->list_count >
270784d105bSMatthew Dillon 				     hammer2_limit_saved_chains &&
271784d105bSMatthew Dillon 				    info->list_alert == 0)
2721dddac0aSMatthew Dillon 				{
2731dddac0aSMatthew Dillon 					kprintf("hammer2: during bulkfree, "
2741dddac0aSMatthew Dillon 						"saved chains exceeded %ld "
2751dddac0aSMatthew Dillon 						"at depth %d, "
2761dddac0aSMatthew Dillon 						"backing off to less-efficient "
2771dddac0aSMatthew Dillon 						"operation\n",
2781dddac0aSMatthew Dillon 						hammer2_limit_saved_chains,
2791dddac0aSMatthew Dillon 						info->depth);
2801dddac0aSMatthew Dillon 					info->list_alert = 1;
2811dddac0aSMatthew Dillon 				}
2821dddac0aSMatthew Dillon 
283784d105bSMatthew Dillon 				/*
284784d105bSMatthew Dillon 				 * Must be placed at head so pfsroot scan
285784d105bSMatthew Dillon 				 * can exhaust saved elements for that pfs
286784d105bSMatthew Dillon 				 * first.
287784d105bSMatthew Dillon 				 *
288784d105bSMatthew Dillon 				 * Must be placed at head for depth-first
289784d105bSMatthew Dillon 				 * recovery when too many saved chains, to
290784d105bSMatthew Dillon 				 * limit number of chains saved during
291f7a9ce16SMatthew Dillon 				 * saved-chain reruns.  The worst-case excess
292f7a9ce16SMatthew Dillon 				 * is (maximum_depth * 512) saved chains above
293f7a9ce16SMatthew Dillon 				 * the threshold.
294f7a9ce16SMatthew Dillon 				 *
295f7a9ce16SMatthew Dillon 				 * The maximum_depth generally occurs in the
296f7a9ce16SMatthew Dillon 				 * inode index and can be fairly deep once
297f7a9ce16SMatthew Dillon 				 * the radix tree becomes a bit fragmented.
298f7a9ce16SMatthew Dillon 				 * nominally 100M inodes would be only 4 deep,
299f7a9ce16SMatthew Dillon 				 * plus a maximally sized file would be another
300f7a9ce16SMatthew Dillon 				 * 8 deep, but with fragmentation it can wind
301f7a9ce16SMatthew Dillon 				 * up being a lot more.
302f7a9ce16SMatthew Dillon 				 *
303f7a9ce16SMatthew Dillon 				 * However, when backing out, we have to place
304f7a9ce16SMatthew Dillon 				 * all the entries in each parent node not
305f7a9ce16SMatthew Dillon 				 * yet processed on the list too, and because
306f7a9ce16SMatthew Dillon 				 * these entries are shallower they must be
307f7a9ce16SMatthew Dillon 				 * placed after each other in order to maintain
308f7a9ce16SMatthew Dillon 				 * our depth-first processing.
309784d105bSMatthew Dillon 				 */
310125966e8SMatthew Dillon 				save = kmalloc(sizeof(*save), M_HAMMER2,
3113f01ebaaSMatthew Dillon 					       M_WAITOK | M_ZERO);
312125966e8SMatthew Dillon 				save->chain = chain;
3133f01ebaaSMatthew Dillon 				hammer2_chain_ref(chain);
314f7a9ce16SMatthew Dillon 
315f7a9ce16SMatthew Dillon 				if (info->backout) {
316f7a9ce16SMatthew Dillon 					TAILQ_INSERT_AFTER(&info->list,
317f7a9ce16SMatthew Dillon 							   info->backout,
318f7a9ce16SMatthew Dillon 							   save, entry);
319f7a9ce16SMatthew Dillon 				} else {
320f7a9ce16SMatthew Dillon 					TAILQ_INSERT_HEAD(&info->list,
321f7a9ce16SMatthew Dillon 							  save, entry);
322f7a9ce16SMatthew Dillon 				}
323f7a9ce16SMatthew Dillon 				info->backout = save;
3241dddac0aSMatthew Dillon 				++info->list_count;
325784d105bSMatthew Dillon 				if (info->list_count_max < info->list_count)
326784d105bSMatthew Dillon 					info->list_count_max = info->list_count;
327125966e8SMatthew Dillon 
328125966e8SMatthew Dillon 				/* guess */
329125966e8SMatthew Dillon 				info->pri += 10;
330125966e8SMatthew Dillon 			} else {
331125966e8SMatthew Dillon 				int savepri = info->pri;
332125966e8SMatthew Dillon 
333125966e8SMatthew Dillon 				hammer2_chain_unlock(chain);
33465cacacfSMatthew Dillon 				hammer2_chain_unlock(parent);
335125966e8SMatthew Dillon 				info->pri = 0;
33676fc35dbSTomohiro Kusumi 				rup_error |= hammer2_bulkfree_scan(chain,
337acbbd0efSMatthew Dillon 								   func, info);
338125966e8SMatthew Dillon 				info->pri += savepri;
33965cacacfSMatthew Dillon 				hammer2_chain_lock(parent,
34065cacacfSMatthew Dillon 						   HAMMER2_RESOLVE_ALWAYS |
34165cacacfSMatthew Dillon 						   HAMMER2_RESOLVE_SHARED);
342125966e8SMatthew Dillon 				hammer2_chain_lock(chain,
343125966e8SMatthew Dillon 						   HAMMER2_RESOLVE_ALWAYS |
344125966e8SMatthew Dillon 						   HAMMER2_RESOLVE_SHARED);
345125966e8SMatthew Dillon 			}
346125966e8SMatthew Dillon 			--info->depth;
3473f01ebaaSMatthew Dillon 			break;
348c152fb8dSMatthew Dillon 		case HAMMER2_BREF_TYPE_DATA:
349c152fb8dSMatthew Dillon 			break;
3503f01ebaaSMatthew Dillon 		default:
3513f01ebaaSMatthew Dillon 			/* does not recurse */
3523f01ebaaSMatthew Dillon 			break;
3533f01ebaaSMatthew Dillon 		}
3541e2c8208SMatthew Dillon 		if (rup_error & HAMMER2_ERROR_ABORTED)
3551e2c8208SMatthew Dillon 			break;
3563f01ebaaSMatthew Dillon 	}
357125966e8SMatthew Dillon 	if (chain) {
358125966e8SMatthew Dillon 		hammer2_chain_unlock(chain);
359125966e8SMatthew Dillon 		hammer2_chain_drop(chain);
360125966e8SMatthew Dillon 	}
3613f01ebaaSMatthew Dillon 
3623f01ebaaSMatthew Dillon 	/*
363acbbd0efSMatthew Dillon 	 * If this is a PFSROOT, also re-run any defered elements
364acbbd0efSMatthew Dillon 	 * added during our scan so we can report any cumulative errors
365acbbd0efSMatthew Dillon 	 * for the PFS.
366acbbd0efSMatthew Dillon 	 */
367acbbd0efSMatthew Dillon 	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
368acbbd0efSMatthew Dillon 	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT)) {
369acbbd0efSMatthew Dillon 		for (;;) {
370acbbd0efSMatthew Dillon 			int opri;
371acbbd0efSMatthew Dillon 
372784d105bSMatthew Dillon 			save = TAILQ_FIRST(&info->list);
373784d105bSMatthew Dillon 			if (save == tail)	/* exhaust this PFS only */
374acbbd0efSMatthew Dillon 				break;
375acbbd0efSMatthew Dillon 
376acbbd0efSMatthew Dillon 			TAILQ_REMOVE(&info->list, save, entry);
377f7a9ce16SMatthew Dillon 			info->backout = NULL;
3781dddac0aSMatthew Dillon 			--info->list_count;
379acbbd0efSMatthew Dillon 			opri = info->pri;
380acbbd0efSMatthew Dillon 			info->pri = 0;
38176fc35dbSTomohiro Kusumi 			rup_error |= hammer2_bulkfree_scan(save->chain, func, info);
382acbbd0efSMatthew Dillon 			hammer2_chain_drop(save->chain);
383acbbd0efSMatthew Dillon 			kfree(save, M_HAMMER2);
384acbbd0efSMatthew Dillon 			info->pri = opri;
385acbbd0efSMatthew Dillon 		}
386acbbd0efSMatthew Dillon 	}
387acbbd0efSMatthew Dillon 
388acbbd0efSMatthew Dillon 	error |= rup_error;
389acbbd0efSMatthew Dillon 
390acbbd0efSMatthew Dillon 	/*
391acbbd0efSMatthew Dillon 	 * Report which PFS the errors were encountered in.
392acbbd0efSMatthew Dillon 	 */
393acbbd0efSMatthew Dillon 	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
394acbbd0efSMatthew Dillon 	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) &&
395acbbd0efSMatthew Dillon 	    (error & ~HAMMER2_ERROR_EOF)) {
396acbbd0efSMatthew Dillon 		kprintf("hammer2_bulkfree: Encountered errors (%08x) "
397acbbd0efSMatthew Dillon 			"while scanning \"%s\"\n",
398acbbd0efSMatthew Dillon 			error, parent->data->ipdata.filename);
399acbbd0efSMatthew Dillon 	}
400acbbd0efSMatthew Dillon 
401acbbd0efSMatthew Dillon 	/*
402125966e8SMatthew Dillon 	 * Save with higher pri now that we know what it is.
4033f01ebaaSMatthew Dillon 	 */
404acbbd0efSMatthew Dillon 	h2_bulkfree_test(info, &parent->bref, info->pri + 1,
405acbbd0efSMatthew Dillon 			 (error & ~HAMMER2_ERROR_EOF));
406125966e8SMatthew Dillon 
407acbbd0efSMatthew Dillon done:
4083f01ebaaSMatthew Dillon 	hammer2_chain_unlock(parent);
4093f01ebaaSMatthew Dillon 
410acbbd0efSMatthew Dillon 	return (error & ~HAMMER2_ERROR_EOF);
4113f01ebaaSMatthew Dillon }
4123f01ebaaSMatthew Dillon 
4133f01ebaaSMatthew Dillon /*
4143f01ebaaSMatthew Dillon  * Bulkfree algorithm
4153f01ebaaSMatthew Dillon  *
4163f01ebaaSMatthew Dillon  * Repeat {
4173f01ebaaSMatthew Dillon  *	Chain flush (partial synchronization) XXX removed
4183f01ebaaSMatthew Dillon  *	Scan the whole topology - build in-memory freemap (mark 11)
4193f01ebaaSMatthew Dillon  *	Reconcile the in-memory freemap against the on-disk freemap.
4203f01ebaaSMatthew Dillon  *		ondisk xx -> ondisk 11 (if allocated)
4213f01ebaaSMatthew Dillon  *		ondisk 11 -> ondisk 10 (if free in-memory)
4223f01ebaaSMatthew Dillon  *		ondisk 10 -> ondisk 00 (if free in-memory) - on next pass
4233f01ebaaSMatthew Dillon  * }
4243f01ebaaSMatthew Dillon  *
4253f01ebaaSMatthew Dillon  * The topology scan may have to be performed multiple times to window
4263f01ebaaSMatthew Dillon  * freemaps which are too large to fit in kernel memory.
4273f01ebaaSMatthew Dillon  *
4283f01ebaaSMatthew Dillon  * Races are handled using a double-transition (11->10, 10->00).  The bulkfree
4293f01ebaaSMatthew Dillon  * scan snapshots the volume root's blockset and thus can run concurrent with
4303f01ebaaSMatthew Dillon  * normal operations, as long as a full flush is made between each pass to
4313f01ebaaSMatthew Dillon  * synchronize any modified chains (otherwise their blocks might be improperly
4323f01ebaaSMatthew Dillon  * freed).
4333f01ebaaSMatthew Dillon  *
43463a8b78dSTomohiro Kusumi  * Temporary memory in multiples of 32KB is required to reconstruct the leaf
4353f01ebaaSMatthew Dillon  * hammer2_bmap_data blocks so they can later be compared against the live
43663a8b78dSTomohiro Kusumi  * freemap.  Each 32KB represents 256 x 16KB x 256 = ~1 GB of storage.
4373f01ebaaSMatthew Dillon  * A 32MB save area thus represents around ~1 TB.  The temporary memory
4383f01ebaaSMatthew Dillon  * allocated can be specified.  If it is not sufficient multiple topology
4393f01ebaaSMatthew Dillon  * passes will be made.
4403f01ebaaSMatthew Dillon  */
4413f01ebaaSMatthew Dillon 
4423f01ebaaSMatthew Dillon /*
4433f01ebaaSMatthew Dillon  * Bulkfree callback info
4443f01ebaaSMatthew Dillon  */
4459dca9515SMatthew Dillon static void hammer2_bulkfree_thread(void *arg __unused);
446470dad14SMatthew Dillon static void cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size);
447125966e8SMatthew Dillon static int h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo,
448125966e8SMatthew Dillon 			hammer2_blockref_t *bref);
449c8c0a18aSMatthew Dillon static int h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo);
4503f01ebaaSMatthew Dillon static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
4517767d389SMatthew Dillon 			hammer2_off_t data_off, hammer2_bmap_data_t *live,
45285b1f267SMatthew Dillon 			hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base);
4533f01ebaaSMatthew Dillon 
4549dca9515SMatthew Dillon void
hammer2_bulkfree_init(hammer2_dev_t * hmp)4559dca9515SMatthew Dillon hammer2_bulkfree_init(hammer2_dev_t *hmp)
4569dca9515SMatthew Dillon {
4579dca9515SMatthew Dillon 	hammer2_thr_create(&hmp->bfthr, NULL, hmp,
4589dca9515SMatthew Dillon 			   hmp->devrepname, -1, -1,
4599dca9515SMatthew Dillon 			   hammer2_bulkfree_thread);
4609dca9515SMatthew Dillon }
4619dca9515SMatthew Dillon 
4629dca9515SMatthew Dillon void
hammer2_bulkfree_uninit(hammer2_dev_t * hmp)4639dca9515SMatthew Dillon hammer2_bulkfree_uninit(hammer2_dev_t *hmp)
4649dca9515SMatthew Dillon {
4659dca9515SMatthew Dillon 	hammer2_thr_delete(&hmp->bfthr);
4669dca9515SMatthew Dillon }
4679dca9515SMatthew Dillon 
4689dca9515SMatthew Dillon static void
hammer2_bulkfree_thread(void * arg)4699dca9515SMatthew Dillon hammer2_bulkfree_thread(void *arg)
4709dca9515SMatthew Dillon {
4719dca9515SMatthew Dillon 	hammer2_thread_t *thr = arg;
4729dca9515SMatthew Dillon 	hammer2_ioc_bulkfree_t bfi;
4739dca9515SMatthew Dillon 	uint32_t flags;
4749dca9515SMatthew Dillon 
4759dca9515SMatthew Dillon 	for (;;) {
4769dca9515SMatthew Dillon 		hammer2_thr_wait_any(thr,
4779dca9515SMatthew Dillon 				     HAMMER2_THREAD_STOP |
4789dca9515SMatthew Dillon 				     HAMMER2_THREAD_FREEZE |
4799dca9515SMatthew Dillon 				     HAMMER2_THREAD_UNFREEZE |
4809dca9515SMatthew Dillon 				     HAMMER2_THREAD_REMASTER,
4819dca9515SMatthew Dillon 				     hz * 60);
4829dca9515SMatthew Dillon 
4839dca9515SMatthew Dillon 		flags = thr->flags;
4849dca9515SMatthew Dillon 		cpu_ccfence();
4859dca9515SMatthew Dillon 		if (flags & HAMMER2_THREAD_STOP)
4869dca9515SMatthew Dillon 			break;
4879dca9515SMatthew Dillon 		if (flags & HAMMER2_THREAD_FREEZE) {
4889dca9515SMatthew Dillon 			hammer2_thr_signal2(thr, HAMMER2_THREAD_FROZEN,
4899dca9515SMatthew Dillon 						 HAMMER2_THREAD_FREEZE);
4909dca9515SMatthew Dillon 			continue;
4919dca9515SMatthew Dillon 		}
4929dca9515SMatthew Dillon 		if (flags & HAMMER2_THREAD_UNFREEZE) {
4939dca9515SMatthew Dillon 			hammer2_thr_signal2(thr, 0,
4949dca9515SMatthew Dillon 						 HAMMER2_THREAD_FROZEN |
4959dca9515SMatthew Dillon 						 HAMMER2_THREAD_UNFREEZE);
4969dca9515SMatthew Dillon 			continue;
4979dca9515SMatthew Dillon 		}
4989dca9515SMatthew Dillon 		if (flags & HAMMER2_THREAD_FROZEN)
4999dca9515SMatthew Dillon 			continue;
5009dca9515SMatthew Dillon 		if (flags & HAMMER2_THREAD_REMASTER) {
5019dca9515SMatthew Dillon 			hammer2_thr_signal2(thr, 0, HAMMER2_THREAD_REMASTER);
5029dca9515SMatthew Dillon 			bzero(&bfi, sizeof(bfi));
5039dca9515SMatthew Dillon 			bfi.size = 8192 * 1024;
504f079a0a8SMatthew Dillon 			/* hammer2_bulkfree_pass(thr->hmp, &bfi); */
5059dca9515SMatthew Dillon 		}
5069dca9515SMatthew Dillon 	}
5079dca9515SMatthew Dillon 	thr->td = NULL;
5089dca9515SMatthew Dillon 	hammer2_thr_signal(thr, HAMMER2_THREAD_STOPPED);
5099dca9515SMatthew Dillon 	/* structure can go invalid at this point */
5109dca9515SMatthew Dillon }
5119dca9515SMatthew Dillon 
5123f01ebaaSMatthew Dillon int
hammer2_bulkfree_pass(hammer2_dev_t * hmp,hammer2_chain_t * vchain,hammer2_ioc_bulkfree_t * bfi)513f079a0a8SMatthew Dillon hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_chain_t *vchain,
514f079a0a8SMatthew Dillon 		      hammer2_ioc_bulkfree_t *bfi)
5153f01ebaaSMatthew Dillon {
5163f01ebaaSMatthew Dillon 	hammer2_bulkfree_info_t cbinfo;
517125966e8SMatthew Dillon 	hammer2_chain_save_t *save;
5183f01ebaaSMatthew Dillon 	hammer2_off_t incr;
5193f01ebaaSMatthew Dillon 	size_t size;
5201e2c8208SMatthew Dillon 	int error;
5213f01ebaaSMatthew Dillon 
5223f01ebaaSMatthew Dillon 	/*
523125966e8SMatthew Dillon 	 * We have to clear the live dedup cache as it might have entries
524125966e8SMatthew Dillon 	 * that are freeable as of now.  Any new entries in the dedup cache
525125966e8SMatthew Dillon 	 * made after this point, even if they become freeable, will have
526125966e8SMatthew Dillon 	 * previously been fully allocated and will be protected by the
527125966e8SMatthew Dillon 	 * 2-stage bulkfree.
528125966e8SMatthew Dillon 	 */
529125966e8SMatthew Dillon 	hammer2_dedup_clear(hmp);
530125966e8SMatthew Dillon 
5313f01ebaaSMatthew Dillon 	/*
532d1b65214SMatthew Dillon 	 * Setup for free pass using the buffer size specified by the
533d1b65214SMatthew Dillon 	 * hammer2 utility, 32K-aligned.
5343f01ebaaSMatthew Dillon 	 */
5353f01ebaaSMatthew Dillon 	bzero(&cbinfo, sizeof(cbinfo));
5363f01ebaaSMatthew Dillon 	size = (bfi->size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
5373f01ebaaSMatthew Dillon 	       ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);
538d1b65214SMatthew Dillon 
539d1b65214SMatthew Dillon 	/*
540d1b65214SMatthew Dillon 	 * Cap at 1/4 physical memory (hammer2 utility will not normally
541d1b65214SMatthew Dillon 	 * ever specify a buffer this big, but leave the option available).
542d1b65214SMatthew Dillon 	 */
543944ddad0SMatthew Dillon 	if (size > kmem_lim_size() * 1024 * 1024 / 4) {
544944ddad0SMatthew Dillon 		size = kmem_lim_size() * 1024 * 1024 / 4;
545944ddad0SMatthew Dillon 		kprintf("hammer2: Warning: capping bulkfree buffer at %jdM\n",
546944ddad0SMatthew Dillon 			(intmax_t)size / (1024 * 1024));
547944ddad0SMatthew Dillon 	}
548c152fb8dSMatthew Dillon 
549d1b65214SMatthew Dillon #define HAMMER2_FREEMAP_SIZEDIV	\
550d1b65214SMatthew Dillon 	(HAMMER2_FREEMAP_LEVEL1_SIZE / HAMMER2_FREEMAP_LEVELN_PSIZE)
551d1b65214SMatthew Dillon 
552d1b65214SMatthew Dillon 	/*
553d1b65214SMatthew Dillon 	 * Cap at the size needed to cover the whole volume to avoid
554d1b65214SMatthew Dillon 	 * making an unnecessarily large allocation.
555d1b65214SMatthew Dillon 	 */
556*23d820f0STomohiro Kusumi 	if (size > hmp->total_size / HAMMER2_FREEMAP_SIZEDIV)
557*23d820f0STomohiro Kusumi 		size = howmany(hmp->total_size, HAMMER2_FREEMAP_SIZEDIV);
558d1b65214SMatthew Dillon 
559d1b65214SMatthew Dillon 	/*
560d1b65214SMatthew Dillon 	 * Minimum bitmap buffer size, then align to a LEVELN_PSIZE (32K)
561d1b65214SMatthew Dillon 	 * boundary.
562d1b65214SMatthew Dillon 	 */
563d1b65214SMatthew Dillon 	if (size < 1024 * 1024)
564d1b65214SMatthew Dillon 		size = 1024 * 1024;
565d1b65214SMatthew Dillon 	size = (size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
566d1b65214SMatthew Dillon 	       ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);
567d1b65214SMatthew Dillon 
5683f01ebaaSMatthew Dillon 	cbinfo.hmp = hmp;
5693091de50SMatthew Dillon 	cbinfo.bmap = kmem_alloc_swapbacked(&cbinfo.kp, size, VM_SUBSYS_HAMMER);
570125966e8SMatthew Dillon 	cbinfo.dedup = kmalloc(sizeof(*cbinfo.dedup) * HAMMER2_DEDUP_HEUR_SIZE,
571125966e8SMatthew Dillon 			       M_HAMMER2, M_WAITOK | M_ZERO);
572125966e8SMatthew Dillon 
573d1b65214SMatthew Dillon 	kprintf("hammer2: bulkfree buf=%jdM\n",
574d1b65214SMatthew Dillon 		(intmax_t)size / (1024 * 1024));
575d1b65214SMatthew Dillon 
5763f01ebaaSMatthew Dillon 	/*
57763a8b78dSTomohiro Kusumi 	 * Normalize start point to a 1GB boundary.  We operate on a
57863a8b78dSTomohiro Kusumi 	 * 32KB leaf bitmap boundary which represents 1GB of storage.
5793f01ebaaSMatthew Dillon 	 */
5803f01ebaaSMatthew Dillon 	cbinfo.sbase = bfi->sbase;
5810b738157STomohiro Kusumi 	if (cbinfo.sbase > hmp->total_size)
5820b738157STomohiro Kusumi 		cbinfo.sbase = hmp->total_size;
5833f01ebaaSMatthew Dillon 	cbinfo.sbase &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
584125966e8SMatthew Dillon 	TAILQ_INIT(&cbinfo.list);
5853f01ebaaSMatthew Dillon 
5864ff0b408SMatthew Dillon 	cbinfo.bulkfree_ticks = ticks;
5874ff0b408SMatthew Dillon 
5883f01ebaaSMatthew Dillon 	/*
5893f01ebaaSMatthew Dillon 	 * Loop on a full meta-data scan as many times as required to
5903f01ebaaSMatthew Dillon 	 * get through all available storage.
5913f01ebaaSMatthew Dillon 	 */
5921e2c8208SMatthew Dillon 	error = 0;
5930b738157STomohiro Kusumi 	while (cbinfo.sbase < hmp->total_size) {
5943f01ebaaSMatthew Dillon 		/*
5953f01ebaaSMatthew Dillon 		 * We have enough ram to represent (incr) bytes of storage.
59663a8b78dSTomohiro Kusumi 		 * Each 32KB of ram represents 1GB of storage.
59785b1f267SMatthew Dillon 		 *
59885b1f267SMatthew Dillon 		 * We must also clean out our de-duplication heuristic for
59985b1f267SMatthew Dillon 		 * each (incr) bytes of storage, otherwise we wind up not
60085b1f267SMatthew Dillon 		 * scanning meta-data for later areas of storage because
60185b1f267SMatthew Dillon 		 * they had already been scanned in earlier areas of storage.
60285b1f267SMatthew Dillon 		 * Since the ranging is different, we have to restart
60385b1f267SMatthew Dillon 		 * the dedup heuristic too.
6043f01ebaaSMatthew Dillon 		 */
605d1b65214SMatthew Dillon 		int allmedia;
606d1b65214SMatthew Dillon 
607470dad14SMatthew Dillon 		cbinfo_bmap_init(&cbinfo, size);
608be36dcd4SMatthew Dillon 		bzero(cbinfo.dedup, sizeof(*cbinfo.dedup) *
609be36dcd4SMatthew Dillon 				    HAMMER2_DEDUP_HEUR_SIZE);
610c152fb8dSMatthew Dillon 		cbinfo.count_inodes_scanned = 0;
611c152fb8dSMatthew Dillon 		cbinfo.count_dirents_scanned = 0;
612c152fb8dSMatthew Dillon 		cbinfo.count_bytes_scanned = 0;
613c152fb8dSMatthew Dillon 		cbinfo.count_chains_scanned = 0;
614c152fb8dSMatthew Dillon 		cbinfo.count_chains_reported = 0;
615c152fb8dSMatthew Dillon 
6163f01ebaaSMatthew Dillon 		incr = size / HAMMER2_FREEMAP_LEVELN_PSIZE *
6173f01ebaaSMatthew Dillon 		       HAMMER2_FREEMAP_LEVEL1_SIZE;
6180b738157STomohiro Kusumi 		if (hmp->total_size - cbinfo.sbase <= incr) {
6190b738157STomohiro Kusumi 			cbinfo.sstop = hmp->total_size;
620d1b65214SMatthew Dillon 			allmedia = 1;
621d1b65214SMatthew Dillon 		} else {
6223f01ebaaSMatthew Dillon 			cbinfo.sstop = cbinfo.sbase + incr;
623d1b65214SMatthew Dillon 			allmedia = 0;
624d1b65214SMatthew Dillon 		}
625d1b65214SMatthew Dillon 		kprintf("hammer2: pass %016jx-%016jx ",
6263f01ebaaSMatthew Dillon 			(intmax_t)cbinfo.sbase,
627d1b65214SMatthew Dillon 			(intmax_t)cbinfo.sstop);
628d1b65214SMatthew Dillon 		if (allmedia && cbinfo.sbase == 0)
629d1b65214SMatthew Dillon 			kprintf("(all media)\n");
630d1b65214SMatthew Dillon 		else if (allmedia)
631d1b65214SMatthew Dillon 			kprintf("(remaining media)\n");
632d1b65214SMatthew Dillon 		else
633d1b65214SMatthew Dillon 			kprintf("(%jdGB of media)\n",
634944ddad0SMatthew Dillon 				(intmax_t)incr / (1024L*1024*1024));
6353f01ebaaSMatthew Dillon 
636470dad14SMatthew Dillon 		/*
637470dad14SMatthew Dillon 		 * Scan topology for stuff inside this range.
638c88e038bSMatthew Dillon 		 *
639c88e038bSMatthew Dillon 		 * NOTE - By not using a transaction the operation can
640c88e038bSMatthew Dillon 		 *	  run concurrent with the frontend as well as
641c88e038bSMatthew Dillon 		 *	  with flushes.
642c88e038bSMatthew Dillon 		 *
643c88e038bSMatthew Dillon 		 *	  We cannot safely set a mtid without a transaction,
644c88e038bSMatthew Dillon 		 *	  and in fact we don't want to set one anyway.  We
645c88e038bSMatthew Dillon 		 *	  want the bulkfree to be passive and no interfere
646c88e038bSMatthew Dillon 		 *	  with crash recovery.
647470dad14SMatthew Dillon 		 */
648c88e038bSMatthew Dillon #undef HAMMER2_BULKFREE_TRANS	/* undef - don't use transaction */
649c88e038bSMatthew Dillon #ifdef HAMMER2_BULKFREE_TRANS
6503f01ebaaSMatthew Dillon 		hammer2_trans_init(hmp->spmp, 0);
6513f01ebaaSMatthew Dillon 		cbinfo.mtid = hammer2_trans_sub(hmp->spmp);
652c88e038bSMatthew Dillon #else
653c88e038bSMatthew Dillon 		cbinfo.mtid = 0;
654c88e038bSMatthew Dillon #endif
655125966e8SMatthew Dillon 		cbinfo.pri = 0;
65676fc35dbSTomohiro Kusumi 		error |= hammer2_bulkfree_scan(vchain,
657acbbd0efSMatthew Dillon 					       h2_bulkfree_callback, &cbinfo);
658125966e8SMatthew Dillon 
659125966e8SMatthew Dillon 		while ((save = TAILQ_FIRST(&cbinfo.list)) != NULL &&
660acbbd0efSMatthew Dillon 		       (error & ~HAMMER2_ERROR_CHECK) == 0) {
661125966e8SMatthew Dillon 			TAILQ_REMOVE(&cbinfo.list, save, entry);
6621dddac0aSMatthew Dillon 			--cbinfo.list_count;
663125966e8SMatthew Dillon 			cbinfo.pri = 0;
664f7a9ce16SMatthew Dillon 			cbinfo.backout = NULL;
66576fc35dbSTomohiro Kusumi 			error |= hammer2_bulkfree_scan(save->chain,
666125966e8SMatthew Dillon 						       h2_bulkfree_callback,
667125966e8SMatthew Dillon 						       &cbinfo);
668125966e8SMatthew Dillon 			hammer2_chain_drop(save->chain);
669125966e8SMatthew Dillon 			kfree(save, M_HAMMER2);
670125966e8SMatthew Dillon 		}
671125966e8SMatthew Dillon 		while (save) {
672125966e8SMatthew Dillon 			TAILQ_REMOVE(&cbinfo.list, save, entry);
6731dddac0aSMatthew Dillon 			--cbinfo.list_count;
674125966e8SMatthew Dillon 			hammer2_chain_drop(save->chain);
675125966e8SMatthew Dillon 			kfree(save, M_HAMMER2);
676125966e8SMatthew Dillon 			save = TAILQ_FIRST(&cbinfo.list);
677125966e8SMatthew Dillon 		}
678f7a9ce16SMatthew Dillon 		cbinfo.backout = NULL;
679125966e8SMatthew Dillon 
6803f01ebaaSMatthew Dillon 		/*
68165c894ffSMatthew Dillon 		 * If the complete scan succeeded we can synchronize our
6823f01ebaaSMatthew Dillon 		 * in-memory freemap against live storage.  If an abort
68365c894ffSMatthew Dillon 		 * occured we cannot safely synchronize our partially
6843f01ebaaSMatthew Dillon 		 * filled-out in-memory freemap.
685acbbd0efSMatthew Dillon 		 *
686acbbd0efSMatthew Dillon 		 * We still synchronize on CHECK failures.  That is, we still
687acbbd0efSMatthew Dillon 		 * want bulkfree to operate even if the filesystem has defects.
6883f01ebaaSMatthew Dillon 		 */
689acbbd0efSMatthew Dillon 		if (error & ~HAMMER2_ERROR_CHECK) {
690746bb04fSAaron LI 			kprintf("bulkfree lastdrop %d %d error=0x%04x\n",
691746bb04fSAaron LI 				vchain->refs, vchain->core.chain_count, error);
692746bb04fSAaron LI 		} else {
693acbbd0efSMatthew Dillon 			if (error & HAMMER2_ERROR_CHECK) {
694acbbd0efSMatthew Dillon 				kprintf("bulkfree lastdrop %d %d "
695acbbd0efSMatthew Dillon 					"(with check errors)\n",
696acbbd0efSMatthew Dillon 					vchain->refs, vchain->core.chain_count);
697acbbd0efSMatthew Dillon 			} else {
698746bb04fSAaron LI 				kprintf("bulkfree lastdrop %d %d\n",
699746bb04fSAaron LI 					vchain->refs, vchain->core.chain_count);
700acbbd0efSMatthew Dillon 			}
701746bb04fSAaron LI 
702c8c0a18aSMatthew Dillon 			error = h2_bulkfree_sync(&cbinfo);
7033f01ebaaSMatthew Dillon 
7043f01ebaaSMatthew Dillon 			hammer2_voldata_lock(hmp);
7053f01ebaaSMatthew Dillon 			hammer2_voldata_modify(hmp);
7063f01ebaaSMatthew Dillon 			hmp->voldata.allocator_free += cbinfo.adj_free;
7073f01ebaaSMatthew Dillon 			hammer2_voldata_unlock(hmp);
7083f01ebaaSMatthew Dillon 		}
7093f01ebaaSMatthew Dillon 
7103f01ebaaSMatthew Dillon 		/*
7113f01ebaaSMatthew Dillon 		 * Cleanup for next loop.
7123f01ebaaSMatthew Dillon 		 */
713c88e038bSMatthew Dillon #ifdef HAMMER2_BULKFREE_TRANS
714257c2728SMatthew Dillon 		hammer2_trans_done(hmp->spmp, 0);
715c88e038bSMatthew Dillon #endif
716acbbd0efSMatthew Dillon 		if (error & ~HAMMER2_ERROR_CHECK)
7173f01ebaaSMatthew Dillon 			break;
7183f01ebaaSMatthew Dillon 		cbinfo.sbase = cbinfo.sstop;
719470dad14SMatthew Dillon 		cbinfo.adj_free = 0;
7203f01ebaaSMatthew Dillon 	}
7213f01ebaaSMatthew Dillon 	kmem_free_swapbacked(&cbinfo.kp);
722125966e8SMatthew Dillon 	kfree(cbinfo.dedup, M_HAMMER2);
723125966e8SMatthew Dillon 	cbinfo.dedup = NULL;
7243f01ebaaSMatthew Dillon 
7253f01ebaaSMatthew Dillon 	bfi->sstop = cbinfo.sbase;
7263f01ebaaSMatthew Dillon 
7270b738157STomohiro Kusumi 	incr = bfi->sstop / (hmp->total_size / 10000);
7283f01ebaaSMatthew Dillon 	if (incr > 10000)
7293f01ebaaSMatthew Dillon 		incr = 10000;
7303f01ebaaSMatthew Dillon 
7313f01ebaaSMatthew Dillon 	kprintf("bulkfree pass statistics (%d.%02d%% storage processed):\n",
7323f01ebaaSMatthew Dillon 		(int)incr / 100,
7333f01ebaaSMatthew Dillon 		(int)incr % 100);
7343f01ebaaSMatthew Dillon 
735acbbd0efSMatthew Dillon 	if (error & ~HAMMER2_ERROR_CHECK) {
7361e2c8208SMatthew Dillon 		kprintf("    bulkfree was aborted\n");
7371e2c8208SMatthew Dillon 	} else {
738acbbd0efSMatthew Dillon 		if (error & HAMMER2_ERROR_CHECK) {
739acbbd0efSMatthew Dillon 			kprintf("    WARNING: bulkfree "
740acbbd0efSMatthew Dillon 				"encountered CRC errors\n");
741acbbd0efSMatthew Dillon 		}
7423f01ebaaSMatthew Dillon 		kprintf("    transition->free   %ld\n", cbinfo.count_10_00);
7433f01ebaaSMatthew Dillon 		kprintf("    transition->staged %ld\n", cbinfo.count_11_10);
744b5b45574SMatthew Dillon 		kprintf("    ERR(00)->allocated %ld\n", cbinfo.count_00_11);
745b5b45574SMatthew Dillon 		kprintf("    ERR(01)->allocated %ld\n", cbinfo.count_01_11);
746b5b45574SMatthew Dillon 		kprintf("    staged->allocated  %ld\n", cbinfo.count_10_11);
747b0ac2d29STomohiro Kusumi 		kprintf("    ~4MB segs cleaned  %ld\n", cbinfo.count_l0cleans);
7481e2c8208SMatthew Dillon 		kprintf("    linear adjusts     %ld\n",
7491e2c8208SMatthew Dillon 			cbinfo.count_linadjusts);
7501e2c8208SMatthew Dillon 		kprintf("    dedup factor       %ld\n",
7511e2c8208SMatthew Dillon 			cbinfo.count_dedup_factor);
752784d105bSMatthew Dillon 		kprintf("    max saved chains   %ld\n", cbinfo.list_count_max);
7531e2c8208SMatthew Dillon 	}
7543f01ebaaSMatthew Dillon 
7551e2c8208SMatthew Dillon 	return error;
7563f01ebaaSMatthew Dillon }
7573f01ebaaSMatthew Dillon 
758470dad14SMatthew Dillon static void
cbinfo_bmap_init(hammer2_bulkfree_info_t * cbinfo,size_t size)759470dad14SMatthew Dillon cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size)
760470dad14SMatthew Dillon {
761470dad14SMatthew Dillon 	hammer2_bmap_data_t *bmap = cbinfo->bmap;
762470dad14SMatthew Dillon 	hammer2_key_t key = cbinfo->sbase;
763470dad14SMatthew Dillon 	hammer2_key_t lokey;
764470dad14SMatthew Dillon 	hammer2_key_t hikey;
765470dad14SMatthew Dillon 
766470dad14SMatthew Dillon 	lokey = (cbinfo->hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
767470dad14SMatthew Dillon 		~HAMMER2_SEGMASK64;
7680b738157STomohiro Kusumi 	hikey = cbinfo->hmp->total_size & ~HAMMER2_SEGMASK64;
769470dad14SMatthew Dillon 
770470dad14SMatthew Dillon 	bzero(bmap, size);
771470dad14SMatthew Dillon 	while (size) {
77265cacacfSMatthew Dillon 		bzero(bmap, sizeof(*bmap));
77365cacacfSMatthew Dillon 		if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX))
77465cacacfSMatthew Dillon 			lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
77565cacacfSMatthew Dillon 		if (lokey < H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64)
77665cacacfSMatthew Dillon 			lokey = H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64;
777470dad14SMatthew Dillon 		if (key < lokey || key >= hikey) {
778470dad14SMatthew Dillon 			memset(bmap->bitmapq, -1,
779470dad14SMatthew Dillon 			       sizeof(bmap->bitmapq));
780470dad14SMatthew Dillon 			bmap->avail = 0;
781470dad14SMatthew Dillon 			bmap->linear = HAMMER2_SEGSIZE;
782470dad14SMatthew Dillon 		} else {
7834e47c34eSTomohiro Kusumi 			bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
784470dad14SMatthew Dillon 		}
785470dad14SMatthew Dillon 		size -= sizeof(*bmap);
786470dad14SMatthew Dillon 		key += HAMMER2_FREEMAP_LEVEL0_SIZE;
787470dad14SMatthew Dillon 		++bmap;
788470dad14SMatthew Dillon 	}
789470dad14SMatthew Dillon }
790470dad14SMatthew Dillon 
7913f01ebaaSMatthew Dillon static int
h2_bulkfree_callback(hammer2_bulkfree_info_t * cbinfo,hammer2_blockref_t * bref)792125966e8SMatthew Dillon h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
7933f01ebaaSMatthew Dillon {
7943f01ebaaSMatthew Dillon 	hammer2_bmap_data_t *bmap;
7953f01ebaaSMatthew Dillon 	hammer2_off_t data_off;
7963f01ebaaSMatthew Dillon 	uint16_t class;
7973f01ebaaSMatthew Dillon 	size_t bytes;
7983f01ebaaSMatthew Dillon 	int radix;
7993f01ebaaSMatthew Dillon 
8003f01ebaaSMatthew Dillon 	/*
8014ff0b408SMatthew Dillon 	 * Check for signal and allow yield to userland during scan.
8023f01ebaaSMatthew Dillon 	 */
8033f01ebaaSMatthew Dillon 	if (hammer2_signal_check(&cbinfo->save_time))
8041e2c8208SMatthew Dillon 		return HAMMER2_ERROR_ABORTED;
8051e2c8208SMatthew Dillon 
8063f01ebaaSMatthew Dillon 	/*
8074ff0b408SMatthew Dillon 	 * Deal with kernel thread cpu or I/O hogging by limiting the
8084ff0b408SMatthew Dillon 	 * number of chains scanned per second to hammer2_bulkfree_tps.
8094ff0b408SMatthew Dillon 	 * Ignore leaf records (DIRENT and DATA), no per-record I/O is
8104ff0b408SMatthew Dillon 	 * involved for those since we don't load their data.
8114ff0b408SMatthew Dillon 	 */
8124ff0b408SMatthew Dillon 	if (bref->type != HAMMER2_BREF_TYPE_DATA &&
8134ff0b408SMatthew Dillon 	    bref->type != HAMMER2_BREF_TYPE_DIRENT) {
8144ff0b408SMatthew Dillon 		++cbinfo->bulkfree_calls;
8154ff0b408SMatthew Dillon 		if (cbinfo->bulkfree_calls > hammer2_bulkfree_tps) {
8164ff0b408SMatthew Dillon 			int dticks = ticks - cbinfo->bulkfree_ticks;
8174ff0b408SMatthew Dillon 			if (dticks < 0)
8184ff0b408SMatthew Dillon 				dticks = 0;
8194ff0b408SMatthew Dillon 			if (dticks < hz) {
8204ff0b408SMatthew Dillon 				tsleep(&cbinfo->bulkfree_ticks, 0,
8214ff0b408SMatthew Dillon 				       "h2bw", hz - dticks);
8224ff0b408SMatthew Dillon 			}
8234ff0b408SMatthew Dillon 			cbinfo->bulkfree_calls = 0;
8244ff0b408SMatthew Dillon 			cbinfo->bulkfree_ticks = ticks;
8254ff0b408SMatthew Dillon 		}
8264ff0b408SMatthew Dillon 	}
8274ff0b408SMatthew Dillon 
8284ff0b408SMatthew Dillon 	/*
8293f01ebaaSMatthew Dillon 	 * Calculate the data offset and determine if it is within
8303f01ebaaSMatthew Dillon 	 * the current freemap range being gathered.
8313f01ebaaSMatthew Dillon 	 */
832125966e8SMatthew Dillon 	data_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
833470dad14SMatthew Dillon 	if (data_off < cbinfo->sbase || data_off >= cbinfo->sstop)
8343f01ebaaSMatthew Dillon 		return 0;
835125966e8SMatthew Dillon 	if (data_off < cbinfo->hmp->voldata.allocator_beg)
8363f01ebaaSMatthew Dillon 		return 0;
8370b738157STomohiro Kusumi 	if (data_off >= cbinfo->hmp->total_size)
8383f01ebaaSMatthew Dillon 		return 0;
8393f01ebaaSMatthew Dillon 
8403f01ebaaSMatthew Dillon 	/*
8413f01ebaaSMatthew Dillon 	 * Calculate the information needed to generate the in-memory
8423f01ebaaSMatthew Dillon 	 * freemap record.
8433f01ebaaSMatthew Dillon 	 *
84463a8b78dSTomohiro Kusumi 	 * Hammer2 does not allow allocations to cross the L1 (1GB) boundary,
84563a8b78dSTomohiro Kusumi 	 * it's a problem if it does.  (Or L0 (4MB) for that matter).
8463f01ebaaSMatthew Dillon 	 */
847125966e8SMatthew Dillon 	radix = (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
848da0cdd33SMatthew Dillon 	KKASSERT(radix != 0);
8493f01ebaaSMatthew Dillon 	bytes = (size_t)1 << radix;
850ee78054eSTomohiro Kusumi 	class = (bref->type << 8) | HAMMER2_PBUFRADIX;
8513f01ebaaSMatthew Dillon 
85265cacacfSMatthew Dillon 	if (data_off + bytes > cbinfo->sstop) {
85363a8b78dSTomohiro Kusumi 		kprintf("hammer2_bulkfree_scan: illegal 1GB boundary "
8543f01ebaaSMatthew Dillon 			"%016jx %016jx/%d\n",
855125966e8SMatthew Dillon 			(intmax_t)bref->data_off,
856125966e8SMatthew Dillon 			(intmax_t)bref->key,
857125966e8SMatthew Dillon 			bref->keybits);
8583f01ebaaSMatthew Dillon 		bytes = cbinfo->sstop - data_off;	/* XXX */
8593f01ebaaSMatthew Dillon 	}
8603f01ebaaSMatthew Dillon 
8613f01ebaaSMatthew Dillon 	/*
8623f01ebaaSMatthew Dillon 	 * Convert to a storage offset relative to the beginning of the
8633f01ebaaSMatthew Dillon 	 * storage range we are collecting.  Then lookup the level0 bmap entry.
8643f01ebaaSMatthew Dillon 	 */
8653f01ebaaSMatthew Dillon 	data_off -= cbinfo->sbase;
8663f01ebaaSMatthew Dillon 	bmap = cbinfo->bmap + (data_off >> HAMMER2_FREEMAP_LEVEL0_RADIX);
8673f01ebaaSMatthew Dillon 
8683f01ebaaSMatthew Dillon 	/*
869ef5c6e68SMatthew Dillon 	 * Convert data_off to a bmap-relative value (~4MB storage range).
8703f01ebaaSMatthew Dillon 	 * Adjust linear, class, and avail.
8713f01ebaaSMatthew Dillon 	 *
872ef5c6e68SMatthew Dillon 	 * Hammer2 does not allow allocations to cross the L0 (4MB) boundary,
8733f01ebaaSMatthew Dillon 	 */
8743f01ebaaSMatthew Dillon 	data_off &= HAMMER2_FREEMAP_LEVEL0_MASK;
8753f01ebaaSMatthew Dillon 	if (data_off + bytes > HAMMER2_FREEMAP_LEVEL0_SIZE) {
876ef5c6e68SMatthew Dillon 		kprintf("hammer2_bulkfree_scan: illegal 4MB boundary "
8773f01ebaaSMatthew Dillon 			"%016jx %016jx/%d\n",
878125966e8SMatthew Dillon 			(intmax_t)bref->data_off,
879125966e8SMatthew Dillon 			(intmax_t)bref->key,
880125966e8SMatthew Dillon 			bref->keybits);
8813f01ebaaSMatthew Dillon 		bytes = HAMMER2_FREEMAP_LEVEL0_SIZE - data_off;
8823f01ebaaSMatthew Dillon 	}
8833f01ebaaSMatthew Dillon 
8843f01ebaaSMatthew Dillon 	if (bmap->class == 0) {
8853f01ebaaSMatthew Dillon 		bmap->class = class;
8863f01ebaaSMatthew Dillon 		bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
8873f01ebaaSMatthew Dillon 	}
88865cacacfSMatthew Dillon 
88965cacacfSMatthew Dillon 	/*
89065cacacfSMatthew Dillon 	 * NOTE: bmap->class does not have to match class.  Classification
89165cacacfSMatthew Dillon 	 *	 is relaxed when free space is low, so some mixing can occur.
89265cacacfSMatthew Dillon 	 */
89365cacacfSMatthew Dillon #if 0
89465cacacfSMatthew Dillon 	/*
89565cacacfSMatthew Dillon 	 * XXX removed
89665cacacfSMatthew Dillon 	 */
8973f01ebaaSMatthew Dillon 	if (bmap->class != class) {
8983f01ebaaSMatthew Dillon 		kprintf("hammer2_bulkfree_scan: illegal mixed class "
8993f01ebaaSMatthew Dillon 			"%016jx %016jx/%d (%04x vs %04x)\n",
900125966e8SMatthew Dillon 			(intmax_t)bref->data_off,
901125966e8SMatthew Dillon 			(intmax_t)bref->key,
902125966e8SMatthew Dillon 			bref->keybits,
9033f01ebaaSMatthew Dillon 			class, bmap->class);
9043f01ebaaSMatthew Dillon 	}
90565cacacfSMatthew Dillon #endif
906b49f72c3SMatthew Dillon 
907b49f72c3SMatthew Dillon 	/*
908b49f72c3SMatthew Dillon 	 * Just record the highest byte-granular offset for now.  Do not
909b49f72c3SMatthew Dillon 	 * match against allocations which are in multiples of whole blocks.
910b49f72c3SMatthew Dillon 	 *
911b49f72c3SMatthew Dillon 	 * Make sure that any in-block linear offset at least covers the
912b49f72c3SMatthew Dillon 	 * data range.  This can cause bmap->linear to become block-aligned.
913b49f72c3SMatthew Dillon 	 */
914b49f72c3SMatthew Dillon 	if (bytes & HAMMER2_FREEMAP_BLOCK_MASK) {
9153f01ebaaSMatthew Dillon 		if (bmap->linear < (int32_t)data_off + (int32_t)bytes)
9163f01ebaaSMatthew Dillon 			bmap->linear = (int32_t)data_off + (int32_t)bytes;
917b49f72c3SMatthew Dillon 	} else if (bmap->linear >= (int32_t)data_off &&
918b49f72c3SMatthew Dillon 		   bmap->linear < (int32_t)data_off + (int32_t)bytes) {
919b49f72c3SMatthew Dillon 		bmap->linear = (int32_t)data_off + (int32_t)bytes;
920b49f72c3SMatthew Dillon 	}
9213f01ebaaSMatthew Dillon 
9223f01ebaaSMatthew Dillon 	/*
9233f01ebaaSMatthew Dillon 	 * Adjust the hammer2_bitmap_t bitmap[HAMMER2_BMAP_ELEMENTS].
9243f01ebaaSMatthew Dillon 	 * 64-bit entries, 2 bits per entry, to code 11.
9253f01ebaaSMatthew Dillon 	 *
926ef5c6e68SMatthew Dillon 	 * NOTE: data_off mask to 524288, shift right by 14 (radix for 16384),
927ef5c6e68SMatthew Dillon 	 *	 and multiply shift amount by 2 for sets of 2 bits.
928ef5c6e68SMatthew Dillon 	 *
9293f01ebaaSMatthew Dillon 	 * NOTE: The allocation can be smaller than HAMMER2_FREEMAP_BLOCK_SIZE.
930ef5c6e68SMatthew Dillon 	 *	 also, data_off may not be FREEMAP_BLOCK_SIZE aligned.
9313f01ebaaSMatthew Dillon 	 */
9323f01ebaaSMatthew Dillon 	while (bytes > 0) {
9333f01ebaaSMatthew Dillon 		hammer2_bitmap_t bmask;
934ef5c6e68SMatthew Dillon 		int bindex;
9353f01ebaaSMatthew Dillon 
9363f01ebaaSMatthew Dillon 		bindex = (int)data_off >> (HAMMER2_FREEMAP_BLOCK_RADIX +
9373f01ebaaSMatthew Dillon 					   HAMMER2_BMAP_INDEX_RADIX);
9383f01ebaaSMatthew Dillon 		bmask = (hammer2_bitmap_t)3 <<
9393f01ebaaSMatthew Dillon 			((((int)data_off & HAMMER2_BMAP_INDEX_MASK) >>
9403f01ebaaSMatthew Dillon 			 HAMMER2_FREEMAP_BLOCK_RADIX) << 1);
9413f01ebaaSMatthew Dillon 
9423f01ebaaSMatthew Dillon 		/*
9433f01ebaaSMatthew Dillon 		 * NOTE! The (avail) calculation is bitmap-granular.  Multiple
9443f01ebaaSMatthew Dillon 		 *	 sub-granular records can wind up at the same bitmap
9453f01ebaaSMatthew Dillon 		 *	 position.
9463f01ebaaSMatthew Dillon 		 */
9473f01ebaaSMatthew Dillon 		if ((bmap->bitmapq[bindex] & bmask) == 0) {
9483f01ebaaSMatthew Dillon 			if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE) {
9493f01ebaaSMatthew Dillon 				bmap->avail -= HAMMER2_FREEMAP_BLOCK_SIZE;
9503f01ebaaSMatthew Dillon 			} else {
9513f01ebaaSMatthew Dillon 				bmap->avail -= bytes;
9523f01ebaaSMatthew Dillon 			}
9533f01ebaaSMatthew Dillon 			bmap->bitmapq[bindex] |= bmask;
9543f01ebaaSMatthew Dillon 		}
9553f01ebaaSMatthew Dillon 		data_off += HAMMER2_FREEMAP_BLOCK_SIZE;
9563f01ebaaSMatthew Dillon 		if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE)
9573f01ebaaSMatthew Dillon 			bytes = 0;
9583f01ebaaSMatthew Dillon 		else
9593f01ebaaSMatthew Dillon 			bytes -= HAMMER2_FREEMAP_BLOCK_SIZE;
9603f01ebaaSMatthew Dillon 	}
9611e2c8208SMatthew Dillon 	return 0;
9623f01ebaaSMatthew Dillon }
9633f01ebaaSMatthew Dillon 
9643f01ebaaSMatthew Dillon /*
9653f01ebaaSMatthew Dillon  * Synchronize the in-memory bitmap with the live freemap.  This is not a
9663f01ebaaSMatthew Dillon  * direct copy.  Instead the bitmaps must be compared:
9673f01ebaaSMatthew Dillon  *
9683f01ebaaSMatthew Dillon  *	In-memory	Live-freemap
9693f01ebaaSMatthew Dillon  *	   00		  11 -> 10	(do nothing if live modified)
9703f01ebaaSMatthew Dillon  *			  10 -> 00	(do nothing if live modified)
9713f01ebaaSMatthew Dillon  *	   11		  10 -> 11	handles race against live
9723f01ebaaSMatthew Dillon  *			  ** -> 11	nominally warn of corruption
9733f01ebaaSMatthew Dillon  *
97465cacacfSMatthew Dillon  * We must also fixup the hints in HAMMER2_BREF_TYPE_FREEMAP_LEAF.
9753f01ebaaSMatthew Dillon  */
976c8c0a18aSMatthew Dillon static int
h2_bulkfree_sync(hammer2_bulkfree_info_t * cbinfo)9773f01ebaaSMatthew Dillon h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
9783f01ebaaSMatthew Dillon {
9793f01ebaaSMatthew Dillon 	hammer2_off_t data_off;
9803f01ebaaSMatthew Dillon 	hammer2_key_t key;
9813f01ebaaSMatthew Dillon 	hammer2_key_t key_dummy;
9823f01ebaaSMatthew Dillon 	hammer2_bmap_data_t *bmap;
9833f01ebaaSMatthew Dillon 	hammer2_bmap_data_t *live;
9843f01ebaaSMatthew Dillon 	hammer2_chain_t *live_parent;
9853f01ebaaSMatthew Dillon 	hammer2_chain_t *live_chain;
9863f01ebaaSMatthew Dillon 	int bmapindex;
987c8c0a18aSMatthew Dillon 	int error;
9883f01ebaaSMatthew Dillon 
989470dad14SMatthew Dillon 	kprintf("hammer2_bulkfree - range ");
990470dad14SMatthew Dillon 
991470dad14SMatthew Dillon 	if (cbinfo->sbase < cbinfo->hmp->voldata.allocator_beg)
992470dad14SMatthew Dillon 		kprintf("%016jx-",
993470dad14SMatthew Dillon 			(intmax_t)cbinfo->hmp->voldata.allocator_beg);
994470dad14SMatthew Dillon 	else
995470dad14SMatthew Dillon 		kprintf("%016jx-",
996470dad14SMatthew Dillon 			(intmax_t)cbinfo->sbase);
997470dad14SMatthew Dillon 
9980b738157STomohiro Kusumi 	if (cbinfo->sstop > cbinfo->hmp->total_size)
999470dad14SMatthew Dillon 		kprintf("%016jx\n",
10000b738157STomohiro Kusumi 			(intmax_t)cbinfo->hmp->total_size);
1001470dad14SMatthew Dillon 	else
1002470dad14SMatthew Dillon 		kprintf("%016jx\n",
10033f01ebaaSMatthew Dillon 			(intmax_t)cbinfo->sstop);
10043f01ebaaSMatthew Dillon 
10053f01ebaaSMatthew Dillon 	data_off = cbinfo->sbase;
10063f01ebaaSMatthew Dillon 	bmap = cbinfo->bmap;
10073f01ebaaSMatthew Dillon 
10083f01ebaaSMatthew Dillon 	live_parent = &cbinfo->hmp->fchain;
10093f01ebaaSMatthew Dillon 	hammer2_chain_ref(live_parent);
10103f01ebaaSMatthew Dillon 	hammer2_chain_lock(live_parent, HAMMER2_RESOLVE_ALWAYS);
10113f01ebaaSMatthew Dillon 	live_chain = NULL;
1012c8c0a18aSMatthew Dillon 	error = 0;
10133f01ebaaSMatthew Dillon 
1014b5b45574SMatthew Dillon 	/*
1015b5b45574SMatthew Dillon 	 * Iterate each hammer2_bmap_data_t line (128 bytes) managing
1016b5b45574SMatthew Dillon 	 * 4MB of storage.
1017b5b45574SMatthew Dillon 	 */
10183f01ebaaSMatthew Dillon 	while (data_off < cbinfo->sstop) {
10193f01ebaaSMatthew Dillon 		/*
10203f01ebaaSMatthew Dillon 		 * The freemap is not used below allocator_beg or beyond
10210b738157STomohiro Kusumi 		 * total_size.
10223f01ebaaSMatthew Dillon 		 */
1023470dad14SMatthew Dillon 
10243f01ebaaSMatthew Dillon 		if (data_off < cbinfo->hmp->voldata.allocator_beg)
10253f01ebaaSMatthew Dillon 			goto next;
10260b738157STomohiro Kusumi 		if (data_off >= cbinfo->hmp->total_size)
10273f01ebaaSMatthew Dillon 			goto next;
10283f01ebaaSMatthew Dillon 
10293f01ebaaSMatthew Dillon 		/*
10303f01ebaaSMatthew Dillon 		 * Locate the freemap leaf on the live filesystem
10313f01ebaaSMatthew Dillon 		 */
10323f01ebaaSMatthew Dillon 		key = (data_off & ~HAMMER2_FREEMAP_LEVEL1_MASK);
1033470dad14SMatthew Dillon 
10343f01ebaaSMatthew Dillon 		if (live_chain == NULL || live_chain->bref.key != key) {
10353f01ebaaSMatthew Dillon 			if (live_chain) {
10363f01ebaaSMatthew Dillon 				hammer2_chain_unlock(live_chain);
10373f01ebaaSMatthew Dillon 				hammer2_chain_drop(live_chain);
10383f01ebaaSMatthew Dillon 			}
10393f01ebaaSMatthew Dillon 			live_chain = hammer2_chain_lookup(
10403f01ebaaSMatthew Dillon 					    &live_parent,
10413f01ebaaSMatthew Dillon 					    &key_dummy,
10423f01ebaaSMatthew Dillon 					    key,
10433f01ebaaSMatthew Dillon 					    key + HAMMER2_FREEMAP_LEVEL1_MASK,
1044c8c0a18aSMatthew Dillon 					    &error,
10453f01ebaaSMatthew Dillon 					    HAMMER2_LOOKUP_ALWAYS);
1046c8c0a18aSMatthew Dillon 			if (error) {
1047c8c0a18aSMatthew Dillon 				kprintf("hammer2_bulkfree: freemap lookup "
1048c8c0a18aSMatthew Dillon 					"error near %016jx, error %s\n",
1049c8c0a18aSMatthew Dillon 					(intmax_t)data_off,
1050c8c0a18aSMatthew Dillon 					hammer2_error_str(live_chain->error));
1051c8c0a18aSMatthew Dillon 				break;
1052ba38182bSMatthew Dillon 			}
10533f01ebaaSMatthew Dillon 		}
10543f01ebaaSMatthew Dillon 		if (live_chain == NULL) {
1055b5b45574SMatthew Dillon 			/*
1056b5b45574SMatthew Dillon 			 * XXX if we implement a full recovery mode we need
1057b5b45574SMatthew Dillon 			 * to create/recreate missing freemap chains if our
1058b5b45574SMatthew Dillon 			 * bmap has any allocated blocks.
1059b5b45574SMatthew Dillon 			 */
10603f01ebaaSMatthew Dillon 			if (bmap->class &&
10613f01ebaaSMatthew Dillon 			    bmap->avail != HAMMER2_FREEMAP_LEVEL0_SIZE) {
10623f01ebaaSMatthew Dillon 				kprintf("hammer2_bulkfree: cannot locate "
10633f01ebaaSMatthew Dillon 					"live leaf for allocated data "
10643f01ebaaSMatthew Dillon 					"near %016jx\n",
10653f01ebaaSMatthew Dillon 					(intmax_t)data_off);
10663f01ebaaSMatthew Dillon 			}
10673f01ebaaSMatthew Dillon 			goto next;
10683f01ebaaSMatthew Dillon 		}
10693f01ebaaSMatthew Dillon 		if (live_chain->error) {
1070c8c0a18aSMatthew Dillon 			kprintf("hammer2_bulkfree: unable to access freemap "
1071c8c0a18aSMatthew Dillon 				"near %016jx, error %s\n",
1072c8c0a18aSMatthew Dillon 				(intmax_t)data_off,
1073c8c0a18aSMatthew Dillon 				hammer2_error_str(live_chain->error));
10743f01ebaaSMatthew Dillon 			hammer2_chain_unlock(live_chain);
10753f01ebaaSMatthew Dillon 			hammer2_chain_drop(live_chain);
10763f01ebaaSMatthew Dillon 			live_chain = NULL;
10773f01ebaaSMatthew Dillon 			goto next;
10783f01ebaaSMatthew Dillon 		}
10793f01ebaaSMatthew Dillon 
10803f01ebaaSMatthew Dillon 		bmapindex = (data_off & HAMMER2_FREEMAP_LEVEL1_MASK) >>
10813f01ebaaSMatthew Dillon 			    HAMMER2_FREEMAP_LEVEL0_RADIX;
10823f01ebaaSMatthew Dillon 		live = &live_chain->data->bmdata[bmapindex];
10833f01ebaaSMatthew Dillon 
10843f01ebaaSMatthew Dillon 		/*
1085b49f72c3SMatthew Dillon 		 * Shortcut if the bitmaps match and the live linear
1086b49f72c3SMatthew Dillon 		 * indicator is sane.  We can't do a perfect check of
1087b49f72c3SMatthew Dillon 		 * live->linear because the only real requirement is that
1088b49f72c3SMatthew Dillon 		 * if it is not block-aligned, that it not cover the space
1089b49f72c3SMatthew Dillon 		 * within its current block which overlaps one of the data
1090b49f72c3SMatthew Dillon 		 * ranges we scan.  We don't retain enough fine-grained
1091b49f72c3SMatthew Dillon 		 * data in our scan to be able to set it exactly.
1092b49f72c3SMatthew Dillon 		 *
1093b5b45574SMatthew Dillon 		 * TODO - we could shortcut this by testing that both
1094b5b45574SMatthew Dillon 		 * live->class and bmap->class are 0, and both avails are
1095b5b45574SMatthew Dillon 		 * set to HAMMER2_FREEMAP_LEVEL0_SIZE (4MB).
10963f01ebaaSMatthew Dillon 		 */
10973f01ebaaSMatthew Dillon 		if (bcmp(live->bitmapq, bmap->bitmapq,
1098b49f72c3SMatthew Dillon 			 sizeof(bmap->bitmapq)) == 0 &&
1099691854bdSMatthew Dillon 		    live->linear >= bmap->linear &&
1100691854bdSMatthew Dillon 		    (hammer2_aux_flags & 1) == 0 &&
1101691854bdSMatthew Dillon 		    bigmask_good(bmap, live_chain->bref.check.freemap.bigmask))
1102691854bdSMatthew Dillon 		{
11033f01ebaaSMatthew Dillon 			goto next;
11043f01ebaaSMatthew Dillon 		}
1105470dad14SMatthew Dillon 		if (hammer2_debug & 1) {
1106691854bdSMatthew Dillon 			kprintf("live %016jx %04d.%04x (avail=%d) "
1107691854bdSMatthew Dillon 				"bigmask %08x->%08x\n",
1108691854bdSMatthew Dillon 				data_off, bmapindex, live->class, live->avail,
1109691854bdSMatthew Dillon 				live_chain->bref.check.freemap.bigmask,
1110691854bdSMatthew Dillon 				live_chain->bref.check.freemap.bigmask |
1111691854bdSMatthew Dillon 				bigmask_get(bmap));
1112470dad14SMatthew Dillon 		}
11133f01ebaaSMatthew Dillon 
1114041c3a94SMatthew Dillon 		if (hammer2_chain_modify(live_chain, cbinfo->mtid, 0, 0)) {
1115041c3a94SMatthew Dillon 			kprintf("hammer2_bulkfree: unable to modify freemap "
1116041c3a94SMatthew Dillon 				"at %016jx for data-block %016jx, error %s\n",
1117041c3a94SMatthew Dillon 				live_chain->bref.data_off,
1118041c3a94SMatthew Dillon 				(intmax_t)data_off,
1119041c3a94SMatthew Dillon 				hammer2_error_str(live_chain->error));
1120041c3a94SMatthew Dillon 			hammer2_chain_unlock(live_chain);
1121041c3a94SMatthew Dillon 			hammer2_chain_drop(live_chain);
1122041c3a94SMatthew Dillon 			live_chain = NULL;
1123041c3a94SMatthew Dillon 			goto next;
1124041c3a94SMatthew Dillon 		}
112565cacacfSMatthew Dillon 		live_chain->bref.check.freemap.bigmask = -1;
112665cacacfSMatthew Dillon 		cbinfo->hmp->freemap_relaxed = 0;	/* reset heuristic */
1127470dad14SMatthew Dillon 		live = &live_chain->data->bmdata[bmapindex];
1128b5b45574SMatthew Dillon 
112985b1f267SMatthew Dillon 		h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap,
113085b1f267SMatthew Dillon 					live_chain->bref.key +
113185b1f267SMatthew Dillon 					bmapindex *
113285b1f267SMatthew Dillon 					HAMMER2_FREEMAP_LEVEL0_SIZE);
11333f01ebaaSMatthew Dillon next:
11343f01ebaaSMatthew Dillon 		data_off += HAMMER2_FREEMAP_LEVEL0_SIZE;
11353f01ebaaSMatthew Dillon 		++bmap;
11363f01ebaaSMatthew Dillon 	}
11373f01ebaaSMatthew Dillon 	if (live_chain) {
11383f01ebaaSMatthew Dillon 		hammer2_chain_unlock(live_chain);
11393f01ebaaSMatthew Dillon 		hammer2_chain_drop(live_chain);
11403f01ebaaSMatthew Dillon 	}
11413f01ebaaSMatthew Dillon 	if (live_parent) {
11423f01ebaaSMatthew Dillon 		hammer2_chain_unlock(live_parent);
11433f01ebaaSMatthew Dillon 		hammer2_chain_drop(live_parent);
11443f01ebaaSMatthew Dillon 	}
1145c8c0a18aSMatthew Dillon 	return error;
11463f01ebaaSMatthew Dillon }
11473f01ebaaSMatthew Dillon 
11483f01ebaaSMatthew Dillon /*
11493f01ebaaSMatthew Dillon  * Merge the bulkfree bitmap against the existing bitmap.
11503f01ebaaSMatthew Dillon  */
11513f01ebaaSMatthew Dillon static
11523f01ebaaSMatthew Dillon void
h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t * cbinfo,hammer2_off_t data_off,hammer2_bmap_data_t * live,hammer2_bmap_data_t * bmap,hammer2_key_t alloc_base)11533f01ebaaSMatthew Dillon h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
11547767d389SMatthew Dillon 			hammer2_off_t data_off, hammer2_bmap_data_t *live,
115585b1f267SMatthew Dillon 			hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base)
11563f01ebaaSMatthew Dillon {
11573f01ebaaSMatthew Dillon 	int bindex;
11583f01ebaaSMatthew Dillon 	int scount;
11593d4f397aSMatthew Dillon 	hammer2_off_t tmp_off;
11603f01ebaaSMatthew Dillon 	hammer2_bitmap_t lmask;
11613f01ebaaSMatthew Dillon 	hammer2_bitmap_t mmask;
11623f01ebaaSMatthew Dillon 
11633d4f397aSMatthew Dillon 	tmp_off = data_off;
11643d4f397aSMatthew Dillon 
11653f01ebaaSMatthew Dillon 	for (bindex = 0; bindex < HAMMER2_BMAP_ELEMENTS; ++bindex) {
11666b1f1befSMatthew Dillon 		lmask = live->bitmapq[bindex];	/* live */
11676b1f1befSMatthew Dillon 		mmask = bmap->bitmapq[bindex];	/* snapshotted bulkfree */
11683d4f397aSMatthew Dillon 		if (lmask == mmask) {
11693d4f397aSMatthew Dillon 			tmp_off += HAMMER2_BMAP_INDEX_SIZE;
11703f01ebaaSMatthew Dillon 			continue;
11713d4f397aSMatthew Dillon 		}
11723f01ebaaSMatthew Dillon 
11733f01ebaaSMatthew Dillon 		for (scount = 0;
11743f01ebaaSMatthew Dillon 		     scount < HAMMER2_BMAP_BITS_PER_ELEMENT;
11753f01ebaaSMatthew Dillon 		     scount += 2) {
11763f01ebaaSMatthew Dillon 			if ((mmask & 3) == 0) {
11773f01ebaaSMatthew Dillon 				/*
11783f01ebaaSMatthew Dillon 				 * in-memory 00		live 11 -> 10
11793f01ebaaSMatthew Dillon 				 *			live 10 -> 00
1180b5b45574SMatthew Dillon 				 *
1181b5b45574SMatthew Dillon 				 * Storage might be marked allocated or
1182b5b45574SMatthew Dillon 				 * staged and must be remarked staged or
1183b5b45574SMatthew Dillon 				 * free.
11843f01ebaaSMatthew Dillon 				 */
11853f01ebaaSMatthew Dillon 				switch (lmask & 3) {
11863f01ebaaSMatthew Dillon 				case 0:	/* 00 */
11873f01ebaaSMatthew Dillon 					break;
11883f01ebaaSMatthew Dillon 				case 1:	/* 01 */
11893f01ebaaSMatthew Dillon 					kprintf("hammer2_bulkfree: cannot "
11903f01ebaaSMatthew Dillon 						"transition m=00/l=01\n");
11913f01ebaaSMatthew Dillon 					break;
11923f01ebaaSMatthew Dillon 				case 2:	/* 10 -> 00 */
11933f01ebaaSMatthew Dillon 					live->bitmapq[bindex] &=
11943f01ebaaSMatthew Dillon 					    ~((hammer2_bitmap_t)2 << scount);
11953f01ebaaSMatthew Dillon 					live->avail +=
11963f01ebaaSMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_SIZE;
1197b5b45574SMatthew Dillon 					if (live->avail >
1198b5b45574SMatthew Dillon 					    HAMMER2_FREEMAP_LEVEL0_SIZE) {
1199b5b45574SMatthew Dillon 						live->avail =
1200b5b45574SMatthew Dillon 						    HAMMER2_FREEMAP_LEVEL0_SIZE;
1201b5b45574SMatthew Dillon 					}
12023f01ebaaSMatthew Dillon 					cbinfo->adj_free +=
12033f01ebaaSMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_SIZE;
12043f01ebaaSMatthew Dillon 					++cbinfo->count_10_00;
12050b8efeb7SMatthew Dillon 					hammer2_io_dedup_assert(
12063d4f397aSMatthew Dillon 						cbinfo->hmp,
12073d4f397aSMatthew Dillon 						tmp_off |
12083d4f397aSMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_RADIX,
12093d4f397aSMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_SIZE);
12103f01ebaaSMatthew Dillon 					break;
12113f01ebaaSMatthew Dillon 				case 3:	/* 11 -> 10 */
12123f01ebaaSMatthew Dillon 					live->bitmapq[bindex] &=
12133f01ebaaSMatthew Dillon 					    ~((hammer2_bitmap_t)1 << scount);
12143f01ebaaSMatthew Dillon 					++cbinfo->count_11_10;
12150b8efeb7SMatthew Dillon 					hammer2_io_dedup_delete(
12163d4f397aSMatthew Dillon 						cbinfo->hmp,
12170b8efeb7SMatthew Dillon 						HAMMER2_BREF_TYPE_DATA,
12183d4f397aSMatthew Dillon 						tmp_off |
12193d4f397aSMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_RADIX,
12203d4f397aSMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_SIZE);
12213f01ebaaSMatthew Dillon 					break;
12223f01ebaaSMatthew Dillon 				}
1223b5b45574SMatthew Dillon 			} else if ((mmask & 3) == 3) {
12243f01ebaaSMatthew Dillon 				/*
12253f01ebaaSMatthew Dillon 				 * in-memory 11		live 10 -> 11
12263f01ebaaSMatthew Dillon 				 *			live ** -> 11
1227b5b45574SMatthew Dillon 				 *
1228b5b45574SMatthew Dillon 				 * Storage might be incorrectly marked free
1229b5b45574SMatthew Dillon 				 * or staged and must be remarked fully
1230b5b45574SMatthew Dillon 				 * allocated.
12313f01ebaaSMatthew Dillon 				 */
12323f01ebaaSMatthew Dillon 				switch (lmask & 3) {
12333f01ebaaSMatthew Dillon 				case 0:	/* 00 */
123439745314SMatthew Dillon 					/*
123539745314SMatthew Dillon 					 * This case is not supposed to
123639745314SMatthew Dillon 					 * happen.  If it does, it means
123739745314SMatthew Dillon 					 * that an allocated block was
123839745314SMatthew Dillon 					 * thought by the filesystem to be
123939745314SMatthew Dillon 					 * free.
124039745314SMatthew Dillon 					 */
124139745314SMatthew Dillon 					kprintf("hammer2_bulkfree: "
124239745314SMatthew Dillon 						"00->11 critical freemap "
124339745314SMatthew Dillon 						"transition for datablock "
124439745314SMatthew Dillon 						"%016jx\n",
124539745314SMatthew Dillon 						tmp_off);
1246b5b45574SMatthew Dillon 					++cbinfo->count_00_11;
1247b5b45574SMatthew Dillon 					cbinfo->adj_free -=
1248b5b45574SMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_SIZE;
1249b5b45574SMatthew Dillon 					live->avail -=
1250b5b45574SMatthew Dillon 						HAMMER2_FREEMAP_BLOCK_SIZE;
1251b5b45574SMatthew Dillon 					if ((int32_t)live->avail < 0)
1252b5b45574SMatthew Dillon 						live->avail = 0;
12533f01ebaaSMatthew Dillon 					break;
12543f01ebaaSMatthew Dillon 				case 1:	/* 01 */
1255b5b45574SMatthew Dillon 					++cbinfo->count_01_11;
12563f01ebaaSMatthew Dillon 					break;
12573f01ebaaSMatthew Dillon 				case 2:	/* 10 -> 11 */
12583f01ebaaSMatthew Dillon 					++cbinfo->count_10_11;
12593f01ebaaSMatthew Dillon 					break;
12603f01ebaaSMatthew Dillon 				case 3:	/* 11 */
12613f01ebaaSMatthew Dillon 					break;
12623f01ebaaSMatthew Dillon 				}
1263b5b45574SMatthew Dillon 				live->bitmapq[bindex] |=
1264b5b45574SMatthew Dillon 					((hammer2_bitmap_t)3 << scount);
12653f01ebaaSMatthew Dillon 			}
12663f01ebaaSMatthew Dillon 			mmask >>= 2;
12673f01ebaaSMatthew Dillon 			lmask >>= 2;
12683d4f397aSMatthew Dillon 			tmp_off += HAMMER2_FREEMAP_BLOCK_SIZE;
12693f01ebaaSMatthew Dillon 		}
12703f01ebaaSMatthew Dillon 	}
12713f01ebaaSMatthew Dillon 
12723f01ebaaSMatthew Dillon 	/*
12733f01ebaaSMatthew Dillon 	 * Determine if the live bitmap is completely free and reset its
12743f01ebaaSMatthew Dillon 	 * fields if so.  Otherwise check to see if we can reduce the linear
12753f01ebaaSMatthew Dillon 	 * offset.
12763f01ebaaSMatthew Dillon 	 */
12773f01ebaaSMatthew Dillon 	for (bindex = HAMMER2_BMAP_ELEMENTS - 1; bindex >= 0; --bindex) {
12783f01ebaaSMatthew Dillon 		if (live->bitmapq[bindex] != 0)
12793f01ebaaSMatthew Dillon 			break;
12803f01ebaaSMatthew Dillon 	}
1281b49f72c3SMatthew Dillon 	if (bindex < 0) {
1282b49f72c3SMatthew Dillon 		/*
1283b49f72c3SMatthew Dillon 		 * Completely empty, reset entire segment
1284b49f72c3SMatthew Dillon 		 */
128585b1f267SMatthew Dillon #if 0
128685b1f267SMatthew Dillon 		kprintf("hammer2: cleanseg %016jx.%04x (%d)\n",
128785b1f267SMatthew Dillon 			alloc_base, live->class, live->avail);
128885b1f267SMatthew Dillon #endif
12893f01ebaaSMatthew Dillon 		live->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
12903f01ebaaSMatthew Dillon 		live->class = 0;
12913f01ebaaSMatthew Dillon 		live->linear = 0;
12923f01ebaaSMatthew Dillon 		++cbinfo->count_l0cleans;
12933f01ebaaSMatthew Dillon 	} else if (bindex < 7) {
1294b5b45574SMatthew Dillon 		/*
129583815ec6SMatthew Dillon 		 * Partially full, bitmapq[bindex] != 0.  Our bulkfree pass
129683815ec6SMatthew Dillon 		 * does not record enough information to set live->linear
129783815ec6SMatthew Dillon 		 * exactly.
1298b49f72c3SMatthew Dillon 		 *
129983815ec6SMatthew Dillon 		 * NOTE: Setting live->linear to a sub-block (16K) boundary
130083815ec6SMatthew Dillon 		 *	 forces the live code to iterate to the next fully
130183815ec6SMatthew Dillon 		 *	 free block.  It does NOT mean that all blocks above
130283815ec6SMatthew Dillon 		 *	 live->linear are available.
1303b49f72c3SMatthew Dillon 		 *
130483815ec6SMatthew Dillon 		 *	 Setting live->linear to a fragmentary (less than
130583815ec6SMatthew Dillon 		 *	 16K) boundary allows allocations to iterate within
130683815ec6SMatthew Dillon 		 *	 that sub-block.
1307b5b45574SMatthew Dillon 		 */
1308b49f72c3SMatthew Dillon 		if (live->linear < bmap->linear &&
1309b49f72c3SMatthew Dillon 		    ((live->linear ^ bmap->linear) &
1310b49f72c3SMatthew Dillon 		     ~HAMMER2_FREEMAP_BLOCK_MASK) == 0) {
131183815ec6SMatthew Dillon 			/*
131283815ec6SMatthew Dillon 			 * If greater than but still within the same
131383815ec6SMatthew Dillon 			 * sub-block as live we can adjust linear upward.
131483815ec6SMatthew Dillon 			 */
1315b49f72c3SMatthew Dillon 			live->linear = bmap->linear;
1316b49f72c3SMatthew Dillon 			++cbinfo->count_linadjusts;
1317b49f72c3SMatthew Dillon 		} else {
131883815ec6SMatthew Dillon 			/*
131983815ec6SMatthew Dillon 			 * Otherwise adjust to the nearest higher or same
132083815ec6SMatthew Dillon 			 * sub-block boundary.  The live system may have
132183815ec6SMatthew Dillon 			 * bounced live->linear around so we cannot make any
132283815ec6SMatthew Dillon 			 * assumptions with regards to available fragmentary
132383815ec6SMatthew Dillon 			 * allocations.
132483815ec6SMatthew Dillon 			 */
1325b49f72c3SMatthew Dillon 			live->linear =
1326b49f72c3SMatthew Dillon 				(bmap->linear + HAMMER2_FREEMAP_BLOCK_MASK) &
1327b49f72c3SMatthew Dillon 				~HAMMER2_FREEMAP_BLOCK_MASK;
1328b5b45574SMatthew Dillon 			++cbinfo->count_linadjusts;
1329b5b45574SMatthew Dillon 		}
1330b5b45574SMatthew Dillon 	} else {
1331b49f72c3SMatthew Dillon 		/*
1332b49f72c3SMatthew Dillon 		 * Completely full, effectively disable the linear iterator
1333b49f72c3SMatthew Dillon 		 */
1334b5b45574SMatthew Dillon 		live->linear = HAMMER2_SEGSIZE;
13353f01ebaaSMatthew Dillon 	}
13363f01ebaaSMatthew Dillon 
13373f01ebaaSMatthew Dillon #if 0
13383f01ebaaSMatthew Dillon 	if (bmap->class) {
13393f01ebaaSMatthew Dillon 		kprintf("%016jx %04d.%04x (avail=%7d) "
13403f01ebaaSMatthew Dillon 			"%08x %08x %08x %08x %08x %08x %08x %08x\n",
13413f01ebaaSMatthew Dillon 			(intmax_t)data_off,
13423f01ebaaSMatthew Dillon 			(int)((data_off &
13433f01ebaaSMatthew Dillon 			       HAMMER2_FREEMAP_LEVEL1_MASK) >>
13443f01ebaaSMatthew Dillon 			      HAMMER2_FREEMAP_LEVEL0_RADIX),
13453f01ebaaSMatthew Dillon 			bmap->class,
13463f01ebaaSMatthew Dillon 			bmap->avail,
13473f01ebaaSMatthew Dillon 			bmap->bitmap[0], bmap->bitmap[1],
13483f01ebaaSMatthew Dillon 			bmap->bitmap[2], bmap->bitmap[3],
13493f01ebaaSMatthew Dillon 			bmap->bitmap[4], bmap->bitmap[5],
13503f01ebaaSMatthew Dillon 			bmap->bitmap[6], bmap->bitmap[7]);
13513f01ebaaSMatthew Dillon 	}
13523f01ebaaSMatthew Dillon #endif
13533f01ebaaSMatthew Dillon }
1354125966e8SMatthew Dillon 
1355125966e8SMatthew Dillon /*
1356125966e8SMatthew Dillon  * BULKFREE DEDUP HEURISTIC
1357125966e8SMatthew Dillon  *
1358125966e8SMatthew Dillon  * WARNING! This code is SMP safe but the heuristic allows SMP collisions.
1359125966e8SMatthew Dillon  *	    All fields must be loaded into locals and validated.
1360125966e8SMatthew Dillon  */
1361125966e8SMatthew Dillon static
1362125966e8SMatthew Dillon int
h2_bulkfree_test(hammer2_bulkfree_info_t * cbinfo,hammer2_blockref_t * bref,int pri,int saved_error)1363125966e8SMatthew Dillon h2_bulkfree_test(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref,
1364acbbd0efSMatthew Dillon 		 int pri, int saved_error)
1365125966e8SMatthew Dillon {
1366125966e8SMatthew Dillon 	hammer2_dedup_t *dedup;
1367125966e8SMatthew Dillon 	int best;
1368125966e8SMatthew Dillon 	int n;
1369125966e8SMatthew Dillon 	int i;
1370125966e8SMatthew Dillon 
1371125966e8SMatthew Dillon 	n = hammer2_icrc32(&bref->data_off, sizeof(bref->data_off));
1372125966e8SMatthew Dillon 	dedup = cbinfo->dedup + (n & (HAMMER2_DEDUP_HEUR_MASK & ~7));
1373125966e8SMatthew Dillon 
1374125966e8SMatthew Dillon 	for (i = best = 0; i < 8; ++i) {
1375125966e8SMatthew Dillon 		if (dedup[i].data_off == bref->data_off) {
1376125966e8SMatthew Dillon 			if (dedup[i].ticks < pri)
1377125966e8SMatthew Dillon 				dedup[i].ticks = pri;
1378125966e8SMatthew Dillon 			if (pri == 1)
1379125966e8SMatthew Dillon 				cbinfo->count_dedup_factor += dedup[i].ticks;
1380acbbd0efSMatthew Dillon 			return (dedup[i].saved_error | HAMMER2_ERROR_EOF);
1381125966e8SMatthew Dillon 		}
1382125966e8SMatthew Dillon 		if (dedup[i].ticks < dedup[best].ticks)
1383125966e8SMatthew Dillon 			best = i;
1384125966e8SMatthew Dillon 	}
1385125966e8SMatthew Dillon 	dedup[best].data_off = bref->data_off;
1386125966e8SMatthew Dillon 	dedup[best].ticks = pri;
1387acbbd0efSMatthew Dillon 	dedup[best].saved_error = saved_error;
1388125966e8SMatthew Dillon 
1389125966e8SMatthew Dillon 	return 0;
1390125966e8SMatthew Dillon }
1391691854bdSMatthew Dillon 
1392691854bdSMatthew Dillon /*
1393691854bdSMatthew Dillon  * Calculate what the bigmask should be.  bigmask is permissive, so the
1394691854bdSMatthew Dillon  * bits returned must be set at a minimum in the live bigmask.  Other bits
1395691854bdSMatthew Dillon  * might also be set in the live bigmask.
1396691854bdSMatthew Dillon  */
1397691854bdSMatthew Dillon static uint32_t
bigmask_get(hammer2_bmap_data_t * bmap)1398691854bdSMatthew Dillon bigmask_get(hammer2_bmap_data_t *bmap)
1399691854bdSMatthew Dillon {
1400691854bdSMatthew Dillon 	hammer2_bitmap_t mask;	/* 64-bit mask to check */
1401691854bdSMatthew Dillon 	hammer2_bitmap_t scan;
1402691854bdSMatthew Dillon 	uint32_t bigmask;
1403691854bdSMatthew Dillon 	uint32_t radix_mask;
1404691854bdSMatthew Dillon 	int iter;
1405691854bdSMatthew Dillon 	int i;
1406691854bdSMatthew Dillon 	int j;
1407691854bdSMatthew Dillon 
1408691854bdSMatthew Dillon 	bigmask = 0;
1409691854bdSMatthew Dillon 	for (i = 0; i < HAMMER2_BMAP_ELEMENTS; ++i) {
1410691854bdSMatthew Dillon 		mask = bmap->bitmapq[i];
1411691854bdSMatthew Dillon 
1412691854bdSMatthew Dillon 		radix_mask = 1U << HAMMER2_FREEMAP_BLOCK_RADIX;
1413691854bdSMatthew Dillon 		radix_mask |= radix_mask - 1;
1414691854bdSMatthew Dillon 		iter = 2;	/* each bitmap entry is 2 bits. 2, 4, 8... */
1415691854bdSMatthew Dillon 		while (iter <= HAMMER2_BMAP_BITS_PER_ELEMENT) {
1416691854bdSMatthew Dillon 			if (iter == HAMMER2_BMAP_BITS_PER_ELEMENT)
1417691854bdSMatthew Dillon 				scan = -1;
1418691854bdSMatthew Dillon 			else
1419691854bdSMatthew Dillon 				scan = (1LU << iter) - 1;
1420691854bdSMatthew Dillon 			j = 0;
1421691854bdSMatthew Dillon 			while (j < HAMMER2_BMAP_BITS_PER_ELEMENT) {
1422691854bdSMatthew Dillon 				/*
1423691854bdSMatthew Dillon 				 * Check if all bits are 0 (free block).
1424691854bdSMatthew Dillon 				 * If so, set the bit in bigmask for the
1425691854bdSMatthew Dillon 				 * allocation radix under test.
1426691854bdSMatthew Dillon 				 */
1427691854bdSMatthew Dillon 				if ((scan & mask) == 0) {
1428691854bdSMatthew Dillon 					bigmask |= radix_mask;
1429691854bdSMatthew Dillon 				}
1430691854bdSMatthew Dillon 				scan <<= iter;
1431691854bdSMatthew Dillon 				j += iter;
1432691854bdSMatthew Dillon 			}
1433691854bdSMatthew Dillon 			iter <<= 1;
1434691854bdSMatthew Dillon 			radix_mask = (radix_mask << 1) | 1;
1435691854bdSMatthew Dillon 		}
1436691854bdSMatthew Dillon 	}
1437691854bdSMatthew Dillon 	return bigmask;
1438691854bdSMatthew Dillon }
1439691854bdSMatthew Dillon 
1440691854bdSMatthew Dillon static int
bigmask_good(hammer2_bmap_data_t * bmap,uint32_t live_bigmask)1441691854bdSMatthew Dillon bigmask_good(hammer2_bmap_data_t *bmap, uint32_t live_bigmask)
1442691854bdSMatthew Dillon {
1443691854bdSMatthew Dillon 	uint32_t bigmask;
1444691854bdSMatthew Dillon 
1445691854bdSMatthew Dillon 	bigmask = bigmask_get(bmap);
1446691854bdSMatthew Dillon 	return ((live_bigmask & bigmask) == bigmask);
1447691854bdSMatthew Dillon }
1448