1*a63188c8STomohiro Kusumi /*
2*a63188c8STomohiro Kusumi * SPDX-License-Identifier: BSD-3-Clause
3*a63188c8STomohiro Kusumi *
4*a63188c8STomohiro Kusumi * Copyright (c) 2023 Tomohiro Kusumi <tkusumi@netbsd.org>
5*a63188c8STomohiro Kusumi * Copyright (c) 2011-2023 The DragonFly Project. All rights reserved.
6*a63188c8STomohiro Kusumi *
7*a63188c8STomohiro Kusumi * This code is derived from software contributed to The DragonFly Project
8*a63188c8STomohiro Kusumi * by Matthew Dillon <dillon@dragonflybsd.org>
9*a63188c8STomohiro Kusumi *
10*a63188c8STomohiro Kusumi * Redistribution and use in source and binary forms, with or without
11*a63188c8STomohiro Kusumi * modification, are permitted provided that the following conditions
12*a63188c8STomohiro Kusumi * are met:
13*a63188c8STomohiro Kusumi *
14*a63188c8STomohiro Kusumi * 1. Redistributions of source code must retain the above copyright
15*a63188c8STomohiro Kusumi * notice, this list of conditions and the following disclaimer.
16*a63188c8STomohiro Kusumi * 2. Redistributions in binary form must reproduce the above copyright
17*a63188c8STomohiro Kusumi * notice, this list of conditions and the following disclaimer in
18*a63188c8STomohiro Kusumi * the documentation and/or other materials provided with the
19*a63188c8STomohiro Kusumi * distribution.
20*a63188c8STomohiro Kusumi * 3. Neither the name of The DragonFly Project nor the names of its
21*a63188c8STomohiro Kusumi * contributors may be used to endorse or promote products derived
22*a63188c8STomohiro Kusumi * from this software without specific, prior written permission.
23*a63188c8STomohiro Kusumi *
24*a63188c8STomohiro Kusumi * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
25*a63188c8STomohiro Kusumi * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
26*a63188c8STomohiro Kusumi * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
27*a63188c8STomohiro Kusumi * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
28*a63188c8STomohiro Kusumi * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
29*a63188c8STomohiro Kusumi * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
30*a63188c8STomohiro Kusumi * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
31*a63188c8STomohiro Kusumi * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
32*a63188c8STomohiro Kusumi * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
33*a63188c8STomohiro Kusumi * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
34*a63188c8STomohiro Kusumi * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35*a63188c8STomohiro Kusumi * SUCH DAMAGE.
36*a63188c8STomohiro Kusumi */
37*a63188c8STomohiro Kusumi /*
38*a63188c8STomohiro Kusumi #include <sys/param.h>
39*a63188c8STomohiro Kusumi #include <sys/systm.h>
40*a63188c8STomohiro Kusumi #include <sys/kernel.h>
41*a63188c8STomohiro Kusumi #include <sys/proc.h>
42*a63188c8STomohiro Kusumi #include <sys/mount.h>
43*a63188c8STomohiro Kusumi #include <vm/vm_kern.h>
44*a63188c8STomohiro Kusumi #include <vm/vm_extern.h>
45*a63188c8STomohiro Kusumi */
46*a63188c8STomohiro Kusumi
47*a63188c8STomohiro Kusumi #include "hammer2.h"
48*a63188c8STomohiro Kusumi
/*
 * breadth-first search
 *
 * A saved chain represents a deferred recursion point: when the scan
 * runs too deep or too many chains are queued, the chain is referenced
 * and parked on a TAILQ to be revisited later instead of being recursed
 * into immediately.
 */
typedef struct hammer2_chain_save {
	TAILQ_ENTRY(hammer2_chain_save) entry;	/* linkage on info->list */
	hammer2_chain_t	*chain;			/* referenced (not locked) chain */
} hammer2_chain_save_t;

/* Head type for the list of deferred (saved) chains */
TAILQ_HEAD(hammer2_chain_save_list, hammer2_chain_save);
typedef struct hammer2_chain_save_list hammer2_chain_save_list_t;
59*a63188c8STomohiro Kusumi
/*
 * Aggregate state for one bulkfree pass.  Carries the in-memory freemap
 * reconstruction buffer, the deferred-chain list used by the topology
 * scan, and a set of statistics counters reported to the console.
 */
typedef struct hammer2_bulkfree_info {
	hammer2_dev_t	*hmp;		/* device being scanned */
	//kmem_anon_desc_t kp;		/* (unused in this port) */
	hammer2_off_t	sbase;		/* sub-loop iteration */
	hammer2_off_t	sstop;		/* end of current sub-loop window */
	hammer2_bmap_data_t *bmap;	/* in-memory freemap reconstruction */
	int		depth;		/* current recursion depth of the scan */

	/* freemap state-transition counters (bitmap 2-bit states) */
	long		count_10_00;	/* staged->free */
	long		count_11_10;	/* allocated->staged */
	long		count_00_11;	/* (should not happen) */
	long		count_01_11;	/* (should not happen) */
	long		count_10_11;	/* staged->allocated */
	long		count_l0cleans;
	long		count_linadjusts;
	long		count_inodes_scanned;
	long		count_dirents_scanned;
	long		count_dedup_factor;
	long		count_bytes_scanned;
	long		count_chains_scanned;
	long		count_chains_reported;	/* last scanned count printed */
	long		bulkfree_calls;
	int		bulkfree_ticks;
	int		list_alert;	/* saved-chain overflow warned once */
	hammer2_off_t	adj_free;
	hammer2_tid_t	mtid;
	time_t		save_time;
	hammer2_chain_save_list_t list;	/* deferred chains (BFS queue) */
	long		list_count;	/* current number of saved chains */
	long		list_count_max;	/* high-water mark of list_count */
	hammer2_chain_save_t *backout;	/* ins pt while backing out */
	hammer2_dedup_t	*dedup;		/* dedup heuristic table */
	int		pri;		/* scan priority / progress heuristic */
} hammer2_bulkfree_info_t;
93*a63188c8STomohiro Kusumi
/*
 * Forward declarations for static helpers defined later in this file.
 */
static int h2_bulkfree_test(hammer2_bulkfree_info_t *info,
			hammer2_blockref_t *bref, int pri, int saved_error);
static uint32_t bigmask_get(hammer2_bmap_data_t *bmap);
static int bigmask_good(hammer2_bmap_data_t *bmap, uint32_t live_bigmask);
98*a63188c8STomohiro Kusumi
/*
 * General bulk scan function with callback.  Called with a referenced
 * but UNLOCKED parent.  The parent is returned in the same state.
 *
 * Recursively walks the topology under (parent), invoking (func) on
 * each block reference encountered.  Recursion is depth-limited; when
 * the limit is hit, sub-chains are referenced and deferred onto
 * info->list and re-run later (breadth-first recovery).  When (parent)
 * is a PFS root, all elements deferred during this call are exhausted
 * before returning so that cumulative errors can be reported per-PFS.
 *
 * Returns an accumulated HAMMER2_ERROR_* mask with EOF stripped.
 */
static
int
hammer2_bulkfree_scan(hammer2_chain_t *parent,
		      int (*func)(hammer2_bulkfree_info_t *info,
				  hammer2_blockref_t *bref),
		      hammer2_bulkfree_info_t *info)
{
	hammer2_blockref_t bref;
	hammer2_chain_t *chain;
	hammer2_chain_save_t *tail;
	hammer2_chain_save_t *save;
	int first = 1;
	int rup_error;		/* cumulative error from recursive scans */
	int error;		/* error state for this level */
	int e2;

	++info->pri;

	chain = NULL;
	rup_error = 0;
	error = 0;

	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS |
				   HAMMER2_RESOLVE_SHARED);

	/*
	 * Remember the current head of the deferral list.  The PFS
	 * re-run loop at the bottom drains entries only until it
	 * reaches this marker, i.e. only elements deferred during
	 * this call (this PFS).
	 */
	tail = TAILQ_FIRST(&info->list);

	/*
	 * The parent was previously retrieved NODATA and thus has not
	 * tested the CRC.  Now that we have locked it normally, check
	 * for a CRC problem and skip it if we found one.  The bulk scan
	 * cannot safely traverse invalid block tables (we could end up
	 * in an endless loop or cause a panic).
	 */
	if (parent->error & HAMMER2_ERROR_CHECK) {
		error = parent->error;
		goto done;
	}

	/*
	 * Report which PFS is being scanned
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT)) {
		kprintf("hammer2_bulkfree: Scanning %s\n",
			parent->data->ipdata.filename);
	}

	/*
	 * Generally loop on the contents if we have not been flagged
	 * for abort.
	 *
	 * Remember that these chains are completely isolated from
	 * the frontend, so we can release locks temporarily without
	 * imploding.
	 */
	for (;;) {
		error |= hammer2_chain_scan(parent, &chain, &bref, &first,
					    HAMMER2_LOOKUP_NODATA |
					    HAMMER2_LOOKUP_SHARED);

		/*
		 * Handle EOF or other error at current level.  This stops
		 * the bulkfree scan.
		 */
		if (error & ~HAMMER2_ERROR_CHECK)
			break;

		/*
		 * Account for dirents before the data_off test, since most
		 * dirents do not need a data reference.
		 */
		if (bref.type == HAMMER2_BREF_TYPE_DIRENT)
			++info->count_dirents_scanned;

		/*
		 * Ignore brefs without data (typically dirents)
		 */
		if ((bref.data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
			continue;

		/*
		 * Process bref, chain is only non-NULL if the bref
		 * might be recursable (it's possible that we sometimes get
		 * a non-NULL chain where the bref cannot be recursed).
		 *
		 * If we already ran down this tree we do not have to do it
		 * again, but we must still recover any cumulative error
		 * recorded from the time we did.
		 */
		++info->pri;
		e2 = h2_bulkfree_test(info, &bref, 1, 0);
		if (e2) {
			error |= e2 & ~HAMMER2_ERROR_EOF;
			continue;
		}

		if (bref.type == HAMMER2_BREF_TYPE_INODE)
			++info->count_inodes_scanned;

		error |= func(info, &bref);
		if (error & ~HAMMER2_ERROR_CHECK)
			break;

		/*
		 * A non-null chain is always returned if it is
		 * recursive, otherwise a non-null chain might be
		 * returned but usually is not when not recursive.
		 */
		if (chain == NULL)
			continue;

		info->count_bytes_scanned += chain->bytes;
		++info->count_chains_scanned;

		/*
		 * Progress report: every 100K chains early on, then
		 * every 1M chains once past the first million.
		 */
		if (info->count_chains_scanned >=
		    info->count_chains_reported + 1000000 ||
		    (info->count_chains_scanned < 1000000 &&
		     info->count_chains_scanned >=
		     info->count_chains_reported + 100000)) {
			kprintf(" chains %-7ld inodes %-7ld "
				"dirents %-7ld bytes %5ldMB\n",
				info->count_chains_scanned,
				info->count_inodes_scanned,
				info->count_dirents_scanned,
				info->count_bytes_scanned / 1000000);
			info->count_chains_reported =
				info->count_chains_scanned;
		}

		/*
		 * Else check type and setup depth-first scan.
		 *
		 * Account for bytes actually read.
		 */
		switch(chain->bref.type) {
		case HAMMER2_BREF_TYPE_INODE:
		case HAMMER2_BREF_TYPE_FREEMAP_NODE:
		case HAMMER2_BREF_TYPE_INDIRECT:
		case HAMMER2_BREF_TYPE_VOLUME:
		case HAMMER2_BREF_TYPE_FREEMAP:
			++info->depth;
			if (chain->error & HAMMER2_ERROR_CHECK) {
				/*
				 * Cannot safely recurse chains with crc
				 * errors, even in emergency mode.
				 */
				/* NOP */
			} else if (info->depth > 16 ||
				   info->backout ||
				   (info->depth > hammer2_limit_saved_depth &&
				    info->list_count >=
				     (hammer2_limit_saved_chains >> 2)))
			{
				/*
				 * We must defer the recursion if it runs
				 * too deep or if too many saved chains are
				 * allocated.
				 *
				 * In the case of too many saved chains, we
				 * have to stop recursing ASAP to avoid an
				 * explosion of memory use since each radix
				 * level can hold 512 elements.
				 *
				 * If we had to defer at a deeper level
				 * backout is non-NULL.  We must backout
				 * completely before resuming.
				 */
				if (info->list_count >
				     hammer2_limit_saved_chains &&
				    info->list_alert == 0)
				{
					kprintf("hammer2: during bulkfree, "
						"saved chains exceeded %ld "
						"at depth %d, "
						"backing off to less-efficient "
						"operation\n",
						hammer2_limit_saved_chains,
						info->depth);
					info->list_alert = 1;
				}

				/*
				 * Must be placed at head so pfsroot scan
				 * can exhaust saved elements for that pfs
				 * first.
				 *
				 * Must be placed at head for depth-first
				 * recovery when too many saved chains, to
				 * limit number of chains saved during
				 * saved-chain reruns.  The worst-case excess
				 * is (maximum_depth * 512) saved chains above
				 * the threshold.
				 *
				 * The maximum_depth generally occurs in the
				 * inode index and can be fairly deep once
				 * the radix tree becomes a bit fragmented.
				 * nominally 100M inodes would be only 4 deep,
				 * plus a maximally sized file would be another
				 * 8 deep, but with fragmentation it can wind
				 * up being a lot more.
				 *
				 * However, when backing out, we have to place
				 * all the entries in each parent node not
				 * yet processed on the list too, and because
				 * these entries are shallower they must be
				 * placed after each other in order to maintain
				 * our depth-first processing.
				 */
				save = kmalloc(sizeof(*save), M_HAMMER2,
					       M_WAITOK | M_ZERO);
				save->chain = chain;
				hammer2_chain_ref(chain);

				if (info->backout) {
					TAILQ_INSERT_AFTER(&info->list,
							   info->backout,
							   save, entry);
				} else {
					TAILQ_INSERT_HEAD(&info->list,
							  save, entry);
				}
				info->backout = save;
				++info->list_count;
				if (info->list_count_max < info->list_count)
					info->list_count_max = info->list_count;

				/* guess */
				info->pri += 10;
			} else {
				int savepri = info->pri;

				/*
				 * Drop both locks before recursing; the
				 * chains are isolated from the frontend
				 * so this is safe.  Re-lock parent before
				 * chain afterwards to preserve lock order.
				 */
				hammer2_chain_unlock(chain);
				hammer2_chain_unlock(parent);
				info->pri = 0;
				rup_error |= hammer2_bulkfree_scan(chain,
								   func, info);
				info->pri += savepri;
				hammer2_chain_lock(parent,
						   HAMMER2_RESOLVE_ALWAYS |
						   HAMMER2_RESOLVE_SHARED);
				hammer2_chain_lock(chain,
						   HAMMER2_RESOLVE_ALWAYS |
						   HAMMER2_RESOLVE_SHARED);
			}
			--info->depth;
			break;
		case HAMMER2_BREF_TYPE_DATA:
			break;
		default:
			/* does not recurse */
			break;
		}
		if (rup_error & HAMMER2_ERROR_ABORTED)
			break;
	}
	if (chain) {
		hammer2_chain_unlock(chain);
		hammer2_chain_drop(chain);
	}

	/*
	 * If this is a PFSROOT, also re-run any deferred elements
	 * added during our scan so we can report any cumulative errors
	 * for the PFS.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT)) {
		for (;;) {
			int opri;

			save = TAILQ_FIRST(&info->list);
			if (save == tail)	/* exhaust this PFS only */
				break;

			TAILQ_REMOVE(&info->list, save, entry);
			info->backout = NULL;
			--info->list_count;
			opri = info->pri;
			info->pri = 0;
			rup_error |= hammer2_bulkfree_scan(save->chain, func, info);
			hammer2_chain_drop(save->chain);
			kfree(save, M_HAMMER2);
			info->pri = opri;
		}
	}

	error |= rup_error;

	/*
	 * Report which PFS the errors were encountered in.
	 */
	if (parent->bref.type == HAMMER2_BREF_TYPE_INODE &&
	    (parent->bref.flags & HAMMER2_BREF_FLAG_PFSROOT) &&
	    (error & ~HAMMER2_ERROR_EOF)) {
		kprintf("hammer2_bulkfree: Encountered errors (%08x) "
			"while scanning \"%s\"\n",
			error, parent->data->ipdata.filename);
	}

	/*
	 * Save with higher pri now that we know what it is.
	 */
	h2_bulkfree_test(info, &parent->bref, info->pri + 1,
			 (error & ~HAMMER2_ERROR_EOF));

done:
	hammer2_chain_unlock(parent);

	return (error & ~HAMMER2_ERROR_EOF);
}
417*a63188c8STomohiro Kusumi
418*a63188c8STomohiro Kusumi /*
419*a63188c8STomohiro Kusumi * Bulkfree algorithm
420*a63188c8STomohiro Kusumi *
421*a63188c8STomohiro Kusumi * Repeat {
422*a63188c8STomohiro Kusumi * Chain flush (partial synchronization) XXX removed
423*a63188c8STomohiro Kusumi * Scan the whole topology - build in-memory freemap (mark 11)
424*a63188c8STomohiro Kusumi * Reconcile the in-memory freemap against the on-disk freemap.
425*a63188c8STomohiro Kusumi * ondisk xx -> ondisk 11 (if allocated)
426*a63188c8STomohiro Kusumi * ondisk 11 -> ondisk 10 (if free in-memory)
427*a63188c8STomohiro Kusumi * ondisk 10 -> ondisk 00 (if free in-memory) - on next pass
428*a63188c8STomohiro Kusumi * }
429*a63188c8STomohiro Kusumi *
430*a63188c8STomohiro Kusumi * The topology scan may have to be performed multiple times to window
431*a63188c8STomohiro Kusumi * freemaps which are too large to fit in kernel memory.
432*a63188c8STomohiro Kusumi *
433*a63188c8STomohiro Kusumi * Races are handled using a double-transition (11->10, 10->00). The bulkfree
434*a63188c8STomohiro Kusumi * scan snapshots the volume root's blockset and thus can run concurrent with
435*a63188c8STomohiro Kusumi * normal operations, as long as a full flush is made between each pass to
436*a63188c8STomohiro Kusumi * synchronize any modified chains (otherwise their blocks might be improperly
437*a63188c8STomohiro Kusumi * freed).
438*a63188c8STomohiro Kusumi *
439*a63188c8STomohiro Kusumi * Temporary memory in multiples of 32KB is required to reconstruct the leaf
440*a63188c8STomohiro Kusumi * hammer2_bmap_data blocks so they can later be compared against the live
441*a63188c8STomohiro Kusumi * freemap. Each 32KB represents 256 x 16KB x 256 = ~1 GB of storage.
442*a63188c8STomohiro Kusumi * A 32MB save area thus represents around ~1 TB. The temporary memory
443*a63188c8STomohiro Kusumi * allocated can be specified. If it is not sufficient multiple topology
444*a63188c8STomohiro Kusumi * passes will be made.
445*a63188c8STomohiro Kusumi */
446*a63188c8STomohiro Kusumi
/*
 * Bulkfree callback info
 *
 * Forward declarations for the callback-driven portion of the bulkfree
 * pass defined later in this file.
 */
//static void hammer2_bulkfree_thread(void *arg __unused);
static void cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size);
static int h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo,
			hammer2_blockref_t *bref);
static int h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo);
static void h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
			hammer2_off_t data_off, hammer2_bmap_data_t *live,
			hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base);
458*a63188c8STomohiro Kusumi
459*a63188c8STomohiro Kusumi void
hammer2_bulkfree_init(hammer2_dev_t * hmp)460*a63188c8STomohiro Kusumi hammer2_bulkfree_init(hammer2_dev_t *hmp)
461*a63188c8STomohiro Kusumi {
462*a63188c8STomohiro Kusumi /*
463*a63188c8STomohiro Kusumi hammer2_thr_create(&hmp->bfthr, NULL, hmp,
464*a63188c8STomohiro Kusumi hmp->devrepname, -1, -1,
465*a63188c8STomohiro Kusumi hammer2_bulkfree_thread);
466*a63188c8STomohiro Kusumi */
467*a63188c8STomohiro Kusumi }
468*a63188c8STomohiro Kusumi
469*a63188c8STomohiro Kusumi void
hammer2_bulkfree_uninit(hammer2_dev_t * hmp)470*a63188c8STomohiro Kusumi hammer2_bulkfree_uninit(hammer2_dev_t *hmp)
471*a63188c8STomohiro Kusumi {
472*a63188c8STomohiro Kusumi //hammer2_thr_delete(&hmp->bfthr);
473*a63188c8STomohiro Kusumi }
474*a63188c8STomohiro Kusumi
#if 0
/*
 * Bulkfree kernel thread (compiled out in this port; see
 * hammer2_bulkfree_init()).
 *
 * Sleeps up to 60 seconds waiting for STOP/FREEZE/UNFREEZE/REMASTER
 * signals.  On REMASTER it would kick off a bulkfree pass with an
 * 8MB buffer; on STOP it exits and signals STOPPED.
 */
static void
hammer2_bulkfree_thread(void *arg)
{
	hammer2_thread_t *thr = arg;
	hammer2_ioc_bulkfree_t bfi;
	uint32_t flags;

	for (;;) {
		hammer2_thr_wait_any(thr,
				     HAMMER2_THREAD_STOP |
				     HAMMER2_THREAD_FREEZE |
				     HAMMER2_THREAD_UNFREEZE |
				     HAMMER2_THREAD_REMASTER,
				     hz * 60);

		flags = thr->flags;
		cpu_ccfence();
		if (flags & HAMMER2_THREAD_STOP)
			break;
		if (flags & HAMMER2_THREAD_FREEZE) {
			hammer2_thr_signal2(thr, HAMMER2_THREAD_FROZEN,
					    HAMMER2_THREAD_FREEZE);
			continue;
		}
		if (flags & HAMMER2_THREAD_UNFREEZE) {
			hammer2_thr_signal2(thr, 0,
					    HAMMER2_THREAD_FROZEN |
					    HAMMER2_THREAD_UNFREEZE);
			continue;
		}
		if (flags & HAMMER2_THREAD_FROZEN)
			continue;
		if (flags & HAMMER2_THREAD_REMASTER) {
			hammer2_thr_signal2(thr, 0, HAMMER2_THREAD_REMASTER);
			bzero(&bfi, sizeof(bfi));
			bfi.size = 8192 * 1024;
			/* hammer2_bulkfree_pass(thr->hmp, &bfi); */
		}
	}
	thr->td = NULL;
	hammer2_thr_signal(thr, HAMMER2_THREAD_STOPPED);
	/* structure can go invalid at this point */
}
#endif
520*a63188c8STomohiro Kusumi
521*a63188c8STomohiro Kusumi int
hammer2_bulkfree_pass(hammer2_dev_t * hmp,hammer2_chain_t * vchain,hammer2_ioc_bulkfree_t * bfi)522*a63188c8STomohiro Kusumi hammer2_bulkfree_pass(hammer2_dev_t *hmp, hammer2_chain_t *vchain,
523*a63188c8STomohiro Kusumi hammer2_ioc_bulkfree_t *bfi)
524*a63188c8STomohiro Kusumi {
525*a63188c8STomohiro Kusumi hammer2_bulkfree_info_t cbinfo;
526*a63188c8STomohiro Kusumi hammer2_chain_save_t *save;
527*a63188c8STomohiro Kusumi hammer2_off_t incr;
528*a63188c8STomohiro Kusumi size_t size;
529*a63188c8STomohiro Kusumi int error;
530*a63188c8STomohiro Kusumi
531*a63188c8STomohiro Kusumi /*
532*a63188c8STomohiro Kusumi * We have to clear the live dedup cache as it might have entries
533*a63188c8STomohiro Kusumi * that are freeable as of now. Any new entries in the dedup cache
534*a63188c8STomohiro Kusumi * made after this point, even if they become freeable, will have
535*a63188c8STomohiro Kusumi * previously been fully allocated and will be protected by the
536*a63188c8STomohiro Kusumi * 2-stage bulkfree.
537*a63188c8STomohiro Kusumi */
538*a63188c8STomohiro Kusumi hammer2_dedup_clear(hmp);
539*a63188c8STomohiro Kusumi
540*a63188c8STomohiro Kusumi /*
541*a63188c8STomohiro Kusumi * Setup for free pass using the buffer size specified by the
542*a63188c8STomohiro Kusumi * hammer2 utility, 32K-aligned.
543*a63188c8STomohiro Kusumi */
544*a63188c8STomohiro Kusumi bzero(&cbinfo, sizeof(cbinfo));
545*a63188c8STomohiro Kusumi size = (bfi->size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
546*a63188c8STomohiro Kusumi ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);
547*a63188c8STomohiro Kusumi
548*a63188c8STomohiro Kusumi /*
549*a63188c8STomohiro Kusumi * Cap at 1/4 physical memory (hammer2 utility will not normally
550*a63188c8STomohiro Kusumi * ever specify a buffer this big, but leave the option available).
551*a63188c8STomohiro Kusumi */
552*a63188c8STomohiro Kusumi /*
553*a63188c8STomohiro Kusumi if (size > kmem_lim_size() * 1024 * 1024 / 4) {
554*a63188c8STomohiro Kusumi size = kmem_lim_size() * 1024 * 1024 / 4;
555*a63188c8STomohiro Kusumi kprintf("hammer2: Warning: capping bulkfree buffer at %jdM\n",
556*a63188c8STomohiro Kusumi (intmax_t)size / (1024 * 1024));
557*a63188c8STomohiro Kusumi }
558*a63188c8STomohiro Kusumi */
559*a63188c8STomohiro Kusumi
560*a63188c8STomohiro Kusumi #define HAMMER2_FREEMAP_SIZEDIV \
561*a63188c8STomohiro Kusumi (HAMMER2_FREEMAP_LEVEL1_SIZE / HAMMER2_FREEMAP_LEVELN_PSIZE)
562*a63188c8STomohiro Kusumi
563*a63188c8STomohiro Kusumi /*
564*a63188c8STomohiro Kusumi * Cap at the size needed to cover the whole volume to avoid
565*a63188c8STomohiro Kusumi * making an unnecessarily large allocation.
566*a63188c8STomohiro Kusumi */
567*a63188c8STomohiro Kusumi if (size > hmp->total_size / HAMMER2_FREEMAP_SIZEDIV)
568*a63188c8STomohiro Kusumi size = howmany(hmp->total_size, HAMMER2_FREEMAP_SIZEDIV);
569*a63188c8STomohiro Kusumi
570*a63188c8STomohiro Kusumi /*
571*a63188c8STomohiro Kusumi * Minimum bitmap buffer size, then align to a LEVELN_PSIZE (32K)
572*a63188c8STomohiro Kusumi * boundary.
573*a63188c8STomohiro Kusumi */
574*a63188c8STomohiro Kusumi if (size < 1024 * 1024)
575*a63188c8STomohiro Kusumi size = 1024 * 1024;
576*a63188c8STomohiro Kusumi size = (size + HAMMER2_FREEMAP_LEVELN_PSIZE - 1) &
577*a63188c8STomohiro Kusumi ~(size_t)(HAMMER2_FREEMAP_LEVELN_PSIZE - 1);
578*a63188c8STomohiro Kusumi
579*a63188c8STomohiro Kusumi cbinfo.hmp = hmp;
580*a63188c8STomohiro Kusumi cbinfo.bmap = kmalloc(size, M_HAMMER2, M_WAITOK | M_ZERO);
581*a63188c8STomohiro Kusumi cbinfo.dedup = kmalloc(sizeof(*cbinfo.dedup) * HAMMER2_DEDUP_HEUR_SIZE,
582*a63188c8STomohiro Kusumi M_HAMMER2, M_WAITOK | M_ZERO);
583*a63188c8STomohiro Kusumi
584*a63188c8STomohiro Kusumi kprintf("hammer2: bulkfree buf=%jdM\n",
585*a63188c8STomohiro Kusumi (intmax_t)size / (1024 * 1024));
586*a63188c8STomohiro Kusumi
587*a63188c8STomohiro Kusumi /*
588*a63188c8STomohiro Kusumi * Normalize start point to a 1GB boundary. We operate on a
589*a63188c8STomohiro Kusumi * 32KB leaf bitmap boundary which represents 1GB of storage.
590*a63188c8STomohiro Kusumi */
591*a63188c8STomohiro Kusumi cbinfo.sbase = bfi->sbase;
592*a63188c8STomohiro Kusumi if (cbinfo.sbase > hmp->total_size)
593*a63188c8STomohiro Kusumi cbinfo.sbase = hmp->total_size;
594*a63188c8STomohiro Kusumi cbinfo.sbase &= ~HAMMER2_FREEMAP_LEVEL1_MASK;
595*a63188c8STomohiro Kusumi TAILQ_INIT(&cbinfo.list);
596*a63188c8STomohiro Kusumi
597*a63188c8STomohiro Kusumi cbinfo.bulkfree_ticks = ticks;
598*a63188c8STomohiro Kusumi
599*a63188c8STomohiro Kusumi /*
600*a63188c8STomohiro Kusumi * Loop on a full meta-data scan as many times as required to
601*a63188c8STomohiro Kusumi * get through all available storage.
602*a63188c8STomohiro Kusumi */
603*a63188c8STomohiro Kusumi error = 0;
604*a63188c8STomohiro Kusumi while (cbinfo.sbase < hmp->total_size) {
605*a63188c8STomohiro Kusumi /*
606*a63188c8STomohiro Kusumi * We have enough ram to represent (incr) bytes of storage.
607*a63188c8STomohiro Kusumi * Each 32KB of ram represents 1GB of storage.
608*a63188c8STomohiro Kusumi *
609*a63188c8STomohiro Kusumi * We must also clean out our de-duplication heuristic for
610*a63188c8STomohiro Kusumi * each (incr) bytes of storage, otherwise we wind up not
611*a63188c8STomohiro Kusumi * scanning meta-data for later areas of storage because
612*a63188c8STomohiro Kusumi * they had already been scanned in earlier areas of storage.
613*a63188c8STomohiro Kusumi * Since the ranging is different, we have to restart
614*a63188c8STomohiro Kusumi * the dedup heuristic too.
615*a63188c8STomohiro Kusumi */
616*a63188c8STomohiro Kusumi int allmedia;
617*a63188c8STomohiro Kusumi
618*a63188c8STomohiro Kusumi cbinfo_bmap_init(&cbinfo, size);
619*a63188c8STomohiro Kusumi bzero(cbinfo.dedup, sizeof(*cbinfo.dedup) *
620*a63188c8STomohiro Kusumi HAMMER2_DEDUP_HEUR_SIZE);
621*a63188c8STomohiro Kusumi cbinfo.count_inodes_scanned = 0;
622*a63188c8STomohiro Kusumi cbinfo.count_dirents_scanned = 0;
623*a63188c8STomohiro Kusumi cbinfo.count_bytes_scanned = 0;
624*a63188c8STomohiro Kusumi cbinfo.count_chains_scanned = 0;
625*a63188c8STomohiro Kusumi cbinfo.count_chains_reported = 0;
626*a63188c8STomohiro Kusumi
627*a63188c8STomohiro Kusumi incr = size / HAMMER2_FREEMAP_LEVELN_PSIZE *
628*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_LEVEL1_SIZE;
629*a63188c8STomohiro Kusumi if (hmp->total_size - cbinfo.sbase <= incr) {
630*a63188c8STomohiro Kusumi cbinfo.sstop = hmp->total_size;
631*a63188c8STomohiro Kusumi allmedia = 1;
632*a63188c8STomohiro Kusumi } else {
633*a63188c8STomohiro Kusumi cbinfo.sstop = cbinfo.sbase + incr;
634*a63188c8STomohiro Kusumi allmedia = 0;
635*a63188c8STomohiro Kusumi }
636*a63188c8STomohiro Kusumi kprintf("hammer2: pass %016jx-%016jx ",
637*a63188c8STomohiro Kusumi (intmax_t)cbinfo.sbase,
638*a63188c8STomohiro Kusumi (intmax_t)cbinfo.sstop);
639*a63188c8STomohiro Kusumi if (allmedia && cbinfo.sbase == 0)
640*a63188c8STomohiro Kusumi kprintf("(all media)\n");
641*a63188c8STomohiro Kusumi else if (allmedia)
642*a63188c8STomohiro Kusumi kprintf("(remaining media)\n");
643*a63188c8STomohiro Kusumi else
644*a63188c8STomohiro Kusumi kprintf("(%jdGB of media)\n",
645*a63188c8STomohiro Kusumi (intmax_t)incr / (1024L*1024*1024));
646*a63188c8STomohiro Kusumi
647*a63188c8STomohiro Kusumi /*
648*a63188c8STomohiro Kusumi * Scan topology for stuff inside this range.
649*a63188c8STomohiro Kusumi *
650*a63188c8STomohiro Kusumi * NOTE - By not using a transaction the operation can
651*a63188c8STomohiro Kusumi * run concurrent with the frontend as well as
652*a63188c8STomohiro Kusumi * with flushes.
653*a63188c8STomohiro Kusumi *
654*a63188c8STomohiro Kusumi * We cannot safely set a mtid without a transaction,
655*a63188c8STomohiro Kusumi * and in fact we don't want to set one anyway. We
		 * want the bulkfree to be passive and not interfere
657*a63188c8STomohiro Kusumi * with crash recovery.
658*a63188c8STomohiro Kusumi */
659*a63188c8STomohiro Kusumi #undef HAMMER2_BULKFREE_TRANS /* undef - don't use transaction */
660*a63188c8STomohiro Kusumi #ifdef HAMMER2_BULKFREE_TRANS
661*a63188c8STomohiro Kusumi hammer2_trans_init(hmp->spmp, 0);
662*a63188c8STomohiro Kusumi cbinfo.mtid = hammer2_trans_sub(hmp->spmp);
663*a63188c8STomohiro Kusumi #else
664*a63188c8STomohiro Kusumi cbinfo.mtid = 0;
665*a63188c8STomohiro Kusumi #endif
666*a63188c8STomohiro Kusumi cbinfo.pri = 0;
667*a63188c8STomohiro Kusumi error |= hammer2_bulkfree_scan(vchain,
668*a63188c8STomohiro Kusumi h2_bulkfree_callback, &cbinfo);
669*a63188c8STomohiro Kusumi
670*a63188c8STomohiro Kusumi while ((save = TAILQ_FIRST(&cbinfo.list)) != NULL &&
671*a63188c8STomohiro Kusumi (error & ~HAMMER2_ERROR_CHECK) == 0) {
672*a63188c8STomohiro Kusumi TAILQ_REMOVE(&cbinfo.list, save, entry);
673*a63188c8STomohiro Kusumi --cbinfo.list_count;
674*a63188c8STomohiro Kusumi cbinfo.pri = 0;
675*a63188c8STomohiro Kusumi cbinfo.backout = NULL;
676*a63188c8STomohiro Kusumi error |= hammer2_bulkfree_scan(save->chain,
677*a63188c8STomohiro Kusumi h2_bulkfree_callback,
678*a63188c8STomohiro Kusumi &cbinfo);
679*a63188c8STomohiro Kusumi hammer2_chain_drop(save->chain);
680*a63188c8STomohiro Kusumi kfree(save, M_HAMMER2);
681*a63188c8STomohiro Kusumi }
682*a63188c8STomohiro Kusumi while (save) {
683*a63188c8STomohiro Kusumi TAILQ_REMOVE(&cbinfo.list, save, entry);
684*a63188c8STomohiro Kusumi --cbinfo.list_count;
685*a63188c8STomohiro Kusumi hammer2_chain_drop(save->chain);
686*a63188c8STomohiro Kusumi kfree(save, M_HAMMER2);
687*a63188c8STomohiro Kusumi save = TAILQ_FIRST(&cbinfo.list);
688*a63188c8STomohiro Kusumi }
689*a63188c8STomohiro Kusumi cbinfo.backout = NULL;
690*a63188c8STomohiro Kusumi
691*a63188c8STomohiro Kusumi /*
692*a63188c8STomohiro Kusumi * If the complete scan succeeded we can synchronize our
693*a63188c8STomohiro Kusumi * in-memory freemap against live storage. If an abort
		 * occurred we cannot safely synchronize our partially
695*a63188c8STomohiro Kusumi * filled-out in-memory freemap.
696*a63188c8STomohiro Kusumi *
697*a63188c8STomohiro Kusumi * We still synchronize on CHECK failures. That is, we still
698*a63188c8STomohiro Kusumi * want bulkfree to operate even if the filesystem has defects.
699*a63188c8STomohiro Kusumi */
700*a63188c8STomohiro Kusumi if (error & ~HAMMER2_ERROR_CHECK) {
701*a63188c8STomohiro Kusumi kprintf("bulkfree lastdrop %d %d error=0x%04x\n",
702*a63188c8STomohiro Kusumi vchain->refs, vchain->core.chain_count, error);
703*a63188c8STomohiro Kusumi } else {
704*a63188c8STomohiro Kusumi if (error & HAMMER2_ERROR_CHECK) {
705*a63188c8STomohiro Kusumi kprintf("bulkfree lastdrop %d %d "
706*a63188c8STomohiro Kusumi "(with check errors)\n",
707*a63188c8STomohiro Kusumi vchain->refs, vchain->core.chain_count);
708*a63188c8STomohiro Kusumi } else {
709*a63188c8STomohiro Kusumi kprintf("bulkfree lastdrop %d %d\n",
710*a63188c8STomohiro Kusumi vchain->refs, vchain->core.chain_count);
711*a63188c8STomohiro Kusumi }
712*a63188c8STomohiro Kusumi
713*a63188c8STomohiro Kusumi error = h2_bulkfree_sync(&cbinfo);
714*a63188c8STomohiro Kusumi
715*a63188c8STomohiro Kusumi hammer2_voldata_lock(hmp);
716*a63188c8STomohiro Kusumi hammer2_voldata_modify(hmp);
717*a63188c8STomohiro Kusumi hmp->voldata.allocator_free += cbinfo.adj_free;
718*a63188c8STomohiro Kusumi hammer2_voldata_unlock(hmp);
719*a63188c8STomohiro Kusumi }
720*a63188c8STomohiro Kusumi
721*a63188c8STomohiro Kusumi /*
722*a63188c8STomohiro Kusumi * Cleanup for next loop.
723*a63188c8STomohiro Kusumi */
724*a63188c8STomohiro Kusumi #ifdef HAMMER2_BULKFREE_TRANS
725*a63188c8STomohiro Kusumi hammer2_trans_done(hmp->spmp, 0);
726*a63188c8STomohiro Kusumi #endif
727*a63188c8STomohiro Kusumi if (error & ~HAMMER2_ERROR_CHECK)
728*a63188c8STomohiro Kusumi break;
729*a63188c8STomohiro Kusumi cbinfo.sbase = cbinfo.sstop;
730*a63188c8STomohiro Kusumi cbinfo.adj_free = 0;
731*a63188c8STomohiro Kusumi }
732*a63188c8STomohiro Kusumi kfree(cbinfo.bmap, M_HAMMER2);
733*a63188c8STomohiro Kusumi kfree(cbinfo.dedup, M_HAMMER2);
734*a63188c8STomohiro Kusumi cbinfo.dedup = NULL;
735*a63188c8STomohiro Kusumi
736*a63188c8STomohiro Kusumi bfi->sstop = cbinfo.sbase;
737*a63188c8STomohiro Kusumi
738*a63188c8STomohiro Kusumi incr = bfi->sstop / (hmp->total_size / 10000);
739*a63188c8STomohiro Kusumi if (incr > 10000)
740*a63188c8STomohiro Kusumi incr = 10000;
741*a63188c8STomohiro Kusumi
742*a63188c8STomohiro Kusumi kprintf("bulkfree pass statistics (%d.%02d%% storage processed):\n",
743*a63188c8STomohiro Kusumi (int)incr / 100,
744*a63188c8STomohiro Kusumi (int)incr % 100);
745*a63188c8STomohiro Kusumi
746*a63188c8STomohiro Kusumi if (error & ~HAMMER2_ERROR_CHECK) {
747*a63188c8STomohiro Kusumi kprintf(" bulkfree was aborted\n");
748*a63188c8STomohiro Kusumi } else {
749*a63188c8STomohiro Kusumi if (error & HAMMER2_ERROR_CHECK) {
750*a63188c8STomohiro Kusumi kprintf(" WARNING: bulkfree "
751*a63188c8STomohiro Kusumi "encountered CRC errors\n");
752*a63188c8STomohiro Kusumi }
753*a63188c8STomohiro Kusumi kprintf(" transition->free %ld\n", cbinfo.count_10_00);
754*a63188c8STomohiro Kusumi kprintf(" transition->staged %ld\n", cbinfo.count_11_10);
755*a63188c8STomohiro Kusumi kprintf(" ERR(00)->allocated %ld\n", cbinfo.count_00_11);
756*a63188c8STomohiro Kusumi kprintf(" ERR(01)->allocated %ld\n", cbinfo.count_01_11);
757*a63188c8STomohiro Kusumi kprintf(" staged->allocated %ld\n", cbinfo.count_10_11);
758*a63188c8STomohiro Kusumi kprintf(" ~4MB segs cleaned %ld\n", cbinfo.count_l0cleans);
759*a63188c8STomohiro Kusumi kprintf(" linear adjusts %ld\n",
760*a63188c8STomohiro Kusumi cbinfo.count_linadjusts);
761*a63188c8STomohiro Kusumi kprintf(" dedup factor %ld\n",
762*a63188c8STomohiro Kusumi cbinfo.count_dedup_factor);
763*a63188c8STomohiro Kusumi kprintf(" max saved chains %ld\n", cbinfo.list_count_max);
764*a63188c8STomohiro Kusumi }
765*a63188c8STomohiro Kusumi
766*a63188c8STomohiro Kusumi return error;
767*a63188c8STomohiro Kusumi }
768*a63188c8STomohiro Kusumi
769*a63188c8STomohiro Kusumi static void
cbinfo_bmap_init(hammer2_bulkfree_info_t * cbinfo,size_t size)770*a63188c8STomohiro Kusumi cbinfo_bmap_init(hammer2_bulkfree_info_t *cbinfo, size_t size)
771*a63188c8STomohiro Kusumi {
772*a63188c8STomohiro Kusumi hammer2_bmap_data_t *bmap = cbinfo->bmap;
773*a63188c8STomohiro Kusumi hammer2_key_t key = cbinfo->sbase;
774*a63188c8STomohiro Kusumi hammer2_key_t lokey;
775*a63188c8STomohiro Kusumi hammer2_key_t hikey;
776*a63188c8STomohiro Kusumi
777*a63188c8STomohiro Kusumi lokey = (cbinfo->hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
778*a63188c8STomohiro Kusumi ~HAMMER2_SEGMASK64;
779*a63188c8STomohiro Kusumi hikey = cbinfo->hmp->total_size & ~HAMMER2_SEGMASK64;
780*a63188c8STomohiro Kusumi
781*a63188c8STomohiro Kusumi bzero(bmap, size);
782*a63188c8STomohiro Kusumi while (size) {
783*a63188c8STomohiro Kusumi bzero(bmap, sizeof(*bmap));
784*a63188c8STomohiro Kusumi if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX))
785*a63188c8STomohiro Kusumi lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
786*a63188c8STomohiro Kusumi if (lokey < H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64)
787*a63188c8STomohiro Kusumi lokey = H2FMZONEBASE(key) + HAMMER2_ZONE_SEG64;
788*a63188c8STomohiro Kusumi if (key < lokey || key >= hikey) {
789*a63188c8STomohiro Kusumi memset(bmap->bitmapq, -1,
790*a63188c8STomohiro Kusumi sizeof(bmap->bitmapq));
791*a63188c8STomohiro Kusumi bmap->avail = 0;
792*a63188c8STomohiro Kusumi bmap->linear = HAMMER2_SEGSIZE;
793*a63188c8STomohiro Kusumi } else {
794*a63188c8STomohiro Kusumi bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
795*a63188c8STomohiro Kusumi }
796*a63188c8STomohiro Kusumi size -= sizeof(*bmap);
797*a63188c8STomohiro Kusumi key += HAMMER2_FREEMAP_LEVEL0_SIZE;
798*a63188c8STomohiro Kusumi ++bmap;
799*a63188c8STomohiro Kusumi }
800*a63188c8STomohiro Kusumi }
801*a63188c8STomohiro Kusumi
/*
 * Bulkfree scan callback, invoked for each blockref encountered during
 * the topology scan.  Marks the storage backing (bref) as allocated
 * (bit code 11) in the in-memory freemap bitmap being collected for the
 * current pass covering [cbinfo->sbase, cbinfo->sstop).
 *
 * Returns 0 normally (including for out-of-range offsets, which are
 * simply ignored), or HAMMER2_ERROR_ABORTED on a signal.
 */
static int
h2_bulkfree_callback(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref)
{
	hammer2_bmap_data_t *bmap;
	hammer2_off_t data_off;
	uint16_t class;
	size_t bytes;
	int radix;

	/*
	 * Check for signal and allow yield to userland during scan.
	 */
	if (hammer2_signal_check(&cbinfo->save_time))
		return HAMMER2_ERROR_ABORTED;

	/*
	 * Deal with kernel thread cpu or I/O hogging by limiting the
	 * number of chains scanned per second to hammer2_bulkfree_tps.
	 * Ignore leaf records (DIRENT and DATA), no per-record I/O is
	 * involved for those since we don't load their data.
	 */
	if (bref->type != HAMMER2_BREF_TYPE_DATA &&
	    bref->type != HAMMER2_BREF_TYPE_DIRENT) {
		++cbinfo->bulkfree_calls;
		if (cbinfo->bulkfree_calls > hammer2_bulkfree_tps) {
			int dticks = ticks - cbinfo->bulkfree_ticks;
			if (dticks < 0)
				dticks = 0;
			if (dticks < hz) {
				/* sleep out the rest of the 1-second window */
				tsleep(&cbinfo->bulkfree_ticks, 0,
				       "h2bw", hz - dticks);
			}
			cbinfo->bulkfree_calls = 0;
			cbinfo->bulkfree_ticks = ticks;
		}
	}

	/*
	 * Calculate the data offset and determine if it is within
	 * the current freemap range being gathered.
	 */
	data_off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
	if (data_off < cbinfo->sbase || data_off >= cbinfo->sstop)
		return 0;
	if (data_off < cbinfo->hmp->voldata.allocator_beg)
		return 0;
	if (data_off >= cbinfo->hmp->total_size)
		return 0;

	/*
	 * Calculate the information needed to generate the in-memory
	 * freemap record.
	 *
	 * Hammer2 does not allow allocations to cross the L1 (1GB) boundary,
	 * it's a problem if it does.  (Or L0 (4MB) for that matter).
	 */
	/* low bits of data_off encode the allocation size radix */
	radix = (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	KKASSERT(radix != 0);
	bytes = (size_t)1 << radix;
	class = (bref->type << 8) | HAMMER2_PBUFRADIX;

	if (data_off + bytes > cbinfo->sstop) {
		kprintf("hammer2_bulkfree_scan: illegal 1GB boundary "
			"%016jx %016jx/%d\n",
			(intmax_t)bref->data_off,
			(intmax_t)bref->key,
			bref->keybits);
		bytes = cbinfo->sstop - data_off;	/* XXX */
	}

	/*
	 * Convert to a storage offset relative to the beginning of the
	 * storage range we are collecting.  Then lookup the level0 bmap entry.
	 */
	data_off -= cbinfo->sbase;
	bmap = cbinfo->bmap + (data_off >> HAMMER2_FREEMAP_LEVEL0_RADIX);

	/*
	 * Convert data_off to a bmap-relative value (~4MB storage range).
	 * Adjust linear, class, and avail.
	 *
	 * Hammer2 does not allow allocations to cross the L0 (4MB) boundary,
	 */
	data_off &= HAMMER2_FREEMAP_LEVEL0_MASK;
	if (data_off + bytes > HAMMER2_FREEMAP_LEVEL0_SIZE) {
		kprintf("hammer2_bulkfree_scan: illegal 4MB boundary "
			"%016jx %016jx/%d\n",
			(intmax_t)bref->data_off,
			(intmax_t)bref->key,
			bref->keybits);
		/* clamp to the element boundary and continue best-effort */
		bytes = HAMMER2_FREEMAP_LEVEL0_SIZE - data_off;
	}

	/* first allocation seen for this 4MB element sets its class */
	if (bmap->class == 0) {
		bmap->class = class;
		bmap->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
	}

	/*
	 * NOTE: bmap->class does not have to match class.  Classification
	 *	 is relaxed when free space is low, so some mixing can occur.
	 */
#if 0
	/*
	 * XXX removed
	 */
	if (bmap->class != class) {
		kprintf("hammer2_bulkfree_scan: illegal mixed class "
			"%016jx %016jx/%d (%04x vs %04x)\n",
			(intmax_t)bref->data_off,
			(intmax_t)bref->key,
			bref->keybits,
			class, bmap->class);
	}
#endif

	/*
	 * Just record the highest byte-granular offset for now.  Do not
	 * match against allocations which are in multiples of whole blocks.
	 *
	 * Make sure that any in-block linear offset at least covers the
	 * data range.  This can cause bmap->linear to become block-aligned.
	 */
	if (bytes & HAMMER2_FREEMAP_BLOCK_MASK) {
		if (bmap->linear < (int32_t)data_off + (int32_t)bytes)
			bmap->linear = (int32_t)data_off + (int32_t)bytes;
	} else if (bmap->linear >= (int32_t)data_off &&
		   bmap->linear < (int32_t)data_off + (int32_t)bytes) {
		bmap->linear = (int32_t)data_off + (int32_t)bytes;
	}

	/*
	 * Adjust the hammer2_bitmap_t bitmap[HAMMER2_BMAP_ELEMENTS].
	 * 64-bit entries, 2 bits per entry, to code 11.
	 *
	 * NOTE: data_off mask to 524288, shift right by 14 (radix for 16384),
	 *	 and multiply shift amount by 2 for sets of 2 bits.
	 *
	 * NOTE: The allocation can be smaller than HAMMER2_FREEMAP_BLOCK_SIZE.
	 *	 also, data_off may not be FREEMAP_BLOCK_SIZE aligned.
	 */
	while (bytes > 0) {
		hammer2_bitmap_t bmask;
		int bindex;

		/* which 64-bit bitmap word covers this offset */
		bindex = (int)data_off >> (HAMMER2_FREEMAP_BLOCK_RADIX +
					   HAMMER2_BMAP_INDEX_RADIX);
		/* 2-bit mask within that word for this 16KB block */
		bmask = (hammer2_bitmap_t)3 <<
			((((int)data_off & HAMMER2_BMAP_INDEX_MASK) >>
			 HAMMER2_FREEMAP_BLOCK_RADIX) << 1);

		/*
		 * NOTE! The (avail) calculation is bitmap-granular.  Multiple
		 *	 sub-granular records can wind up at the same bitmap
		 *	 position.
		 */
		if ((bmap->bitmapq[bindex] & bmask) == 0) {
			if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE) {
				bmap->avail -= HAMMER2_FREEMAP_BLOCK_SIZE;
			} else {
				bmap->avail -= bytes;
			}
			bmap->bitmapq[bindex] |= bmask;
		}
		data_off += HAMMER2_FREEMAP_BLOCK_SIZE;
		if (bytes < HAMMER2_FREEMAP_BLOCK_SIZE)
			bytes = 0;
		else
			bytes -= HAMMER2_FREEMAP_BLOCK_SIZE;
	}
	return 0;
}
974*a63188c8STomohiro Kusumi
975*a63188c8STomohiro Kusumi /*
976*a63188c8STomohiro Kusumi * Synchronize the in-memory bitmap with the live freemap. This is not a
977*a63188c8STomohiro Kusumi * direct copy. Instead the bitmaps must be compared:
978*a63188c8STomohiro Kusumi *
979*a63188c8STomohiro Kusumi * In-memory Live-freemap
980*a63188c8STomohiro Kusumi * 00 11 -> 10 (do nothing if live modified)
981*a63188c8STomohiro Kusumi * 10 -> 00 (do nothing if live modified)
982*a63188c8STomohiro Kusumi * 11 10 -> 11 handles race against live
983*a63188c8STomohiro Kusumi * ** -> 11 nominally warn of corruption
984*a63188c8STomohiro Kusumi *
985*a63188c8STomohiro Kusumi * We must also fixup the hints in HAMMER2_BREF_TYPE_FREEMAP_LEAF.
986*a63188c8STomohiro Kusumi */
987*a63188c8STomohiro Kusumi static int
h2_bulkfree_sync(hammer2_bulkfree_info_t * cbinfo)988*a63188c8STomohiro Kusumi h2_bulkfree_sync(hammer2_bulkfree_info_t *cbinfo)
989*a63188c8STomohiro Kusumi {
990*a63188c8STomohiro Kusumi hammer2_off_t data_off;
991*a63188c8STomohiro Kusumi hammer2_key_t key;
992*a63188c8STomohiro Kusumi hammer2_key_t key_dummy;
993*a63188c8STomohiro Kusumi hammer2_bmap_data_t *bmap;
994*a63188c8STomohiro Kusumi hammer2_bmap_data_t *live;
995*a63188c8STomohiro Kusumi hammer2_chain_t *live_parent;
996*a63188c8STomohiro Kusumi hammer2_chain_t *live_chain;
997*a63188c8STomohiro Kusumi int bmapindex;
998*a63188c8STomohiro Kusumi int error;
999*a63188c8STomohiro Kusumi
1000*a63188c8STomohiro Kusumi kprintf("hammer2_bulkfree - range ");
1001*a63188c8STomohiro Kusumi
1002*a63188c8STomohiro Kusumi if (cbinfo->sbase < cbinfo->hmp->voldata.allocator_beg)
1003*a63188c8STomohiro Kusumi kprintf("%016jx-",
1004*a63188c8STomohiro Kusumi (intmax_t)cbinfo->hmp->voldata.allocator_beg);
1005*a63188c8STomohiro Kusumi else
1006*a63188c8STomohiro Kusumi kprintf("%016jx-",
1007*a63188c8STomohiro Kusumi (intmax_t)cbinfo->sbase);
1008*a63188c8STomohiro Kusumi
1009*a63188c8STomohiro Kusumi if (cbinfo->sstop > cbinfo->hmp->total_size)
1010*a63188c8STomohiro Kusumi kprintf("%016jx\n",
1011*a63188c8STomohiro Kusumi (intmax_t)cbinfo->hmp->total_size);
1012*a63188c8STomohiro Kusumi else
1013*a63188c8STomohiro Kusumi kprintf("%016jx\n",
1014*a63188c8STomohiro Kusumi (intmax_t)cbinfo->sstop);
1015*a63188c8STomohiro Kusumi
1016*a63188c8STomohiro Kusumi data_off = cbinfo->sbase;
1017*a63188c8STomohiro Kusumi bmap = cbinfo->bmap;
1018*a63188c8STomohiro Kusumi
1019*a63188c8STomohiro Kusumi live_parent = &cbinfo->hmp->fchain;
1020*a63188c8STomohiro Kusumi hammer2_chain_ref(live_parent);
1021*a63188c8STomohiro Kusumi hammer2_chain_lock(live_parent, HAMMER2_RESOLVE_ALWAYS);
1022*a63188c8STomohiro Kusumi live_chain = NULL;
1023*a63188c8STomohiro Kusumi error = 0;
1024*a63188c8STomohiro Kusumi
1025*a63188c8STomohiro Kusumi /*
1026*a63188c8STomohiro Kusumi * Iterate each hammer2_bmap_data_t line (128 bytes) managing
1027*a63188c8STomohiro Kusumi * 4MB of storage.
1028*a63188c8STomohiro Kusumi */
1029*a63188c8STomohiro Kusumi while (data_off < cbinfo->sstop) {
1030*a63188c8STomohiro Kusumi /*
1031*a63188c8STomohiro Kusumi * The freemap is not used below allocator_beg or beyond
1032*a63188c8STomohiro Kusumi * total_size.
1033*a63188c8STomohiro Kusumi */
1034*a63188c8STomohiro Kusumi
1035*a63188c8STomohiro Kusumi if (data_off < cbinfo->hmp->voldata.allocator_beg)
1036*a63188c8STomohiro Kusumi goto next;
1037*a63188c8STomohiro Kusumi if (data_off >= cbinfo->hmp->total_size)
1038*a63188c8STomohiro Kusumi goto next;
1039*a63188c8STomohiro Kusumi
1040*a63188c8STomohiro Kusumi /*
1041*a63188c8STomohiro Kusumi * Locate the freemap leaf on the live filesystem
1042*a63188c8STomohiro Kusumi */
1043*a63188c8STomohiro Kusumi key = (data_off & ~HAMMER2_FREEMAP_LEVEL1_MASK);
1044*a63188c8STomohiro Kusumi
1045*a63188c8STomohiro Kusumi if (live_chain == NULL || live_chain->bref.key != key) {
1046*a63188c8STomohiro Kusumi if (live_chain) {
1047*a63188c8STomohiro Kusumi hammer2_chain_unlock(live_chain);
1048*a63188c8STomohiro Kusumi hammer2_chain_drop(live_chain);
1049*a63188c8STomohiro Kusumi }
1050*a63188c8STomohiro Kusumi live_chain = hammer2_chain_lookup(
1051*a63188c8STomohiro Kusumi &live_parent,
1052*a63188c8STomohiro Kusumi &key_dummy,
1053*a63188c8STomohiro Kusumi key,
1054*a63188c8STomohiro Kusumi key + HAMMER2_FREEMAP_LEVEL1_MASK,
1055*a63188c8STomohiro Kusumi &error,
1056*a63188c8STomohiro Kusumi HAMMER2_LOOKUP_ALWAYS);
1057*a63188c8STomohiro Kusumi if (error) {
1058*a63188c8STomohiro Kusumi kprintf("hammer2_bulkfree: freemap lookup "
1059*a63188c8STomohiro Kusumi "error near %016jx, error %s\n",
1060*a63188c8STomohiro Kusumi (intmax_t)data_off,
1061*a63188c8STomohiro Kusumi hammer2_error_str(live_chain->error));
1062*a63188c8STomohiro Kusumi break;
1063*a63188c8STomohiro Kusumi }
1064*a63188c8STomohiro Kusumi }
1065*a63188c8STomohiro Kusumi if (live_chain == NULL) {
1066*a63188c8STomohiro Kusumi /*
1067*a63188c8STomohiro Kusumi * XXX if we implement a full recovery mode we need
1068*a63188c8STomohiro Kusumi * to create/recreate missing freemap chains if our
1069*a63188c8STomohiro Kusumi * bmap has any allocated blocks.
1070*a63188c8STomohiro Kusumi */
1071*a63188c8STomohiro Kusumi if (bmap->class &&
1072*a63188c8STomohiro Kusumi bmap->avail != HAMMER2_FREEMAP_LEVEL0_SIZE) {
1073*a63188c8STomohiro Kusumi kprintf("hammer2_bulkfree: cannot locate "
1074*a63188c8STomohiro Kusumi "live leaf for allocated data "
1075*a63188c8STomohiro Kusumi "near %016jx\n",
1076*a63188c8STomohiro Kusumi (intmax_t)data_off);
1077*a63188c8STomohiro Kusumi }
1078*a63188c8STomohiro Kusumi goto next;
1079*a63188c8STomohiro Kusumi }
1080*a63188c8STomohiro Kusumi if (live_chain->error) {
1081*a63188c8STomohiro Kusumi kprintf("hammer2_bulkfree: unable to access freemap "
1082*a63188c8STomohiro Kusumi "near %016jx, error %s\n",
1083*a63188c8STomohiro Kusumi (intmax_t)data_off,
1084*a63188c8STomohiro Kusumi hammer2_error_str(live_chain->error));
1085*a63188c8STomohiro Kusumi hammer2_chain_unlock(live_chain);
1086*a63188c8STomohiro Kusumi hammer2_chain_drop(live_chain);
1087*a63188c8STomohiro Kusumi live_chain = NULL;
1088*a63188c8STomohiro Kusumi goto next;
1089*a63188c8STomohiro Kusumi }
1090*a63188c8STomohiro Kusumi
1091*a63188c8STomohiro Kusumi bmapindex = (data_off & HAMMER2_FREEMAP_LEVEL1_MASK) >>
1092*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_LEVEL0_RADIX;
1093*a63188c8STomohiro Kusumi live = &live_chain->data->bmdata[bmapindex];
1094*a63188c8STomohiro Kusumi
1095*a63188c8STomohiro Kusumi /*
1096*a63188c8STomohiro Kusumi * Shortcut if the bitmaps match and the live linear
1097*a63188c8STomohiro Kusumi * indicator is sane. We can't do a perfect check of
1098*a63188c8STomohiro Kusumi * live->linear because the only real requirement is that
1099*a63188c8STomohiro Kusumi * if it is not block-aligned, that it not cover the space
1100*a63188c8STomohiro Kusumi * within its current block which overlaps one of the data
1101*a63188c8STomohiro Kusumi * ranges we scan. We don't retain enough fine-grained
1102*a63188c8STomohiro Kusumi * data in our scan to be able to set it exactly.
1103*a63188c8STomohiro Kusumi *
1104*a63188c8STomohiro Kusumi * TODO - we could shortcut this by testing that both
1105*a63188c8STomohiro Kusumi * live->class and bmap->class are 0, and both avails are
1106*a63188c8STomohiro Kusumi * set to HAMMER2_FREEMAP_LEVEL0_SIZE (4MB).
1107*a63188c8STomohiro Kusumi */
1108*a63188c8STomohiro Kusumi if (bcmp(live->bitmapq, bmap->bitmapq,
1109*a63188c8STomohiro Kusumi sizeof(bmap->bitmapq)) == 0 &&
1110*a63188c8STomohiro Kusumi live->linear >= bmap->linear &&
1111*a63188c8STomohiro Kusumi (hammer2_aux_flags & 1) == 0 &&
1112*a63188c8STomohiro Kusumi bigmask_good(bmap, live_chain->bref.check.freemap.bigmask))
1113*a63188c8STomohiro Kusumi {
1114*a63188c8STomohiro Kusumi goto next;
1115*a63188c8STomohiro Kusumi }
1116*a63188c8STomohiro Kusumi if (hammer2_debug & 1) {
1117*a63188c8STomohiro Kusumi kprintf("live %016jx %04d.%04x (avail=%d) "
1118*a63188c8STomohiro Kusumi "bigmask %08x->%08x\n",
1119*a63188c8STomohiro Kusumi data_off, bmapindex, live->class, live->avail,
1120*a63188c8STomohiro Kusumi live_chain->bref.check.freemap.bigmask,
1121*a63188c8STomohiro Kusumi live_chain->bref.check.freemap.bigmask |
1122*a63188c8STomohiro Kusumi bigmask_get(bmap));
1123*a63188c8STomohiro Kusumi }
1124*a63188c8STomohiro Kusumi
1125*a63188c8STomohiro Kusumi if (hammer2_chain_modify(live_chain, cbinfo->mtid, 0, 0)) {
1126*a63188c8STomohiro Kusumi kprintf("hammer2_bulkfree: unable to modify freemap "
1127*a63188c8STomohiro Kusumi "at %016jx for data-block %016jx, error %s\n",
1128*a63188c8STomohiro Kusumi live_chain->bref.data_off,
1129*a63188c8STomohiro Kusumi (intmax_t)data_off,
1130*a63188c8STomohiro Kusumi hammer2_error_str(live_chain->error));
1131*a63188c8STomohiro Kusumi hammer2_chain_unlock(live_chain);
1132*a63188c8STomohiro Kusumi hammer2_chain_drop(live_chain);
1133*a63188c8STomohiro Kusumi live_chain = NULL;
1134*a63188c8STomohiro Kusumi goto next;
1135*a63188c8STomohiro Kusumi }
1136*a63188c8STomohiro Kusumi live_chain->bref.check.freemap.bigmask = -1;
1137*a63188c8STomohiro Kusumi cbinfo->hmp->freemap_relaxed = 0; /* reset heuristic */
1138*a63188c8STomohiro Kusumi live = &live_chain->data->bmdata[bmapindex];
1139*a63188c8STomohiro Kusumi
1140*a63188c8STomohiro Kusumi h2_bulkfree_sync_adjust(cbinfo, data_off, live, bmap,
1141*a63188c8STomohiro Kusumi live_chain->bref.key +
1142*a63188c8STomohiro Kusumi bmapindex *
1143*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_LEVEL0_SIZE);
1144*a63188c8STomohiro Kusumi next:
1145*a63188c8STomohiro Kusumi data_off += HAMMER2_FREEMAP_LEVEL0_SIZE;
1146*a63188c8STomohiro Kusumi ++bmap;
1147*a63188c8STomohiro Kusumi }
1148*a63188c8STomohiro Kusumi if (live_chain) {
1149*a63188c8STomohiro Kusumi hammer2_chain_unlock(live_chain);
1150*a63188c8STomohiro Kusumi hammer2_chain_drop(live_chain);
1151*a63188c8STomohiro Kusumi }
1152*a63188c8STomohiro Kusumi if (live_parent) {
1153*a63188c8STomohiro Kusumi hammer2_chain_unlock(live_parent);
1154*a63188c8STomohiro Kusumi hammer2_chain_drop(live_parent);
1155*a63188c8STomohiro Kusumi }
1156*a63188c8STomohiro Kusumi return error;
1157*a63188c8STomohiro Kusumi }
1158*a63188c8STomohiro Kusumi
1159*a63188c8STomohiro Kusumi /*
1160*a63188c8STomohiro Kusumi * Merge the bulkfree bitmap against the existing bitmap.
1161*a63188c8STomohiro Kusumi */
1162*a63188c8STomohiro Kusumi static
1163*a63188c8STomohiro Kusumi void
h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t * cbinfo,hammer2_off_t data_off,hammer2_bmap_data_t * live,hammer2_bmap_data_t * bmap,hammer2_key_t alloc_base)1164*a63188c8STomohiro Kusumi h2_bulkfree_sync_adjust(hammer2_bulkfree_info_t *cbinfo,
1165*a63188c8STomohiro Kusumi hammer2_off_t data_off, hammer2_bmap_data_t *live,
1166*a63188c8STomohiro Kusumi hammer2_bmap_data_t *bmap, hammer2_key_t alloc_base)
1167*a63188c8STomohiro Kusumi {
1168*a63188c8STomohiro Kusumi int bindex;
1169*a63188c8STomohiro Kusumi int scount;
1170*a63188c8STomohiro Kusumi hammer2_off_t tmp_off;
1171*a63188c8STomohiro Kusumi hammer2_bitmap_t lmask;
1172*a63188c8STomohiro Kusumi hammer2_bitmap_t mmask;
1173*a63188c8STomohiro Kusumi
1174*a63188c8STomohiro Kusumi tmp_off = data_off;
1175*a63188c8STomohiro Kusumi
1176*a63188c8STomohiro Kusumi for (bindex = 0; bindex < HAMMER2_BMAP_ELEMENTS; ++bindex) {
1177*a63188c8STomohiro Kusumi lmask = live->bitmapq[bindex]; /* live */
1178*a63188c8STomohiro Kusumi mmask = bmap->bitmapq[bindex]; /* snapshotted bulkfree */
1179*a63188c8STomohiro Kusumi if (lmask == mmask) {
1180*a63188c8STomohiro Kusumi tmp_off += HAMMER2_BMAP_INDEX_SIZE;
1181*a63188c8STomohiro Kusumi continue;
1182*a63188c8STomohiro Kusumi }
1183*a63188c8STomohiro Kusumi
1184*a63188c8STomohiro Kusumi for (scount = 0;
1185*a63188c8STomohiro Kusumi scount < HAMMER2_BMAP_BITS_PER_ELEMENT;
1186*a63188c8STomohiro Kusumi scount += 2) {
1187*a63188c8STomohiro Kusumi if ((mmask & 3) == 0) {
1188*a63188c8STomohiro Kusumi /*
1189*a63188c8STomohiro Kusumi * in-memory 00 live 11 -> 10
1190*a63188c8STomohiro Kusumi * live 10 -> 00
1191*a63188c8STomohiro Kusumi *
1192*a63188c8STomohiro Kusumi * Storage might be marked allocated or
1193*a63188c8STomohiro Kusumi * staged and must be remarked staged or
1194*a63188c8STomohiro Kusumi * free.
1195*a63188c8STomohiro Kusumi */
1196*a63188c8STomohiro Kusumi switch (lmask & 3) {
1197*a63188c8STomohiro Kusumi case 0: /* 00 */
1198*a63188c8STomohiro Kusumi break;
1199*a63188c8STomohiro Kusumi case 1: /* 01 */
1200*a63188c8STomohiro Kusumi kprintf("hammer2_bulkfree: cannot "
1201*a63188c8STomohiro Kusumi "transition m=00/l=01\n");
1202*a63188c8STomohiro Kusumi break;
1203*a63188c8STomohiro Kusumi case 2: /* 10 -> 00 */
1204*a63188c8STomohiro Kusumi live->bitmapq[bindex] &=
1205*a63188c8STomohiro Kusumi ~((hammer2_bitmap_t)2 << scount);
1206*a63188c8STomohiro Kusumi live->avail +=
1207*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_SIZE;
1208*a63188c8STomohiro Kusumi if (live->avail >
1209*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_LEVEL0_SIZE) {
1210*a63188c8STomohiro Kusumi live->avail =
1211*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_LEVEL0_SIZE;
1212*a63188c8STomohiro Kusumi }
1213*a63188c8STomohiro Kusumi cbinfo->adj_free +=
1214*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_SIZE;
1215*a63188c8STomohiro Kusumi ++cbinfo->count_10_00;
1216*a63188c8STomohiro Kusumi hammer2_io_dedup_assert(
1217*a63188c8STomohiro Kusumi cbinfo->hmp,
1218*a63188c8STomohiro Kusumi tmp_off |
1219*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_RADIX,
1220*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_SIZE);
1221*a63188c8STomohiro Kusumi break;
1222*a63188c8STomohiro Kusumi case 3: /* 11 -> 10 */
1223*a63188c8STomohiro Kusumi live->bitmapq[bindex] &=
1224*a63188c8STomohiro Kusumi ~((hammer2_bitmap_t)1 << scount);
1225*a63188c8STomohiro Kusumi ++cbinfo->count_11_10;
1226*a63188c8STomohiro Kusumi hammer2_io_dedup_delete(
1227*a63188c8STomohiro Kusumi cbinfo->hmp,
1228*a63188c8STomohiro Kusumi HAMMER2_BREF_TYPE_DATA,
1229*a63188c8STomohiro Kusumi tmp_off |
1230*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_RADIX,
1231*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_SIZE);
1232*a63188c8STomohiro Kusumi break;
1233*a63188c8STomohiro Kusumi }
1234*a63188c8STomohiro Kusumi } else if ((mmask & 3) == 3) {
1235*a63188c8STomohiro Kusumi /*
1236*a63188c8STomohiro Kusumi * in-memory 11 live 10 -> 11
1237*a63188c8STomohiro Kusumi * live ** -> 11
1238*a63188c8STomohiro Kusumi *
1239*a63188c8STomohiro Kusumi * Storage might be incorrectly marked free
1240*a63188c8STomohiro Kusumi * or staged and must be remarked fully
1241*a63188c8STomohiro Kusumi * allocated.
1242*a63188c8STomohiro Kusumi */
1243*a63188c8STomohiro Kusumi switch (lmask & 3) {
1244*a63188c8STomohiro Kusumi case 0: /* 00 */
1245*a63188c8STomohiro Kusumi /*
1246*a63188c8STomohiro Kusumi * This case is not supposed to
1247*a63188c8STomohiro Kusumi * happen. If it does, it means
1248*a63188c8STomohiro Kusumi * that an allocated block was
1249*a63188c8STomohiro Kusumi * thought by the filesystem to be
1250*a63188c8STomohiro Kusumi * free.
1251*a63188c8STomohiro Kusumi */
1252*a63188c8STomohiro Kusumi kprintf("hammer2_bulkfree: "
1253*a63188c8STomohiro Kusumi "00->11 critical freemap "
1254*a63188c8STomohiro Kusumi "transition for datablock "
1255*a63188c8STomohiro Kusumi "%016jx\n",
1256*a63188c8STomohiro Kusumi tmp_off);
1257*a63188c8STomohiro Kusumi ++cbinfo->count_00_11;
1258*a63188c8STomohiro Kusumi cbinfo->adj_free -=
1259*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_SIZE;
1260*a63188c8STomohiro Kusumi live->avail -=
1261*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_BLOCK_SIZE;
1262*a63188c8STomohiro Kusumi if ((int32_t)live->avail < 0)
1263*a63188c8STomohiro Kusumi live->avail = 0;
1264*a63188c8STomohiro Kusumi break;
1265*a63188c8STomohiro Kusumi case 1: /* 01 */
1266*a63188c8STomohiro Kusumi ++cbinfo->count_01_11;
1267*a63188c8STomohiro Kusumi break;
1268*a63188c8STomohiro Kusumi case 2: /* 10 -> 11 */
1269*a63188c8STomohiro Kusumi ++cbinfo->count_10_11;
1270*a63188c8STomohiro Kusumi break;
1271*a63188c8STomohiro Kusumi case 3: /* 11 */
1272*a63188c8STomohiro Kusumi break;
1273*a63188c8STomohiro Kusumi }
1274*a63188c8STomohiro Kusumi live->bitmapq[bindex] |=
1275*a63188c8STomohiro Kusumi ((hammer2_bitmap_t)3 << scount);
1276*a63188c8STomohiro Kusumi }
1277*a63188c8STomohiro Kusumi mmask >>= 2;
1278*a63188c8STomohiro Kusumi lmask >>= 2;
1279*a63188c8STomohiro Kusumi tmp_off += HAMMER2_FREEMAP_BLOCK_SIZE;
1280*a63188c8STomohiro Kusumi }
1281*a63188c8STomohiro Kusumi }
1282*a63188c8STomohiro Kusumi
1283*a63188c8STomohiro Kusumi /*
1284*a63188c8STomohiro Kusumi * Determine if the live bitmap is completely free and reset its
1285*a63188c8STomohiro Kusumi * fields if so. Otherwise check to see if we can reduce the linear
1286*a63188c8STomohiro Kusumi * offset.
1287*a63188c8STomohiro Kusumi */
1288*a63188c8STomohiro Kusumi for (bindex = HAMMER2_BMAP_ELEMENTS - 1; bindex >= 0; --bindex) {
1289*a63188c8STomohiro Kusumi if (live->bitmapq[bindex] != 0)
1290*a63188c8STomohiro Kusumi break;
1291*a63188c8STomohiro Kusumi }
1292*a63188c8STomohiro Kusumi if (bindex < 0) {
1293*a63188c8STomohiro Kusumi /*
1294*a63188c8STomohiro Kusumi * Completely empty, reset entire segment
1295*a63188c8STomohiro Kusumi */
1296*a63188c8STomohiro Kusumi #if 0
1297*a63188c8STomohiro Kusumi kprintf("hammer2: cleanseg %016jx.%04x (%d)\n",
1298*a63188c8STomohiro Kusumi alloc_base, live->class, live->avail);
1299*a63188c8STomohiro Kusumi #endif
1300*a63188c8STomohiro Kusumi live->avail = HAMMER2_FREEMAP_LEVEL0_SIZE;
1301*a63188c8STomohiro Kusumi live->class = 0;
1302*a63188c8STomohiro Kusumi live->linear = 0;
1303*a63188c8STomohiro Kusumi ++cbinfo->count_l0cleans;
1304*a63188c8STomohiro Kusumi } else if (bindex < 7) {
1305*a63188c8STomohiro Kusumi /*
1306*a63188c8STomohiro Kusumi * Partially full, bitmapq[bindex] != 0. Our bulkfree pass
1307*a63188c8STomohiro Kusumi * does not record enough information to set live->linear
1308*a63188c8STomohiro Kusumi * exactly.
1309*a63188c8STomohiro Kusumi *
1310*a63188c8STomohiro Kusumi * NOTE: Setting live->linear to a sub-block (16K) boundary
1311*a63188c8STomohiro Kusumi * forces the live code to iterate to the next fully
1312*a63188c8STomohiro Kusumi * free block. It does NOT mean that all blocks above
1313*a63188c8STomohiro Kusumi * live->linear are available.
1314*a63188c8STomohiro Kusumi *
1315*a63188c8STomohiro Kusumi * Setting live->linear to a fragmentary (less than
1316*a63188c8STomohiro Kusumi * 16K) boundary allows allocations to iterate within
1317*a63188c8STomohiro Kusumi * that sub-block.
1318*a63188c8STomohiro Kusumi */
1319*a63188c8STomohiro Kusumi if (live->linear < bmap->linear &&
1320*a63188c8STomohiro Kusumi ((live->linear ^ bmap->linear) &
1321*a63188c8STomohiro Kusumi ~HAMMER2_FREEMAP_BLOCK_MASK) == 0) {
1322*a63188c8STomohiro Kusumi /*
1323*a63188c8STomohiro Kusumi * If greater than but still within the same
1324*a63188c8STomohiro Kusumi * sub-block as live we can adjust linear upward.
1325*a63188c8STomohiro Kusumi */
1326*a63188c8STomohiro Kusumi live->linear = bmap->linear;
1327*a63188c8STomohiro Kusumi ++cbinfo->count_linadjusts;
1328*a63188c8STomohiro Kusumi } else {
1329*a63188c8STomohiro Kusumi /*
1330*a63188c8STomohiro Kusumi * Otherwise adjust to the nearest higher or same
1331*a63188c8STomohiro Kusumi * sub-block boundary. The live system may have
1332*a63188c8STomohiro Kusumi * bounced live->linear around so we cannot make any
1333*a63188c8STomohiro Kusumi * assumptions with regards to available fragmentary
1334*a63188c8STomohiro Kusumi * allocations.
1335*a63188c8STomohiro Kusumi */
1336*a63188c8STomohiro Kusumi live->linear =
1337*a63188c8STomohiro Kusumi (bmap->linear + HAMMER2_FREEMAP_BLOCK_MASK) &
1338*a63188c8STomohiro Kusumi ~HAMMER2_FREEMAP_BLOCK_MASK;
1339*a63188c8STomohiro Kusumi ++cbinfo->count_linadjusts;
1340*a63188c8STomohiro Kusumi }
1341*a63188c8STomohiro Kusumi } else {
1342*a63188c8STomohiro Kusumi /*
1343*a63188c8STomohiro Kusumi * Completely full, effectively disable the linear iterator
1344*a63188c8STomohiro Kusumi */
1345*a63188c8STomohiro Kusumi live->linear = HAMMER2_SEGSIZE;
1346*a63188c8STomohiro Kusumi }
1347*a63188c8STomohiro Kusumi
1348*a63188c8STomohiro Kusumi #if 0
1349*a63188c8STomohiro Kusumi if (bmap->class) {
1350*a63188c8STomohiro Kusumi kprintf("%016jx %04d.%04x (avail=%7d) "
1351*a63188c8STomohiro Kusumi "%08x %08x %08x %08x %08x %08x %08x %08x\n",
1352*a63188c8STomohiro Kusumi (intmax_t)data_off,
1353*a63188c8STomohiro Kusumi (int)((data_off &
1354*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_LEVEL1_MASK) >>
1355*a63188c8STomohiro Kusumi HAMMER2_FREEMAP_LEVEL0_RADIX),
1356*a63188c8STomohiro Kusumi bmap->class,
1357*a63188c8STomohiro Kusumi bmap->avail,
1358*a63188c8STomohiro Kusumi bmap->bitmap[0], bmap->bitmap[1],
1359*a63188c8STomohiro Kusumi bmap->bitmap[2], bmap->bitmap[3],
1360*a63188c8STomohiro Kusumi bmap->bitmap[4], bmap->bitmap[5],
1361*a63188c8STomohiro Kusumi bmap->bitmap[6], bmap->bitmap[7]);
1362*a63188c8STomohiro Kusumi }
1363*a63188c8STomohiro Kusumi #endif
1364*a63188c8STomohiro Kusumi }
1365*a63188c8STomohiro Kusumi
1366*a63188c8STomohiro Kusumi /*
1367*a63188c8STomohiro Kusumi * BULKFREE DEDUP HEURISTIC
1368*a63188c8STomohiro Kusumi *
1369*a63188c8STomohiro Kusumi * WARNING! This code is SMP safe but the heuristic allows SMP collisions.
1370*a63188c8STomohiro Kusumi * All fields must be loaded into locals and validated.
1371*a63188c8STomohiro Kusumi */
1372*a63188c8STomohiro Kusumi static
1373*a63188c8STomohiro Kusumi int
h2_bulkfree_test(hammer2_bulkfree_info_t * cbinfo,hammer2_blockref_t * bref,int pri,int saved_error)1374*a63188c8STomohiro Kusumi h2_bulkfree_test(hammer2_bulkfree_info_t *cbinfo, hammer2_blockref_t *bref,
1375*a63188c8STomohiro Kusumi int pri, int saved_error)
1376*a63188c8STomohiro Kusumi {
1377*a63188c8STomohiro Kusumi hammer2_dedup_t *dedup;
1378*a63188c8STomohiro Kusumi int best;
1379*a63188c8STomohiro Kusumi int n;
1380*a63188c8STomohiro Kusumi int i;
1381*a63188c8STomohiro Kusumi
1382*a63188c8STomohiro Kusumi n = hammer2_icrc32(&bref->data_off, sizeof(bref->data_off));
1383*a63188c8STomohiro Kusumi dedup = cbinfo->dedup + (n & (HAMMER2_DEDUP_HEUR_MASK & ~7));
1384*a63188c8STomohiro Kusumi
1385*a63188c8STomohiro Kusumi for (i = best = 0; i < 8; ++i) {
1386*a63188c8STomohiro Kusumi if (dedup[i].data_off == bref->data_off) {
1387*a63188c8STomohiro Kusumi if (dedup[i].ticks < pri)
1388*a63188c8STomohiro Kusumi dedup[i].ticks = pri;
1389*a63188c8STomohiro Kusumi if (pri == 1)
1390*a63188c8STomohiro Kusumi cbinfo->count_dedup_factor += dedup[i].ticks;
1391*a63188c8STomohiro Kusumi return (dedup[i].saved_error | HAMMER2_ERROR_EOF);
1392*a63188c8STomohiro Kusumi }
1393*a63188c8STomohiro Kusumi if (dedup[i].ticks < dedup[best].ticks)
1394*a63188c8STomohiro Kusumi best = i;
1395*a63188c8STomohiro Kusumi }
1396*a63188c8STomohiro Kusumi dedup[best].data_off = bref->data_off;
1397*a63188c8STomohiro Kusumi dedup[best].ticks = pri;
1398*a63188c8STomohiro Kusumi dedup[best].saved_error = saved_error;
1399*a63188c8STomohiro Kusumi
1400*a63188c8STomohiro Kusumi return 0;
1401*a63188c8STomohiro Kusumi }
1402*a63188c8STomohiro Kusumi
1403*a63188c8STomohiro Kusumi /*
1404*a63188c8STomohiro Kusumi * Calculate what the bigmask should be. bigmask is permissive, so the
1405*a63188c8STomohiro Kusumi * bits returned must be set at a minimum in the live bigmask. Other bits
1406*a63188c8STomohiro Kusumi * might also be set in the live bigmask.
1407*a63188c8STomohiro Kusumi */
1408*a63188c8STomohiro Kusumi static uint32_t
bigmask_get(hammer2_bmap_data_t * bmap)1409*a63188c8STomohiro Kusumi bigmask_get(hammer2_bmap_data_t *bmap)
1410*a63188c8STomohiro Kusumi {
1411*a63188c8STomohiro Kusumi hammer2_bitmap_t mask; /* 64-bit mask to check */
1412*a63188c8STomohiro Kusumi hammer2_bitmap_t scan;
1413*a63188c8STomohiro Kusumi uint32_t bigmask;
1414*a63188c8STomohiro Kusumi uint32_t radix_mask;
1415*a63188c8STomohiro Kusumi int iter;
1416*a63188c8STomohiro Kusumi int i;
1417*a63188c8STomohiro Kusumi int j;
1418*a63188c8STomohiro Kusumi
1419*a63188c8STomohiro Kusumi bigmask = 0;
1420*a63188c8STomohiro Kusumi for (i = 0; i < HAMMER2_BMAP_ELEMENTS; ++i) {
1421*a63188c8STomohiro Kusumi mask = bmap->bitmapq[i];
1422*a63188c8STomohiro Kusumi
1423*a63188c8STomohiro Kusumi radix_mask = 1U << HAMMER2_FREEMAP_BLOCK_RADIX;
1424*a63188c8STomohiro Kusumi radix_mask |= radix_mask - 1;
1425*a63188c8STomohiro Kusumi iter = 2; /* each bitmap entry is 2 bits. 2, 4, 8... */
1426*a63188c8STomohiro Kusumi while (iter <= HAMMER2_BMAP_BITS_PER_ELEMENT) {
1427*a63188c8STomohiro Kusumi if (iter == HAMMER2_BMAP_BITS_PER_ELEMENT)
1428*a63188c8STomohiro Kusumi scan = -1;
1429*a63188c8STomohiro Kusumi else
1430*a63188c8STomohiro Kusumi scan = (1LU << iter) - 1;
1431*a63188c8STomohiro Kusumi j = 0;
1432*a63188c8STomohiro Kusumi while (j < HAMMER2_BMAP_BITS_PER_ELEMENT) {
1433*a63188c8STomohiro Kusumi /*
1434*a63188c8STomohiro Kusumi * Check if all bits are 0 (free block).
1435*a63188c8STomohiro Kusumi * If so, set the bit in bigmask for the
1436*a63188c8STomohiro Kusumi * allocation radix under test.
1437*a63188c8STomohiro Kusumi */
1438*a63188c8STomohiro Kusumi if ((scan & mask) == 0) {
1439*a63188c8STomohiro Kusumi bigmask |= radix_mask;
1440*a63188c8STomohiro Kusumi }
1441*a63188c8STomohiro Kusumi scan <<= iter;
1442*a63188c8STomohiro Kusumi j += iter;
1443*a63188c8STomohiro Kusumi }
1444*a63188c8STomohiro Kusumi iter <<= 1;
1445*a63188c8STomohiro Kusumi radix_mask = (radix_mask << 1) | 1;
1446*a63188c8STomohiro Kusumi }
1447*a63188c8STomohiro Kusumi }
1448*a63188c8STomohiro Kusumi return bigmask;
1449*a63188c8STomohiro Kusumi }
1450*a63188c8STomohiro Kusumi
1451*a63188c8STomohiro Kusumi static int
bigmask_good(hammer2_bmap_data_t * bmap,uint32_t live_bigmask)1452*a63188c8STomohiro Kusumi bigmask_good(hammer2_bmap_data_t *bmap, uint32_t live_bigmask)
1453*a63188c8STomohiro Kusumi {
1454*a63188c8STomohiro Kusumi uint32_t bigmask;
1455*a63188c8STomohiro Kusumi
1456*a63188c8STomohiro Kusumi bigmask = bigmask_get(bmap);
1457*a63188c8STomohiro Kusumi return ((live_bigmask & bigmask) == bigmask);
1458*a63188c8STomohiro Kusumi }
1459