xref: /dragonfly/sys/vfs/hammer2/hammer2_freemap.c (revision fdf62707)
15c23d7f1SMatthew Dillon /*
20dea3156SMatthew Dillon  * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
35c23d7f1SMatthew Dillon  *
45c23d7f1SMatthew Dillon  * This code is derived from software contributed to The DragonFly Project
55c23d7f1SMatthew Dillon  * by Matthew Dillon <dillon@dragonflybsd.org>
65c23d7f1SMatthew Dillon  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
75c23d7f1SMatthew Dillon  *
85c23d7f1SMatthew Dillon  * Redistribution and use in source and binary forms, with or without
95c23d7f1SMatthew Dillon  * modification, are permitted provided that the following conditions
105c23d7f1SMatthew Dillon  * are met:
115c23d7f1SMatthew Dillon  *
125c23d7f1SMatthew Dillon  * 1. Redistributions of source code must retain the above copyright
135c23d7f1SMatthew Dillon  *    notice, this list of conditions and the following disclaimer.
145c23d7f1SMatthew Dillon  * 2. Redistributions in binary form must reproduce the above copyright
155c23d7f1SMatthew Dillon  *    notice, this list of conditions and the following disclaimer in
165c23d7f1SMatthew Dillon  *    the documentation and/or other materials provided with the
175c23d7f1SMatthew Dillon  *    distribution.
185c23d7f1SMatthew Dillon  * 3. Neither the name of The DragonFly Project nor the names of its
195c23d7f1SMatthew Dillon  *    contributors may be used to endorse or promote products derived
205c23d7f1SMatthew Dillon  *    from this software without specific, prior written permission.
215c23d7f1SMatthew Dillon  *
225c23d7f1SMatthew Dillon  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
235c23d7f1SMatthew Dillon  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
245c23d7f1SMatthew Dillon  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
255c23d7f1SMatthew Dillon  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
265c23d7f1SMatthew Dillon  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
275c23d7f1SMatthew Dillon  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
285c23d7f1SMatthew Dillon  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
295c23d7f1SMatthew Dillon  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
305c23d7f1SMatthew Dillon  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
315c23d7f1SMatthew Dillon  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
325c23d7f1SMatthew Dillon  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
335c23d7f1SMatthew Dillon  * SUCH DAMAGE.
345c23d7f1SMatthew Dillon  */
355c23d7f1SMatthew Dillon #include <sys/param.h>
365c23d7f1SMatthew Dillon #include <sys/systm.h>
375c23d7f1SMatthew Dillon #include <sys/kernel.h>
385c23d7f1SMatthew Dillon #include <sys/fcntl.h>
395c23d7f1SMatthew Dillon #include <sys/buf.h>
405c23d7f1SMatthew Dillon #include <sys/proc.h>
415c23d7f1SMatthew Dillon #include <sys/namei.h>
425c23d7f1SMatthew Dillon #include <sys/mount.h>
435c23d7f1SMatthew Dillon #include <sys/vnode.h>
445c23d7f1SMatthew Dillon #include <sys/mountctl.h>
455c23d7f1SMatthew Dillon 
465c23d7f1SMatthew Dillon #include "hammer2.h"
475c23d7f1SMatthew Dillon 
4891abd410SMatthew Dillon struct hammer2_fiterate {
4991abd410SMatthew Dillon 	hammer2_off_t	bpref;
5091abd410SMatthew Dillon 	hammer2_off_t	bnext;
5191abd410SMatthew Dillon 	int		loops;
5291abd410SMatthew Dillon };
5391abd410SMatthew Dillon 
5491abd410SMatthew Dillon typedef struct hammer2_fiterate hammer2_fiterate_t;
5591abd410SMatthew Dillon 
561a7cfe5aSMatthew Dillon static int hammer2_freemap_try_alloc(hammer2_trans_t *trans,
571a7cfe5aSMatthew Dillon 			hammer2_chain_t **parentp, hammer2_blockref_t *bref,
5891abd410SMatthew Dillon 			int radix, hammer2_fiterate_t *iter);
59a5913bdfSMatthew Dillon static void hammer2_freemap_init(hammer2_trans_t *trans, hammer2_mount_t *hmp,
60a5913bdfSMatthew Dillon 			hammer2_key_t key, hammer2_chain_t *chain);
61a5913bdfSMatthew Dillon static int hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp,
6291abd410SMatthew Dillon 			hammer2_bmap_data_t *bmap, uint16_t class,
6393f3933aSMatthew Dillon 			int n, int radix, hammer2_key_t *basep);
641a7cfe5aSMatthew Dillon static int hammer2_freemap_iterate(hammer2_trans_t *trans,
651a7cfe5aSMatthew Dillon 			hammer2_chain_t **parentp, hammer2_chain_t **chainp,
6691abd410SMatthew Dillon 			hammer2_fiterate_t *iter);
671a7cfe5aSMatthew Dillon 
68a98aa0b0SMatthew Dillon static __inline
69a98aa0b0SMatthew Dillon int
70a98aa0b0SMatthew Dillon hammer2_freemapradix(int radix)
71a98aa0b0SMatthew Dillon {
72a98aa0b0SMatthew Dillon 	return(radix);
73a98aa0b0SMatthew Dillon }
74a98aa0b0SMatthew Dillon 
751a7cfe5aSMatthew Dillon /*
761a7cfe5aSMatthew Dillon  * Calculate the device offset for the specified FREEMAP_NODE or FREEMAP_LEAF
771a7cfe5aSMatthew Dillon  * bref.  Return a combined media offset and physical size radix.  Freemap
781a7cfe5aSMatthew Dillon  * chains use fixed storage offsets in the 4MB reserved area at the
791a7cfe5aSMatthew Dillon  * beginning of each 2GB zone
801a7cfe5aSMatthew Dillon  *
811a7cfe5aSMatthew Dillon  * Rotate between four possibilities.  Theoretically this means we have three
821a7cfe5aSMatthew Dillon  * good freemaps in case of a crash which we can use as a base for the fixup
831a7cfe5aSMatthew Dillon  * scan at mount-time.
841a7cfe5aSMatthew Dillon  */
851a7cfe5aSMatthew Dillon #define H2FMBASE(key, radix)	((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
861a7cfe5aSMatthew Dillon #define H2FMSHIFT(radix)	((hammer2_off_t)1 << (radix))
871a7cfe5aSMatthew Dillon 
881a7cfe5aSMatthew Dillon static
891a7cfe5aSMatthew Dillon int
901a7cfe5aSMatthew Dillon hammer2_freemap_reserve(hammer2_mount_t *hmp, hammer2_blockref_t *bref,
911a7cfe5aSMatthew Dillon 			int radix)
921a7cfe5aSMatthew Dillon {
931a7cfe5aSMatthew Dillon 	hammer2_off_t off;
941a7cfe5aSMatthew Dillon 	size_t bytes;
951a7cfe5aSMatthew Dillon 
961a7cfe5aSMatthew Dillon 	/*
971a7cfe5aSMatthew Dillon 	 * Physical allocation size -> radix.  Typically either 256 for
981a7cfe5aSMatthew Dillon 	 * a level 0 freemap leaf or 65536 for a level N freemap node.
991a7cfe5aSMatthew Dillon 	 *
1001a7cfe5aSMatthew Dillon 	 * NOTE: A 256 byte bitmap represents 256 x 8 x 1024 = 2MB of storage.
1011a7cfe5aSMatthew Dillon 	 *	 Do not use hammer2_allocsize() here as it has a min cap.
1021a7cfe5aSMatthew Dillon 	 */
1031a7cfe5aSMatthew Dillon 	bytes = 1 << radix;
1041a7cfe5aSMatthew Dillon 
1051a7cfe5aSMatthew Dillon 	/*
1061a7cfe5aSMatthew Dillon 	 * Adjust by HAMMER2_ZONE_FREEMAP_{A,B,C,D} using the existing
107a98aa0b0SMatthew Dillon 	 * offset as a basis.  Start in zone A if previously unallocated.
1081a7cfe5aSMatthew Dillon 	 */
1091a7cfe5aSMatthew Dillon 	if ((bref->data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) {
1101a7cfe5aSMatthew Dillon 		off = HAMMER2_ZONE_FREEMAP_A;
1111a7cfe5aSMatthew Dillon 	} else {
1121a7cfe5aSMatthew Dillon 		off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX &
1131a7cfe5aSMatthew Dillon 		      (((hammer2_off_t)1 << HAMMER2_FREEMAP_LEVEL1_RADIX) - 1);
1141a7cfe5aSMatthew Dillon 		off = off / HAMMER2_PBUFSIZE;
1151a7cfe5aSMatthew Dillon 		KKASSERT(off >= HAMMER2_ZONE_FREEMAP_A);
11693f3933aSMatthew Dillon 		KKASSERT(off < HAMMER2_ZONE_FREEMAP_D + 4);
1171a7cfe5aSMatthew Dillon 
1181a7cfe5aSMatthew Dillon 		if (off >= HAMMER2_ZONE_FREEMAP_D)
1191a7cfe5aSMatthew Dillon 			off = HAMMER2_ZONE_FREEMAP_A;
1201a7cfe5aSMatthew Dillon 		else if (off >= HAMMER2_ZONE_FREEMAP_C)
1211a7cfe5aSMatthew Dillon 			off = HAMMER2_ZONE_FREEMAP_D;
1221a7cfe5aSMatthew Dillon 		else if (off >= HAMMER2_ZONE_FREEMAP_B)
1231a7cfe5aSMatthew Dillon 			off = HAMMER2_ZONE_FREEMAP_C;
1241a7cfe5aSMatthew Dillon 		else
1251a7cfe5aSMatthew Dillon 			off = HAMMER2_ZONE_FREEMAP_B;
1261a7cfe5aSMatthew Dillon 	}
1271a7cfe5aSMatthew Dillon 	off = off * HAMMER2_PBUFSIZE;
1281a7cfe5aSMatthew Dillon 
1291a7cfe5aSMatthew Dillon 	/*
1301a7cfe5aSMatthew Dillon 	 * Calculate the block offset of the reserved block.  This will
1311a7cfe5aSMatthew Dillon 	 * point into the 4MB reserved area at the base of the appropriate
1321a7cfe5aSMatthew Dillon 	 * 2GB zone, once added to the FREEMAP_x selection above.
1331a7cfe5aSMatthew Dillon 	 */
1341a7cfe5aSMatthew Dillon 	switch(bref->keybits) {
1351a7cfe5aSMatthew Dillon 	/* case HAMMER2_FREEMAP_LEVEL5_RADIX: not applicable */
1361a7cfe5aSMatthew Dillon 	case HAMMER2_FREEMAP_LEVEL4_RADIX:	/* 2EB */
1371a7cfe5aSMatthew Dillon 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
1381a7cfe5aSMatthew Dillon 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
1391a7cfe5aSMatthew Dillon 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL4_RADIX) +
1401a7cfe5aSMatthew Dillon 		       HAMMER2_ZONEFM_LEVEL4 * HAMMER2_PBUFSIZE;
1411a7cfe5aSMatthew Dillon 		break;
1421a7cfe5aSMatthew Dillon 	case HAMMER2_FREEMAP_LEVEL3_RADIX:	/* 2PB */
1431a7cfe5aSMatthew Dillon 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
1441a7cfe5aSMatthew Dillon 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
1451a7cfe5aSMatthew Dillon 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL3_RADIX) +
1461a7cfe5aSMatthew Dillon 		       HAMMER2_ZONEFM_LEVEL3 * HAMMER2_PBUFSIZE;
1471a7cfe5aSMatthew Dillon 		break;
1481a7cfe5aSMatthew Dillon 	case HAMMER2_FREEMAP_LEVEL2_RADIX:	/* 2TB */
1491a7cfe5aSMatthew Dillon 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
1501a7cfe5aSMatthew Dillon 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
1511a7cfe5aSMatthew Dillon 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL2_RADIX) +
1521a7cfe5aSMatthew Dillon 		       HAMMER2_ZONEFM_LEVEL2 * HAMMER2_PBUFSIZE;
1531a7cfe5aSMatthew Dillon 		break;
1541a7cfe5aSMatthew Dillon 	case HAMMER2_FREEMAP_LEVEL1_RADIX:	/* 2GB */
15593f3933aSMatthew Dillon 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF);
1561a7cfe5aSMatthew Dillon 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
1571a7cfe5aSMatthew Dillon 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
1581a7cfe5aSMatthew Dillon 		       HAMMER2_ZONEFM_LEVEL1 * HAMMER2_PBUFSIZE;
1591a7cfe5aSMatthew Dillon 		break;
1601a7cfe5aSMatthew Dillon 	default:
1611a7cfe5aSMatthew Dillon 		panic("freemap: bad radix(2) %p %d\n", bref, bref->keybits);
1621a7cfe5aSMatthew Dillon 		/* NOT REACHED */
1631a7cfe5aSMatthew Dillon 		break;
1641a7cfe5aSMatthew Dillon 	}
1651a7cfe5aSMatthew Dillon 	bref->data_off = off | radix;
1661a7cfe5aSMatthew Dillon 	return (0);
1671a7cfe5aSMatthew Dillon }
1681a7cfe5aSMatthew Dillon 
1695c23d7f1SMatthew Dillon /*
1701a7cfe5aSMatthew Dillon  * Normal freemap allocator
1711a7cfe5aSMatthew Dillon  *
1721a7cfe5aSMatthew Dillon  * Use available hints to allocate space using the freemap.  Create missing
1731a7cfe5aSMatthew Dillon  * freemap infrastructure on-the-fly as needed (including marking initial
1741a7cfe5aSMatthew Dillon  * allocations using the iterator as allocated, instantiating new 2GB zones,
1751a7cfe5aSMatthew Dillon  * and dealing with the end-of-media edge case).
1761a7cfe5aSMatthew Dillon  *
1771a7cfe5aSMatthew Dillon  * ip and bpref are only used as a heuristic to determine locality of
1781a7cfe5aSMatthew Dillon  * reference.  bref->key may also be used heuristically.
179a4dc31e0SMatthew Dillon  *
180a4dc31e0SMatthew Dillon  * WARNING! When called from a flush we have to use the 'live' sync_tid
181a4dc31e0SMatthew Dillon  *	    and not the flush sync_tid.  The live sync_tid is the flush
182a4dc31e0SMatthew Dillon  *	    sync_tid + 1.  That is, freemap allocations which occur during
183a4dc31e0SMatthew Dillon  *	    a flush are not part of the flush.  Crash-recovery will restore
184a4dc31e0SMatthew Dillon  *	    any lost allocations.
1851a7cfe5aSMatthew Dillon  */
1861a7cfe5aSMatthew Dillon int
187a5913bdfSMatthew Dillon hammer2_freemap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp,
1881a7cfe5aSMatthew Dillon 		      hammer2_blockref_t *bref, size_t bytes)
1891a7cfe5aSMatthew Dillon {
1901a7cfe5aSMatthew Dillon 	hammer2_chain_t *parent;
1911a7cfe5aSMatthew Dillon 	int radix;
1921a7cfe5aSMatthew Dillon 	int error;
19391abd410SMatthew Dillon 	unsigned int hindex;
19491abd410SMatthew Dillon 	hammer2_fiterate_t iter;
1951a7cfe5aSMatthew Dillon 
1961a7cfe5aSMatthew Dillon 	/*
1971a7cfe5aSMatthew Dillon 	 * Validate the allocation size.  It must be a power of 2.
19893f3933aSMatthew Dillon 	 *
19993f3933aSMatthew Dillon 	 * For now require that the caller be aware of the minimum
20093f3933aSMatthew Dillon 	 * allocation (1K).
2011a7cfe5aSMatthew Dillon 	 */
2021a7cfe5aSMatthew Dillon 	radix = hammer2_getradix(bytes);
2031a7cfe5aSMatthew Dillon 	KKASSERT((size_t)1 << radix == bytes);
2041a7cfe5aSMatthew Dillon 
2051a7cfe5aSMatthew Dillon 	/*
20693f3933aSMatthew Dillon 	 * Freemap blocks themselves are simply assigned from the reserve
20793f3933aSMatthew Dillon 	 * area, not allocated from the freemap.
2081a7cfe5aSMatthew Dillon 	 */
2091a7cfe5aSMatthew Dillon 	if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
2101a7cfe5aSMatthew Dillon 	    bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
2111a7cfe5aSMatthew Dillon 		return(hammer2_freemap_reserve(hmp, bref, radix));
2121a7cfe5aSMatthew Dillon 	}
2131a7cfe5aSMatthew Dillon 
21491abd410SMatthew Dillon 	if (bref->data_off & ~HAMMER2_OFF_MASK_RADIX)
21591abd410SMatthew Dillon 		hammer2_freemap_free(trans, hmp, bref, 0);
21691abd410SMatthew Dillon 
21793f3933aSMatthew Dillon 	/*
218a4dc31e0SMatthew Dillon 	 * Setting ISALLOCATING ensures correct operation even when the
219a4dc31e0SMatthew Dillon 	 * flusher itself is making allocations.
22093f3933aSMatthew Dillon 	 */
22193f3933aSMatthew Dillon 	KKASSERT(bytes >= HAMMER2_MIN_ALLOC && bytes <= HAMMER2_MAX_ALLOC);
222a4dc31e0SMatthew Dillon 	KKASSERT((trans->flags & HAMMER2_TRANS_ISALLOCATING) == 0);
223a7720be7SMatthew Dillon 	atomic_set_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING);
224a4dc31e0SMatthew Dillon 	if (trans->flags & HAMMER2_TRANS_ISFLUSH)
225a4dc31e0SMatthew Dillon 		++trans->sync_tid;
2261a7cfe5aSMatthew Dillon 
22701eabad4SMatthew Dillon 	/*
228a98aa0b0SMatthew Dillon 	 * Calculate the starting point for our allocation search.
229a98aa0b0SMatthew Dillon 	 *
230a98aa0b0SMatthew Dillon 	 * Each freemap leaf is dedicated to a specific freemap_radix.
231a98aa0b0SMatthew Dillon 	 * The freemap_radix can be more fine-grained than the device buffer
232a98aa0b0SMatthew Dillon 	 * radix which results in inodes being grouped together in their
233a98aa0b0SMatthew Dillon 	 * own segment, terminal-data (16K or less) and initial indirect
234a98aa0b0SMatthew Dillon 	 * block being grouped together, and then full-indirect and full-data
235a98aa0b0SMatthew Dillon 	 * blocks (64K) being grouped together.
236a98aa0b0SMatthew Dillon 	 *
237a98aa0b0SMatthew Dillon 	 * The single most important aspect of this is the inode grouping
238a98aa0b0SMatthew Dillon 	 * because that is what allows 'find' and 'ls' and other filesystem
239a98aa0b0SMatthew Dillon 	 * topology operations to run fast.
2401a7cfe5aSMatthew Dillon 	 */
241a98aa0b0SMatthew Dillon #if 0
2421a7cfe5aSMatthew Dillon 	if (bref->data_off & ~HAMMER2_OFF_MASK_RADIX)
2431a7cfe5aSMatthew Dillon 		bpref = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
2441a7cfe5aSMatthew Dillon 	else if (trans->tmp_bpref)
2451a7cfe5aSMatthew Dillon 		bpref = trans->tmp_bpref;
2461a7cfe5aSMatthew Dillon 	else if (trans->tmp_ip)
2471a7cfe5aSMatthew Dillon 		bpref = trans->tmp_ip->chain->bref.data_off;
2481a7cfe5aSMatthew Dillon 	else
2491a7cfe5aSMatthew Dillon #endif
25091abd410SMatthew Dillon 	/*
25191abd410SMatthew Dillon 	 * Heuristic tracking index.  We would like one for each distinct
25291abd410SMatthew Dillon 	 * bref type if possible.  heur_freemap[] has room for two classes
25391abd410SMatthew Dillon 	 * for each type.  At a minimum we have to break-up our heuristic
25491abd410SMatthew Dillon 	 * by device block sizes.
25591abd410SMatthew Dillon 	 */
25691abd410SMatthew Dillon 	hindex = hammer2_devblkradix(radix) - HAMMER2_MINIORADIX;
25791abd410SMatthew Dillon 	KKASSERT(hindex < HAMMER2_FREEMAP_HEUR_NRADIX);
25891abd410SMatthew Dillon 	hindex += bref->type * HAMMER2_FREEMAP_HEUR_NRADIX;
25991abd410SMatthew Dillon 	hindex &= HAMMER2_FREEMAP_HEUR_TYPES * HAMMER2_FREEMAP_HEUR_NRADIX - 1;
26091abd410SMatthew Dillon 	KKASSERT(hindex < HAMMER2_FREEMAP_HEUR);
26191abd410SMatthew Dillon 
26291abd410SMatthew Dillon 	iter.bpref = hmp->heur_freemap[hindex];
2631a7cfe5aSMatthew Dillon 
2641a7cfe5aSMatthew Dillon 	/*
2651a7cfe5aSMatthew Dillon 	 * Make sure bpref is in-bounds.  It's ok if bpref covers a zone's
2661a7cfe5aSMatthew Dillon 	 * reserved area, the try code will iterate past it.
2671a7cfe5aSMatthew Dillon 	 */
26891abd410SMatthew Dillon 	if (iter.bpref > hmp->voldata.volu_size)
26991abd410SMatthew Dillon 		iter.bpref = hmp->voldata.volu_size - 1;
2701a7cfe5aSMatthew Dillon 
2711a7cfe5aSMatthew Dillon 	/*
2721a7cfe5aSMatthew Dillon 	 * Iterate the freemap looking for free space before and after.
2731a7cfe5aSMatthew Dillon 	 */
2741a7cfe5aSMatthew Dillon 	parent = &hmp->fchain;
2751a7cfe5aSMatthew Dillon 	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
2761a7cfe5aSMatthew Dillon 	error = EAGAIN;
27791abd410SMatthew Dillon 	iter.bnext = iter.bpref;
27891abd410SMatthew Dillon 	iter.loops = 0;
2791a7cfe5aSMatthew Dillon 
2801a7cfe5aSMatthew Dillon 	while (error == EAGAIN) {
2811a7cfe5aSMatthew Dillon 		error = hammer2_freemap_try_alloc(trans, &parent, bref,
28291abd410SMatthew Dillon 						  radix, &iter);
2831a7cfe5aSMatthew Dillon 	}
28491abd410SMatthew Dillon 	hmp->heur_freemap[hindex] = iter.bnext;
2851a7cfe5aSMatthew Dillon 	hammer2_chain_unlock(parent);
2861a7cfe5aSMatthew Dillon 
287a7720be7SMatthew Dillon 	atomic_clear_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING);
288a4dc31e0SMatthew Dillon 	if (trans->flags & HAMMER2_TRANS_ISFLUSH)
289a4dc31e0SMatthew Dillon 		--trans->sync_tid;
290a7720be7SMatthew Dillon 
2911a7cfe5aSMatthew Dillon 	return (error);
2921a7cfe5aSMatthew Dillon }
2931a7cfe5aSMatthew Dillon 
2941a7cfe5aSMatthew Dillon static int
2951a7cfe5aSMatthew Dillon hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
2961a7cfe5aSMatthew Dillon 			  hammer2_blockref_t *bref, int radix,
29791abd410SMatthew Dillon 			  hammer2_fiterate_t *iter)
2981a7cfe5aSMatthew Dillon {
299a5913bdfSMatthew Dillon 	hammer2_mount_t *hmp = (*parentp)->hmp;
3001a7cfe5aSMatthew Dillon 	hammer2_off_t l0size;
30193f3933aSMatthew Dillon 	hammer2_off_t l1size;
30293f3933aSMatthew Dillon 	hammer2_off_t l1mask;
3031897c66eSMatthew Dillon 	hammer2_key_t key_dummy;
3041a7cfe5aSMatthew Dillon 	hammer2_chain_t *chain;
3051a7cfe5aSMatthew Dillon 	hammer2_off_t key;
3061a7cfe5aSMatthew Dillon 	size_t bytes;
30791abd410SMatthew Dillon 	uint16_t class;
3081a7cfe5aSMatthew Dillon 	int error = 0;
3091897c66eSMatthew Dillon 	int cache_index = -1;
31093f3933aSMatthew Dillon 
3111a7cfe5aSMatthew Dillon 
3121a7cfe5aSMatthew Dillon 	/*
3131a7cfe5aSMatthew Dillon 	 * Calculate the number of bytes being allocated, the number
3141a7cfe5aSMatthew Dillon 	 * of contiguous bits of bitmap being allocated, and the bitmap
3151a7cfe5aSMatthew Dillon 	 * mask.
31601eabad4SMatthew Dillon 	 *
3171a7cfe5aSMatthew Dillon 	 * WARNING! cpu hardware may mask bits == 64 -> 0 and blow up the
3181a7cfe5aSMatthew Dillon 	 *	    mask calculation.
3191a7cfe5aSMatthew Dillon 	 */
3201a7cfe5aSMatthew Dillon 	bytes = (size_t)1 << radix;
32191abd410SMatthew Dillon 	class = (bref->type << 8) | hammer2_devblkradix(radix);
322a98aa0b0SMatthew Dillon 
3231a7cfe5aSMatthew Dillon 	/*
32493f3933aSMatthew Dillon 	 * Lookup the level1 freemap chain, creating and initializing one
3251a7cfe5aSMatthew Dillon 	 * if necessary.  Intermediate levels will be created automatically
3261a7cfe5aSMatthew Dillon 	 * when necessary by hammer2_chain_create().
3271a7cfe5aSMatthew Dillon 	 */
32891abd410SMatthew Dillon 	key = H2FMBASE(iter->bnext, HAMMER2_FREEMAP_LEVEL1_RADIX);
3291a7cfe5aSMatthew Dillon 	l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
33093f3933aSMatthew Dillon 	l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
33193f3933aSMatthew Dillon 	l1mask = l1size - 1;
3321a7cfe5aSMatthew Dillon 
3331897c66eSMatthew Dillon 	chain = hammer2_chain_lookup(parentp, &key_dummy, key, key + l1mask,
3341897c66eSMatthew Dillon 				     &cache_index,
3351a7cfe5aSMatthew Dillon 				     HAMMER2_LOOKUP_FREEMAP |
3361a7cfe5aSMatthew Dillon 				     HAMMER2_LOOKUP_ALWAYS |
3371a7cfe5aSMatthew Dillon 				     HAMMER2_LOOKUP_MATCHIND/*XXX*/);
3381a7cfe5aSMatthew Dillon 	if (chain == NULL) {
3391a7cfe5aSMatthew Dillon 		/*
3401a7cfe5aSMatthew Dillon 		 * Create the missing leaf, be sure to initialize
3411a7cfe5aSMatthew Dillon 		 * the auxillary freemap tracking information in
3421a7cfe5aSMatthew Dillon 		 * the bref.check.freemap structure.
3431a7cfe5aSMatthew Dillon 		 */
3441a7cfe5aSMatthew Dillon #if 0
34593f3933aSMatthew Dillon 		kprintf("freemap create L1 @ %016jx bpref %016jx\n",
34691abd410SMatthew Dillon 			key, iter->bpref);
3471a7cfe5aSMatthew Dillon #endif
3481a7cfe5aSMatthew Dillon 		error = hammer2_chain_create(trans, parentp, &chain,
34993f3933aSMatthew Dillon 				     key, HAMMER2_FREEMAP_LEVEL1_RADIX,
3501a7cfe5aSMatthew Dillon 				     HAMMER2_BREF_TYPE_FREEMAP_LEAF,
35193f3933aSMatthew Dillon 				     HAMMER2_FREEMAP_LEVELN_PSIZE);
3521a7cfe5aSMatthew Dillon 		if (error == 0) {
3531a7cfe5aSMatthew Dillon 			hammer2_chain_modify(trans, &chain, 0);
35493f3933aSMatthew Dillon 			bzero(&chain->data->bmdata[0],
35593f3933aSMatthew Dillon 			      HAMMER2_FREEMAP_LEVELN_PSIZE);
35693f3933aSMatthew Dillon 			chain->bref.check.freemap.bigmask = (uint32_t)-1;
35793f3933aSMatthew Dillon 			chain->bref.check.freemap.avail = l1size;
35893f3933aSMatthew Dillon 			/* bref.methods should already be inherited */
3591a7cfe5aSMatthew Dillon 
360a5913bdfSMatthew Dillon 			hammer2_freemap_init(trans, hmp, key, chain);
3611a7cfe5aSMatthew Dillon 		}
36293f3933aSMatthew Dillon 	} else if ((chain->bref.check.freemap.bigmask & (1 << radix)) == 0) {
3631a7cfe5aSMatthew Dillon 		/*
3641a7cfe5aSMatthew Dillon 		 * Already flagged as not having enough space
3651a7cfe5aSMatthew Dillon 		 */
3661a7cfe5aSMatthew Dillon 		error = ENOSPC;
3671a7cfe5aSMatthew Dillon 	} else {
3681a7cfe5aSMatthew Dillon 		/*
3691a7cfe5aSMatthew Dillon 		 * Modify existing chain to setup for adjustment.
3701a7cfe5aSMatthew Dillon 		 */
3711a7cfe5aSMatthew Dillon 		hammer2_chain_modify(trans, &chain, 0);
3721a7cfe5aSMatthew Dillon 	}
3731a7cfe5aSMatthew Dillon 
3741a7cfe5aSMatthew Dillon 	/*
37593f3933aSMatthew Dillon 	 * Scan 2MB entries.
3761a7cfe5aSMatthew Dillon 	 */
37793f3933aSMatthew Dillon 	if (error == 0) {
37893f3933aSMatthew Dillon 		hammer2_bmap_data_t *bmap;
37993f3933aSMatthew Dillon 		hammer2_key_t base_key;
38093f3933aSMatthew Dillon 		int count;
38193f3933aSMatthew Dillon 		int start;
38293f3933aSMatthew Dillon 		int n;
3831a7cfe5aSMatthew Dillon 
38491abd410SMatthew Dillon 		start = (int)((iter->bnext - key) >>
38591abd410SMatthew Dillon 			      HAMMER2_FREEMAP_LEVEL0_RADIX);
38693f3933aSMatthew Dillon 		KKASSERT(start >= 0 && start < HAMMER2_FREEMAP_COUNT);
38793f3933aSMatthew Dillon 		hammer2_chain_modify(trans, &chain, 0);
3881a7cfe5aSMatthew Dillon 
3891a7cfe5aSMatthew Dillon 		error = ENOSPC;
39093f3933aSMatthew Dillon 		for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) {
39193f3933aSMatthew Dillon 			if (start + count >= HAMMER2_FREEMAP_COUNT &&
39293f3933aSMatthew Dillon 			    start - count < 0) {
39393f3933aSMatthew Dillon 				break;
39493f3933aSMatthew Dillon 			}
39593f3933aSMatthew Dillon 			n = start + count;
39693f3933aSMatthew Dillon 			bmap = &chain->data->bmdata[n];
39793f3933aSMatthew Dillon 			if (n < HAMMER2_FREEMAP_COUNT && bmap->avail &&
39891abd410SMatthew Dillon 			    (bmap->class == 0 || bmap->class == class)) {
39993f3933aSMatthew Dillon 				base_key = key + n * l0size;
40091abd410SMatthew Dillon 				error = hammer2_bmap_alloc(trans, hmp, bmap,
40191abd410SMatthew Dillon 							   class, n, radix,
40291abd410SMatthew Dillon 							   &base_key);
40393f3933aSMatthew Dillon 				if (error != ENOSPC) {
40493f3933aSMatthew Dillon 					key = base_key;
40593f3933aSMatthew Dillon 					break;
40693f3933aSMatthew Dillon 				}
40793f3933aSMatthew Dillon 			}
40893f3933aSMatthew Dillon 			n = start - count;
40993f3933aSMatthew Dillon 			bmap = &chain->data->bmdata[n];
41093f3933aSMatthew Dillon 			if (n >= 0 && bmap->avail &&
41191abd410SMatthew Dillon 			    (bmap->class == 0 || bmap->class == class)) {
41293f3933aSMatthew Dillon 				base_key = key + n * l0size;
41391abd410SMatthew Dillon 				error = hammer2_bmap_alloc(trans, hmp, bmap,
41491abd410SMatthew Dillon 							   class, n, radix,
41591abd410SMatthew Dillon 							   &base_key);
41693f3933aSMatthew Dillon 				if (error != ENOSPC) {
41793f3933aSMatthew Dillon 					key = base_key;
41893f3933aSMatthew Dillon 					break;
41993f3933aSMatthew Dillon 				}
42093f3933aSMatthew Dillon 			}
42193f3933aSMatthew Dillon 		}
42293f3933aSMatthew Dillon 		if (error == ENOSPC)
42393f3933aSMatthew Dillon 			chain->bref.check.freemap.bigmask &= ~(1 << radix);
42493f3933aSMatthew Dillon 		/* XXX also scan down from original count */
42593f3933aSMatthew Dillon 	}
4261a7cfe5aSMatthew Dillon 
4271a7cfe5aSMatthew Dillon 	if (error == 0) {
4281a7cfe5aSMatthew Dillon 		/*
4291a7cfe5aSMatthew Dillon 		 * Assert validity.  Must be beyond the static allocator used
4301a7cfe5aSMatthew Dillon 		 * by newfs_hammer2 (and thus also beyond the aux area),
4311a7cfe5aSMatthew Dillon 		 * not go past the volume size, and must not be in the
4321a7cfe5aSMatthew Dillon 		 * reserved segment area for a zone.
4331a7cfe5aSMatthew Dillon 		 */
4341a7cfe5aSMatthew Dillon 		KKASSERT(key >= hmp->voldata.allocator_beg &&
4351a7cfe5aSMatthew Dillon 			 key + bytes <= hmp->voldata.volu_size);
4361a7cfe5aSMatthew Dillon 		KKASSERT((key & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG);
4371a7cfe5aSMatthew Dillon 		bref->data_off = key | radix;
4381a7cfe5aSMatthew Dillon 
4391a7cfe5aSMatthew Dillon #if 0
44093f3933aSMatthew Dillon 		kprintf("alloc cp=%p %016jx %016jx using %016jx\n",
4411a7cfe5aSMatthew Dillon 			chain,
44293f3933aSMatthew Dillon 			bref->key, bref->data_off, chain->bref.data_off);
4431a7cfe5aSMatthew Dillon #endif
4441a7cfe5aSMatthew Dillon 	} else if (error == ENOSPC) {
4451a7cfe5aSMatthew Dillon 		/*
44691abd410SMatthew Dillon 		 * Return EAGAIN with next iteration in iter->bnext, or
4471a7cfe5aSMatthew Dillon 		 * return ENOSPC if the allocation map has been exhausted.
4481a7cfe5aSMatthew Dillon 		 */
44991abd410SMatthew Dillon 		error = hammer2_freemap_iterate(trans, parentp, &chain, iter);
4501a7cfe5aSMatthew Dillon 	}
4511a7cfe5aSMatthew Dillon 
4521a7cfe5aSMatthew Dillon 	/*
4531a7cfe5aSMatthew Dillon 	 * Cleanup
4541a7cfe5aSMatthew Dillon 	 */
4551a7cfe5aSMatthew Dillon 	if (chain)
4561a7cfe5aSMatthew Dillon 		hammer2_chain_unlock(chain);
4571a7cfe5aSMatthew Dillon 	return (error);
4581a7cfe5aSMatthew Dillon }
4591a7cfe5aSMatthew Dillon 
46093f3933aSMatthew Dillon /*
46193f3933aSMatthew Dillon  * Allocate (1<<radix) bytes from the bmap whos base data offset is (*basep).
46293f3933aSMatthew Dillon  *
46393f3933aSMatthew Dillon  * If the linear iterator is mid-block we use it directly (the bitmap should
46493f3933aSMatthew Dillon  * already be marked allocated), otherwise we search for a block in the bitmap
46593f3933aSMatthew Dillon  * that fits the allocation request.
46693f3933aSMatthew Dillon  *
46793f3933aSMatthew Dillon  * A partial bitmap allocation sets the minimum bitmap granularity (16KB)
46893f3933aSMatthew Dillon  * to fully allocated and adjusts the linear allocator to allow the
46993f3933aSMatthew Dillon  * remaining space to be allocated.
47093f3933aSMatthew Dillon  */
47193f3933aSMatthew Dillon static
47293f3933aSMatthew Dillon int
473a5913bdfSMatthew Dillon hammer2_bmap_alloc(hammer2_trans_t *trans, hammer2_mount_t *hmp,
474a5913bdfSMatthew Dillon 		   hammer2_bmap_data_t *bmap,
47591abd410SMatthew Dillon 		   uint16_t class, int n, int radix, hammer2_key_t *basep)
47693f3933aSMatthew Dillon {
477*fdf62707SMatthew Dillon 	hammer2_io_t *dio;
47893f3933aSMatthew Dillon 	size_t size;
47991abd410SMatthew Dillon 	size_t bsize;
48093f3933aSMatthew Dillon 	int bmradix;
48193f3933aSMatthew Dillon 	uint32_t bmmask;
48293f3933aSMatthew Dillon 	int offset;
483*fdf62707SMatthew Dillon 	int error;
48493f3933aSMatthew Dillon 	int i;
48593f3933aSMatthew Dillon 	int j;
48693f3933aSMatthew Dillon 
48793f3933aSMatthew Dillon 	/*
48893f3933aSMatthew Dillon 	 * Take into account 2-bits per block when calculating bmradix.
48993f3933aSMatthew Dillon 	 */
49093f3933aSMatthew Dillon 	size = (size_t)1 << radix;
49191abd410SMatthew Dillon 
49293f3933aSMatthew Dillon 	if (radix <= HAMMER2_FREEMAP_BLOCK_RADIX) {
49393f3933aSMatthew Dillon 		bmradix = 2;
49491abd410SMatthew Dillon 		bsize = HAMMER2_FREEMAP_BLOCK_SIZE;
49593f3933aSMatthew Dillon 		/* (16K) 2 bits per allocation block */
49693f3933aSMatthew Dillon 	} else {
49793f3933aSMatthew Dillon 		bmradix = 2 << (radix - HAMMER2_FREEMAP_BLOCK_RADIX);
49891abd410SMatthew Dillon 		bsize = size;
49993f3933aSMatthew Dillon 		/* (32K-256K) 4, 8, 16, 32 bits per allocation block */
50093f3933aSMatthew Dillon 	}
50193f3933aSMatthew Dillon 
50293f3933aSMatthew Dillon 	/*
50391abd410SMatthew Dillon 	 * Use the linear iterator to pack small allocations, otherwise
50491abd410SMatthew Dillon 	 * fall-back to finding a free 16KB chunk.  The linear iterator
50591abd410SMatthew Dillon 	 * is only valid when *NOT* on a freemap chunking boundary (16KB).
50691abd410SMatthew Dillon 	 * If it is the bitmap must be scanned.  It can become invalid
50791abd410SMatthew Dillon 	 * once we pack to the boundary.  We adjust it after a bitmap
50891abd410SMatthew Dillon 	 * allocation only for sub-16KB allocations (so the perfectly good
50991abd410SMatthew Dillon 	 * previous value can still be used for fragments when 16KB+
51091abd410SMatthew Dillon 	 * allocations are made).
51191abd410SMatthew Dillon 	 *
51291abd410SMatthew Dillon 	 * Beware of hardware artifacts when bmradix == 32 (intermediate
51391abd410SMatthew Dillon 	 * result can wind up being '1' instead of '0' if hardware masks
51491abd410SMatthew Dillon 	 * bit-count & 31).
51593f3933aSMatthew Dillon 	 *
51693f3933aSMatthew Dillon 	 * NOTE: j needs to be even in the j= calculation.  As an artifact
51793f3933aSMatthew Dillon 	 *	 of the /2 division, our bitmask has to clear bit 0.
51891abd410SMatthew Dillon 	 *
51991abd410SMatthew Dillon 	 * NOTE: TODO this can leave little unallocatable fragments lying
52091abd410SMatthew Dillon 	 *	 around.
52193f3933aSMatthew Dillon 	 */
52291abd410SMatthew Dillon 	if (((uint32_t)bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) + size <=
52391abd410SMatthew Dillon 	    HAMMER2_FREEMAP_BLOCK_SIZE &&
5242c6f594dSMatthew Dillon 	    (bmap->linear & HAMMER2_FREEMAP_BLOCK_MASK) &&
52591abd410SMatthew Dillon 	    bmap->linear < HAMMER2_SEGSIZE) {
52693f3933aSMatthew Dillon 		KKASSERT(bmap->linear >= 0 &&
52791abd410SMatthew Dillon 			 bmap->linear + size <= HAMMER2_SEGSIZE &&
52891abd410SMatthew Dillon 			 (bmap->linear & (HAMMER2_MIN_ALLOC - 1)) == 0);
52993f3933aSMatthew Dillon 		offset = bmap->linear;
53093f3933aSMatthew Dillon 		i = offset / (HAMMER2_SEGSIZE / 8);
53193f3933aSMatthew Dillon 		j = (offset / (HAMMER2_FREEMAP_BLOCK_SIZE / 2)) & 30;
53293f3933aSMatthew Dillon 		bmmask = (bmradix == 32) ?
53393f3933aSMatthew Dillon 			 0xFFFFFFFFU : (1 << bmradix) - 1;
53493f3933aSMatthew Dillon 		bmmask <<= j;
53591abd410SMatthew Dillon 		bmap->linear = offset + size;
53693f3933aSMatthew Dillon 	} else {
53793f3933aSMatthew Dillon 		for (i = 0; i < 8; ++i) {
53893f3933aSMatthew Dillon 			bmmask = (bmradix == 32) ?
53993f3933aSMatthew Dillon 				 0xFFFFFFFFU : (1 << bmradix) - 1;
54093f3933aSMatthew Dillon 			for (j = 0; j < 32; j += bmradix) {
54193f3933aSMatthew Dillon 				if ((bmap->bitmap[i] & bmmask) == 0)
54293f3933aSMatthew Dillon 					goto success;
54393f3933aSMatthew Dillon 				bmmask <<= bmradix;
54493f3933aSMatthew Dillon 			}
54593f3933aSMatthew Dillon 		}
54691abd410SMatthew Dillon 		/*fragments might remain*/
54791abd410SMatthew Dillon 		/*KKASSERT(bmap->avail == 0);*/
54893f3933aSMatthew Dillon 		return (ENOSPC);
54993f3933aSMatthew Dillon success:
55093f3933aSMatthew Dillon 		offset = i * (HAMMER2_SEGSIZE / 8) +
55193f3933aSMatthew Dillon 			 (j * (HAMMER2_FREEMAP_BLOCK_SIZE / 2));
55291abd410SMatthew Dillon 		if (size & HAMMER2_FREEMAP_BLOCK_MASK)
55391abd410SMatthew Dillon 			bmap->linear = offset + size;
55493f3933aSMatthew Dillon 	}
55593f3933aSMatthew Dillon 
55693f3933aSMatthew Dillon 	KKASSERT(i >= 0 && i < 8);	/* 8 x 16 -> 128 x 16K -> 2MB */
55793f3933aSMatthew Dillon 
55893f3933aSMatthew Dillon 	/*
55993f3933aSMatthew Dillon 	 * Optimize the buffer cache to avoid unnecessary read-before-write
56093f3933aSMatthew Dillon 	 * operations.
56191abd410SMatthew Dillon 	 *
56291abd410SMatthew Dillon 	 * The device block size could be larger than the allocation size
56391abd410SMatthew Dillon 	 * so the actual bitmap test is somewhat more involved.  We have
56491abd410SMatthew Dillon 	 * to use a compatible buffer size for this operation.
56593f3933aSMatthew Dillon 	 */
5662c6f594dSMatthew Dillon 	if ((bmap->bitmap[i] & bmmask) == 0 &&
5672c6f594dSMatthew Dillon 	    hammer2_devblksize(size) != size) {
56891abd410SMatthew Dillon 		size_t psize = hammer2_devblksize(size);
56991abd410SMatthew Dillon 		hammer2_off_t pmask = (hammer2_off_t)psize - 1;
57091abd410SMatthew Dillon 		int pbmradix = 2 << (hammer2_devblkradix(radix) -
57191abd410SMatthew Dillon 				     HAMMER2_FREEMAP_BLOCK_RADIX);
57291abd410SMatthew Dillon 		uint32_t pbmmask;
573*fdf62707SMatthew Dillon 		int pradix = hammer2_getradix(psize);
57491abd410SMatthew Dillon 
57591abd410SMatthew Dillon 		pbmmask = (pbmradix == 32) ? 0xFFFFFFFFU : (1 << pbmradix) - 1;
57691abd410SMatthew Dillon 		while ((pbmmask & bmmask) == 0)
57791abd410SMatthew Dillon 			pbmmask <<= pbmradix;
57891abd410SMatthew Dillon 
5792c6f594dSMatthew Dillon #if 0
5802c6f594dSMatthew Dillon 		kprintf("%016jx mask %08x %08x %08x (%zd/%zd)\n",
5812c6f594dSMatthew Dillon 			*basep + offset, bmap->bitmap[i],
5822c6f594dSMatthew Dillon 			pbmmask, bmmask, size, psize);
5832c6f594dSMatthew Dillon #endif
5842c6f594dSMatthew Dillon 
58591abd410SMatthew Dillon 		if ((bmap->bitmap[i] & pbmmask) == 0) {
586*fdf62707SMatthew Dillon 			error = hammer2_io_newq(hmp,
587*fdf62707SMatthew Dillon 						(*basep + (offset & ~pmask)) |
588*fdf62707SMatthew Dillon 						 pradix,
589*fdf62707SMatthew Dillon 						psize, &dio);
590*fdf62707SMatthew Dillon 			hammer2_io_bqrelse(&dio);
59193f3933aSMatthew Dillon 		}
59291abd410SMatthew Dillon 	}
59393f3933aSMatthew Dillon 
59493f3933aSMatthew Dillon #if 0
59593f3933aSMatthew Dillon 	/*
59693f3933aSMatthew Dillon 	 * When initializing a new inode segment also attempt to initialize
59793f3933aSMatthew Dillon 	 * an adjacent segment.  Be careful not to index beyond the array
59893f3933aSMatthew Dillon 	 * bounds.
59993f3933aSMatthew Dillon 	 *
60093f3933aSMatthew Dillon 	 * We do this to try to localize inode accesses to improve
60193f3933aSMatthew Dillon 	 * directory scan rates.  XXX doesn't improve scan rates.
60293f3933aSMatthew Dillon 	 */
60393f3933aSMatthew Dillon 	if (size == HAMMER2_INODE_BYTES) {
60493f3933aSMatthew Dillon 		if (n & 1) {
60593f3933aSMatthew Dillon 			if (bmap[-1].radix == 0 && bmap[-1].avail)
60693f3933aSMatthew Dillon 				bmap[-1].radix = radix;
60793f3933aSMatthew Dillon 		} else {
60893f3933aSMatthew Dillon 			if (bmap[1].radix == 0 && bmap[1].avail)
60993f3933aSMatthew Dillon 				bmap[1].radix = radix;
61093f3933aSMatthew Dillon 		}
61193f3933aSMatthew Dillon 	}
61293f3933aSMatthew Dillon #endif
61393f3933aSMatthew Dillon 
61493f3933aSMatthew Dillon 	/*
61593f3933aSMatthew Dillon 	 * Adjust the linear iterator, set the radix if necessary (might as
61693f3933aSMatthew Dillon 	 * well just set it unconditionally), adjust *basep to return the
61793f3933aSMatthew Dillon 	 * allocated data offset.
61893f3933aSMatthew Dillon 	 */
61993f3933aSMatthew Dillon 	bmap->bitmap[i] |= bmmask;
62091abd410SMatthew Dillon 	bmap->class = class;
62193f3933aSMatthew Dillon 	bmap->avail -= size;
62293f3933aSMatthew Dillon 	*basep += offset;
62393f3933aSMatthew Dillon 
6249f604b01SMatthew Dillon 	hammer2_voldata_lock(hmp);
6259f604b01SMatthew Dillon 	hmp->voldata.allocator_free -= size;  /* XXX */
6269f604b01SMatthew Dillon 	hammer2_voldata_unlock(hmp, 1);
6279f604b01SMatthew Dillon 
62893f3933aSMatthew Dillon 	return(0);
62993f3933aSMatthew Dillon }
63093f3933aSMatthew Dillon 
63193f3933aSMatthew Dillon static
63293f3933aSMatthew Dillon void
633a5913bdfSMatthew Dillon hammer2_freemap_init(hammer2_trans_t *trans, hammer2_mount_t *hmp,
634a5913bdfSMatthew Dillon 		     hammer2_key_t key, hammer2_chain_t *chain)
63593f3933aSMatthew Dillon {
63693f3933aSMatthew Dillon 	hammer2_off_t l1size;
63793f3933aSMatthew Dillon 	hammer2_off_t lokey;
63893f3933aSMatthew Dillon 	hammer2_off_t hikey;
63993f3933aSMatthew Dillon 	hammer2_bmap_data_t *bmap;
64093f3933aSMatthew Dillon 	int count;
64193f3933aSMatthew Dillon 
64293f3933aSMatthew Dillon 	l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
64393f3933aSMatthew Dillon 
64493f3933aSMatthew Dillon 	/*
64593f3933aSMatthew Dillon 	 * Calculate the portion of the 2GB map that should be initialized
64693f3933aSMatthew Dillon 	 * as free.  Portions below or after will be initialized as allocated.
64793f3933aSMatthew Dillon 	 * SEGMASK-align the areas so we don't have to worry about sub-scans
64893f3933aSMatthew Dillon 	 * or endianess when using memset.
64993f3933aSMatthew Dillon 	 *
65093f3933aSMatthew Dillon 	 * (1) Ensure that all statically allocated space from newfs_hammer2
65193f3933aSMatthew Dillon 	 *     is marked allocated.
65293f3933aSMatthew Dillon 	 *
65393f3933aSMatthew Dillon 	 * (2) Ensure that the reserved area is marked allocated (typically
65493f3933aSMatthew Dillon 	 *     the first 4MB of the 2GB area being represented).
65593f3933aSMatthew Dillon 	 *
65693f3933aSMatthew Dillon 	 * (3) Ensure that any trailing space at the end-of-volume is marked
65793f3933aSMatthew Dillon 	 *     allocated.
65893f3933aSMatthew Dillon 	 *
65993f3933aSMatthew Dillon 	 * WARNING! It is possible for lokey to be larger than hikey if the
66093f3933aSMatthew Dillon 	 *	    entire 2GB segment is within the static allocation.
66193f3933aSMatthew Dillon 	 */
662a5913bdfSMatthew Dillon 	lokey = (hmp->voldata.allocator_beg + HAMMER2_SEGMASK64) &
66393f3933aSMatthew Dillon 		~HAMMER2_SEGMASK64;
66493f3933aSMatthew Dillon 
66593f3933aSMatthew Dillon 	if (lokey < H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
66693f3933aSMatthew Dillon 		  HAMMER2_ZONE_SEG64) {
66793f3933aSMatthew Dillon 		lokey = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
66893f3933aSMatthew Dillon 			HAMMER2_ZONE_SEG64;
66993f3933aSMatthew Dillon 	}
67093f3933aSMatthew Dillon 
67193f3933aSMatthew Dillon 	hikey = key + H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
672a5913bdfSMatthew Dillon 	if (hikey > hmp->voldata.volu_size) {
673a5913bdfSMatthew Dillon 		hikey = hmp->voldata.volu_size & ~HAMMER2_SEGMASK64;
67493f3933aSMatthew Dillon 	}
67593f3933aSMatthew Dillon 
67693f3933aSMatthew Dillon 	chain->bref.check.freemap.avail =
67793f3933aSMatthew Dillon 		H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
67893f3933aSMatthew Dillon 	bmap = &chain->data->bmdata[0];
67993f3933aSMatthew Dillon 
68093f3933aSMatthew Dillon 	for (count = 0; count < HAMMER2_FREEMAP_COUNT; ++count) {
68193f3933aSMatthew Dillon 		if (key < lokey || key >= hikey) {
68293f3933aSMatthew Dillon 			memset(bmap->bitmap, -1,
68393f3933aSMatthew Dillon 			       sizeof(bmap->bitmap));
68493f3933aSMatthew Dillon 			bmap->avail = 0;
6852c6f594dSMatthew Dillon 			bmap->linear = HAMMER2_SEGSIZE;
68693f3933aSMatthew Dillon 			chain->bref.check.freemap.avail -=
68793f3933aSMatthew Dillon 				H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
68893f3933aSMatthew Dillon 		} else {
68993f3933aSMatthew Dillon 			bmap->avail = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
69093f3933aSMatthew Dillon 		}
69193f3933aSMatthew Dillon 		key += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
69293f3933aSMatthew Dillon 		++bmap;
69393f3933aSMatthew Dillon 	}
69493f3933aSMatthew Dillon }
69593f3933aSMatthew Dillon 
69693f3933aSMatthew Dillon /*
69793f3933aSMatthew Dillon  * The current Level 1 freemap has been exhausted, iterate to the next
69893f3933aSMatthew Dillon  * one, return ENOSPC if no freemaps remain.
69993f3933aSMatthew Dillon  *
70093f3933aSMatthew Dillon  * XXX this should rotate back to the beginning to handle freed-up space
70193f3933aSMatthew Dillon  * XXX or use intermediate entries to locate free space. TODO
70293f3933aSMatthew Dillon  */
7031a7cfe5aSMatthew Dillon static int
7041a7cfe5aSMatthew Dillon hammer2_freemap_iterate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
70591abd410SMatthew Dillon 			hammer2_chain_t **chainp, hammer2_fiterate_t *iter)
7061a7cfe5aSMatthew Dillon {
707a5913bdfSMatthew Dillon 	hammer2_mount_t *hmp = (*parentp)->hmp;
708a5913bdfSMatthew Dillon 
70991abd410SMatthew Dillon 	iter->bnext &= ~(H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX) - 1);
71091abd410SMatthew Dillon 	iter->bnext += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
71191abd410SMatthew Dillon 	if (iter->bnext >= hmp->voldata.volu_size) {
71291abd410SMatthew Dillon 		iter->bnext = 0;
71391abd410SMatthew Dillon 		if (++iter->loops == 2)
7141a7cfe5aSMatthew Dillon 			return (ENOSPC);
71591abd410SMatthew Dillon 	}
7161a7cfe5aSMatthew Dillon 	return(EAGAIN);
71737aa19dfSMatthew Dillon }
7181a7cfe5aSMatthew Dillon 
71991abd410SMatthew Dillon /*
72091abd410SMatthew Dillon  * Free the specified blockref.  This code is only able to fully free
72191abd410SMatthew Dillon  * blocks when (how) is non-zero, otherwise the block is marked for
72291abd410SMatthew Dillon  * the bulk freeing pass to check.
72391abd410SMatthew Dillon  *
72491abd410SMatthew Dillon  * Normal use is to only mark inodes as possibly being free.  The underlying
72591abd410SMatthew Dillon  * file blocks are not necessarily marked.  The bulk freescan can
72691abd410SMatthew Dillon  * theoretically handle the case.
72791abd410SMatthew Dillon  *
72891abd410SMatthew Dillon  * XXX currently disabled when how == 0 (the normal real-time case).  At
72991abd410SMatthew Dillon  * the moment we depend on the bulk freescan to actually free blocks.  It
73091abd410SMatthew Dillon  * will still call this routine with a non-zero how to stage possible frees
73191abd410SMatthew Dillon  * and to do the actual free.
732a4dc31e0SMatthew Dillon  *
733a4dc31e0SMatthew Dillon  * WARNING! When called from a flush we have to use the 'live' sync_tid
734a4dc31e0SMatthew Dillon  *	    and not the flush sync_tid.  The live sync_tid is the flush
735a4dc31e0SMatthew Dillon  *	    sync_tid + 1.  That is, freemap allocations which occur during
736a4dc31e0SMatthew Dillon  *	    a flush are not part of the flush.  Crash-recovery will restore
737a4dc31e0SMatthew Dillon  *	    any lost allocations.
73891abd410SMatthew Dillon  */
739004f88b4SMatthew Dillon void
74091abd410SMatthew Dillon hammer2_freemap_free(hammer2_trans_t *trans, hammer2_mount_t *hmp,
74191abd410SMatthew Dillon 		     hammer2_blockref_t *bref, int how)
742004f88b4SMatthew Dillon {
74391abd410SMatthew Dillon 	hammer2_off_t data_off = bref->data_off;
74491abd410SMatthew Dillon 	hammer2_chain_t *chain;
74591abd410SMatthew Dillon 	hammer2_chain_t *parent;
74691abd410SMatthew Dillon 	hammer2_bmap_data_t *bmap;
74791abd410SMatthew Dillon 	hammer2_key_t key;
7481897c66eSMatthew Dillon 	hammer2_key_t key_dummy;
74991abd410SMatthew Dillon 	hammer2_off_t l0size;
75091abd410SMatthew Dillon 	hammer2_off_t l1size;
75191abd410SMatthew Dillon 	hammer2_off_t l1mask;
75291abd410SMatthew Dillon 	uint32_t *bitmap;
75391abd410SMatthew Dillon 	const uint32_t bmmask00 = 0;
75491abd410SMatthew Dillon 	uint32_t bmmask01;
75591abd410SMatthew Dillon 	uint32_t bmmask10;
75691abd410SMatthew Dillon 	uint32_t bmmask11;
75791abd410SMatthew Dillon 	size_t bytes;
75891abd410SMatthew Dillon 	uint16_t class;
759004f88b4SMatthew Dillon 	int radix;
76091abd410SMatthew Dillon 	int start;
76191abd410SMatthew Dillon 	int count;
76291abd410SMatthew Dillon 	int modified = 0;
7631897c66eSMatthew Dillon 	int cache_index = -1;
764004f88b4SMatthew Dillon 
765004f88b4SMatthew Dillon 	radix = (int)data_off & HAMMER2_OFF_MASK_RADIX;
766004f88b4SMatthew Dillon 	data_off &= ~HAMMER2_OFF_MASK_RADIX;
76791abd410SMatthew Dillon 	KKASSERT(radix <= HAMMER2_MAX_RADIX);
768004f88b4SMatthew Dillon 
76991abd410SMatthew Dillon 	bytes = (size_t)1 << radix;
77091abd410SMatthew Dillon 	class = (bref->type << 8) | hammer2_devblkradix(radix);
771004f88b4SMatthew Dillon 
7725c23d7f1SMatthew Dillon 	/*
773355d67fcSMatthew Dillon 	 * We can't free data allocated by newfs_hammer2.
774355d67fcSMatthew Dillon 	 * Assert validity.
775355d67fcSMatthew Dillon 	 */
776a4dc31e0SMatthew Dillon 	KKASSERT((data_off & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG);
777355d67fcSMatthew Dillon 	if (data_off < hmp->voldata.allocator_beg)
778355d67fcSMatthew Dillon 		return;
779355d67fcSMatthew Dillon 
780a4dc31e0SMatthew Dillon 	KKASSERT((trans->flags & HAMMER2_TRANS_ISALLOCATING) == 0);
781a7720be7SMatthew Dillon 	atomic_set_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING);
782a4dc31e0SMatthew Dillon 	if (trans->flags & HAMMER2_TRANS_ISFLUSH)
783a4dc31e0SMatthew Dillon 		++trans->sync_tid;
784a7720be7SMatthew Dillon 
785355d67fcSMatthew Dillon 	/*
78691abd410SMatthew Dillon 	 * Lookup the level1 freemap chain.  The chain must exist.
7875c23d7f1SMatthew Dillon 	 */
78891abd410SMatthew Dillon 	key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL1_RADIX);
78991abd410SMatthew Dillon 	l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
79091abd410SMatthew Dillon 	l1size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL1_RADIX);
79191abd410SMatthew Dillon 	l1mask = l1size - 1;
79291abd410SMatthew Dillon 
79391abd410SMatthew Dillon 	parent = &hmp->fchain;
79491abd410SMatthew Dillon 	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
79591abd410SMatthew Dillon 
7961897c66eSMatthew Dillon 	chain = hammer2_chain_lookup(&parent, &key_dummy, key, key + l1mask,
7971897c66eSMatthew Dillon 				     &cache_index,
79891abd410SMatthew Dillon 				     HAMMER2_LOOKUP_FREEMAP |
79991abd410SMatthew Dillon 				     HAMMER2_LOOKUP_ALWAYS |
80091abd410SMatthew Dillon 				     HAMMER2_LOOKUP_MATCHIND/*XXX*/);
80191abd410SMatthew Dillon 	if (chain == NULL) {
80291abd410SMatthew Dillon 		kprintf("hammer2_freemap_free: %016jx: no chain\n",
80391abd410SMatthew Dillon 			(intmax_t)bref->data_off);
80491abd410SMatthew Dillon 		hammer2_chain_unlock(parent);
80591abd410SMatthew Dillon 		return;
80691abd410SMatthew Dillon 	}
80791abd410SMatthew Dillon 	KKASSERT(chain->bref.type == HAMMER2_BREF_TYPE_FREEMAP_LEAF);
80891abd410SMatthew Dillon 
80991abd410SMatthew Dillon 	/*
81091abd410SMatthew Dillon 	 * Find the bmap entry (covering a 2MB swath)
81191abd410SMatthew Dillon 	 * Find the bitmap array index
81291abd410SMatthew Dillon 	 * Find the bitmap bit index (runs in 2-bit pairs)
81391abd410SMatthew Dillon 	 */
81491abd410SMatthew Dillon 	bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) &
81591abd410SMatthew Dillon 				    (HAMMER2_FREEMAP_COUNT - 1)];
81691abd410SMatthew Dillon 	bitmap = &bmap->bitmap[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7];
81791abd410SMatthew Dillon 
81891abd410SMatthew Dillon 	start = ((int)(data_off >> HAMMER2_FREEMAP_BLOCK_RADIX) & 15) * 2;
81991abd410SMatthew Dillon 	bmmask01 = 1 << start;
82091abd410SMatthew Dillon 	bmmask10 = 2 << start;
82191abd410SMatthew Dillon 	bmmask11 = 3 << start;
82291abd410SMatthew Dillon 
82391abd410SMatthew Dillon 	/*
82491abd410SMatthew Dillon 	 * Fixup the bitmap
82591abd410SMatthew Dillon 	 */
82691abd410SMatthew Dillon 	if (radix < HAMMER2_FREEMAP_BLOCK_RADIX) {
82791abd410SMatthew Dillon 		count = 1;
82891abd410SMatthew Dillon 		how = 0;	/* partial block, cannot set to 00 */
82991abd410SMatthew Dillon 	} else {
83091abd410SMatthew Dillon 		count = 1 << (radix - HAMMER2_FREEMAP_BLOCK_RADIX);
8315c23d7f1SMatthew Dillon 	}
8325c23d7f1SMatthew Dillon 
83391abd410SMatthew Dillon 	while (count) {
83491abd410SMatthew Dillon 		KKASSERT(bmmask11);
83591abd410SMatthew Dillon 		KKASSERT((*bitmap & bmmask11) != bmmask00);
83691abd410SMatthew Dillon 		if ((*bitmap & bmmask11) == bmmask11) {
83791abd410SMatthew Dillon 			if (!modified) {
83891abd410SMatthew Dillon 				hammer2_chain_modify(trans, &chain, 0);
83991abd410SMatthew Dillon 				modified = 1;
84091abd410SMatthew Dillon 	bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) &
84191abd410SMatthew Dillon 				    (HAMMER2_FREEMAP_COUNT - 1)];
84291abd410SMatthew Dillon 	bitmap = &bmap->bitmap[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7];
84391abd410SMatthew Dillon 			}
84491abd410SMatthew Dillon 			if (how)
84591abd410SMatthew Dillon 				*bitmap &= ~bmmask11;
84691abd410SMatthew Dillon 			else
84791abd410SMatthew Dillon 				*bitmap = (*bitmap & ~bmmask11) | bmmask10;
84891abd410SMatthew Dillon 		} else if ((*bitmap & bmmask11) == bmmask10) {
84991abd410SMatthew Dillon 			if (how) {
85091abd410SMatthew Dillon 				if (!modified) {
85191abd410SMatthew Dillon 					hammer2_chain_modify(trans, &chain, 0);
85291abd410SMatthew Dillon 					modified = 1;
85391abd410SMatthew Dillon 	bmap = &chain->data->bmdata[(int)(data_off >> HAMMER2_SEGRADIX) &
85491abd410SMatthew Dillon 				    (HAMMER2_FREEMAP_COUNT - 1)];
85591abd410SMatthew Dillon 	bitmap = &bmap->bitmap[(int)(data_off >> (HAMMER2_SEGRADIX - 3)) & 7];
85691abd410SMatthew Dillon 				}
85791abd410SMatthew Dillon 				*bitmap &= ~bmmask11;
85891abd410SMatthew Dillon 			}
85991abd410SMatthew Dillon 		} else if ((*bitmap & bmmask11) == bmmask01) {
86091abd410SMatthew Dillon 			KKASSERT(0);
86191abd410SMatthew Dillon 		}
86291abd410SMatthew Dillon 		--count;
86391abd410SMatthew Dillon 		bmmask01 <<= 2;
86491abd410SMatthew Dillon 		bmmask10 <<= 2;
86591abd410SMatthew Dillon 		bmmask11 <<= 2;
86691abd410SMatthew Dillon 	}
86791abd410SMatthew Dillon 	if (how && modified) {
86891abd410SMatthew Dillon 		bmap->avail += 1 << radix;
86991abd410SMatthew Dillon 		KKASSERT(bmap->avail <= HAMMER2_SEGSIZE);
87091abd410SMatthew Dillon 		if (bmap->avail == HAMMER2_SEGSIZE &&
87191abd410SMatthew Dillon 		    bmap->bitmap[0] == 0 &&
87291abd410SMatthew Dillon 		    bmap->bitmap[1] == 0 &&
87391abd410SMatthew Dillon 		    bmap->bitmap[2] == 0 &&
87491abd410SMatthew Dillon 		    bmap->bitmap[3] == 0 &&
87591abd410SMatthew Dillon 		    bmap->bitmap[4] == 0 &&
87691abd410SMatthew Dillon 		    bmap->bitmap[5] == 0 &&
87791abd410SMatthew Dillon 		    bmap->bitmap[6] == 0 &&
87891abd410SMatthew Dillon 		    bmap->bitmap[7] == 0) {
87991abd410SMatthew Dillon 			key = H2FMBASE(data_off, HAMMER2_FREEMAP_LEVEL0_RADIX);
88091abd410SMatthew Dillon 			kprintf("Freeseg %016jx\n", (intmax_t)key);
88191abd410SMatthew Dillon 			bmap->class = 0;
88291abd410SMatthew Dillon 		}
88391abd410SMatthew Dillon 	}
88491abd410SMatthew Dillon 
88591abd410SMatthew Dillon 	/*
88691abd410SMatthew Dillon 	 * chain->bref.check.freemap.bigmask (XXX)
88791abd410SMatthew Dillon 	 */
88891abd410SMatthew Dillon 	if (modified)
88991abd410SMatthew Dillon 		chain->bref.check.freemap.bigmask |= 1 << radix;
89091abd410SMatthew Dillon 
89191abd410SMatthew Dillon 	hammer2_chain_unlock(chain);
89291abd410SMatthew Dillon 	hammer2_chain_unlock(parent);
893a7720be7SMatthew Dillon 
894a7720be7SMatthew Dillon 	atomic_clear_int(&trans->flags, HAMMER2_TRANS_ISALLOCATING);
895a4dc31e0SMatthew Dillon 	if (trans->flags & HAMMER2_TRANS_ISFLUSH)
896a4dc31e0SMatthew Dillon 		--trans->sync_tid;
89791abd410SMatthew Dillon }
898