/*	$OpenBSD: subr_blist.c,v 1.4 2023/05/30 08:30:01 jsg Exp $	*/
/* DragonFlyBSD:7b80531f545c7d3c51c1660130c71d01f6bccbe0:/sys/kern/subr_blist.c */
/*
 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
 *
 * Copyright (c) 1998,2004 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * This module implements a general bitmap allocator/deallocator. The
 * allocator eats around 2 bits per 'block'. The module does not
 * try to interpret the meaning of a 'block' other than to return
 * SWAPBLK_NONE on an allocation failure.
 *
 * A radix tree is used to maintain the bitmap. Two radix constants are
 * involved: One for the bitmaps contained in the leaf nodes (typically
 * 32), and one for the meta nodes (typically 16). Both meta and leaf
 * nodes have a hint field. This field gives us a hint as to the largest
 * free contiguous range of blocks under the node. It may contain a
 * value that is too high, but will never contain a value that is too
 * low. When the radix tree is searched, allocation failures in subtrees
 * update the hint.
 *
 * The radix tree also implements two collapsed states for meta nodes:
 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
 * in either of these two states, all information contained underneath
 * the node is considered stale. These states are used to optimize
 * allocation and freeing operations.
 *
 * The hinting greatly increases code efficiency for allocations while
 * the general radix structure optimizes both allocations and frees. The
 * radix tree should be able to operate well no matter how much
 * fragmentation there is and no matter how large a bitmap is used.
 *
 * Unlike the rlist code, the blist code wires all necessary memory at
 * creation time. Neither allocations nor frees require interaction with
 * the memory subsystem. In contrast, the rlist code may allocate memory
 * on a blist_free() call. The non-blocking features of the blist code
 * are used to great advantage in the swap code (uvm/uvm_swap.c). The
 * rlist code uses a little less overall memory than the blist code (but
 * due to swap interleaving not all that much less), but the blist code
 * scales much, much better.
 *
 * LAYOUT: The radix tree is laid out recursively using a
 * linear array. Each meta node is immediately followed (laid out
 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
 * is a recursive structure but one that can be easily scanned through
 * a very simple 'skip' calculation. In order to support large radixes,
 * portions of the tree may reside outside our memory allocation. We
 * handle this with an early-termination optimization (when bighint is
 * set to -1) on the scan. The memory allocation is only large enough
 * to cover the number of blocks requested at creation time even if it
 * must be encompassed in a larger root-node radix.
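 *
 * A worked example of the skip arithmetic (an illustrative sketch only,
 * assuming the usual BLIST_BMAP_RADIX of 32 and BLIST_META_RADIX of 16):
 * a three-level tree has radix 8192 and skip 272. The root sits at
 * array index 0, next_skip is 272 / 16 = 17, and child k of the root
 * sits at index 1 + k * 17 (the child plus its 16 leaves occupy 17
 * slots) and covers blocks [k * 512, (k + 1) * 512).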
 *
 * NOTE: The allocator cannot currently allocate more than
 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
 * large' if you try. This is an area that could use improvement. The
 * radix is large enough that this restriction does not affect the swap
 * system, though. Currently only the allocation code is affected by
 * this algorithmic unfeature. The freeing code can handle arbitrary
 * ranges.
 *
 * NOTE: The radix may exceed BLIST_BMAP_RADIX bits in order to support
 * up to 2^(BLIST_BMAP_RADIX-1) blocks. The first division will
 * drop the radix down and fit it within a signed BLIST_BMAP_RADIX
 * bit integer.
 *
 * This code can be compiled stand-alone for debugging.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/blist.h>
#include <sys/malloc.h>

#else

#ifndef BLIST_NO_DEBUG
#define BLIST_DEBUG
#endif

#include <sys/types.h>
#include <assert.h>
#include <err.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <limits.h>

#define malloc(s,t,f)		calloc(1, s)
#define mallocarray(n,s,t,f)	reallocarray(NULL, n, s)
#define free(p,t,s)		free(p)
#define KASSERT(exp)		assert(exp)
#define KDASSERT(exp)		assert(exp)

#include "../sys/blist.h"

#define panic(...)	do { errx(1, __VA_ARGS__); } while (0)

#endif

/*
 * static support functions
 */

static swblk_t blst_leaf_alloc(blmeta_t *scan, swblk_t blkat,
			swblk_t blk, swblk_t count);
static swblk_t blst_meta_alloc(blmeta_t *scan, swblk_t blkat,
			swblk_t blk, swblk_t count,
			swblk_t radix, swblk_t skip);
static void blst_leaf_free(blmeta_t *scan, swblk_t relblk, swblk_t count);
static void blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count,
			swblk_t radix, swblk_t skip,
			swblk_t blk);
static swblk_t blst_leaf_fill(blmeta_t *scan, swblk_t blk, swblk_t count);
static swblk_t blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count,
			swblk_t radix, swblk_t skip,
			swblk_t blk);
static void blst_copy(blmeta_t *scan, swblk_t blk, swblk_t radix,
			swblk_t skip, blist_t dest, swblk_t count);
static swblk_t blst_radix_init(blmeta_t *scan, swblk_t radix,
			swblk_t skip, swblk_t count);
static int blst_radix_gapfind(blmeta_t *scan, swblk_t blk, swblk_t radix, swblk_t skip,
			int state, swblk_t *maxbp, swblk_t *maxep, swblk_t *bp, swblk_t *ep);

#if defined(BLIST_DEBUG) || defined(DDB)
static void blst_radix_print(blmeta_t *scan, swblk_t blk,
			swblk_t radix, swblk_t skip, int tab);
#endif

/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks must be greater than 0
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 *
 *	The pages are addressable in range [0, nblocks[
 */
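
/*
 * Illustrative usage sketch (not normative; it simply mirrors what the
 * stand-alone test harness at the bottom of this file does):
 *
 *	blist_t bl = blist_create(1024);
 *	blist_free(bl, 0, 1024);	(a new blist starts all-allocated)
 *	swblk_t blk = blist_alloc(bl, 16);
 *	if (blk != SWAPBLK_NONE)
 *		blist_free(bl, blk, 16);
 *	blist_destroy(bl);
 */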

blist_t
blist_create(swblk_t blocks)
{
	blist_t bl;
	swblk_t radix;
	swblk_t skip = 0;

	KASSERT(blocks > 0);

	/*
	 * Calculate radix and skip field used for scanning.
	 *
	 * Radix can exceed BLIST_BMAP_RADIX bits even if swblk_t is limited
	 * to BLIST_BMAP_RADIX bits.
	 *
	 * XXX check overflow
	 */
	radix = BLIST_BMAP_RADIX;

	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
		skip = (skip + 1) * BLIST_META_RADIX;
		KASSERT(skip > 0);
	}
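
	/*
	 * Worked example (illustrative only): for blocks = 1000 the loop
	 * above runs twice, leaving radix = 32 * 16 * 16 = 8192 and
	 * skip = (0 + 1) * 16 = 16, then (16 + 1) * 16 = 272, i.e. a
	 * three-level tree whose root covers 8192 blocks.
	 */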

	bl = malloc(sizeof(struct blist), M_VMSWAP, M_WAITOK | M_ZERO);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_skip = skip;
	bl->bl_rootblks = 1 +
	    blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
	bl->bl_root = mallocarray(bl->bl_rootblks, sizeof(blmeta_t),
	    M_VMSWAP, M_WAITOK);

#if defined(BLIST_DEBUG)
	printf(
		"BLIST representing %lu blocks (%lu MB of swap)"
		", requiring %6.2fM of ram\n",
		bl->bl_blocks,
		bl->bl_blocks * 4 / 1024,
		(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / (1024.0 * 1024.0)
	);
	printf("BLIST raw radix tree: %lu records, top-radix %lu\n",
	    bl->bl_rootblks, bl->bl_radix);
#endif
	blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);

	return(bl);
}

void
blist_destroy(blist_t bl)
{
	KASSERT(bl != NULL);

	free(bl->bl_root, M_VMSWAP, sizeof(blmeta_t) * bl->bl_rootblks);
	free(bl, M_VMSWAP, sizeof(struct blist));
}

/*
 * blist_alloc() -	reserve space in the block bitmap.  Return the base
 *			of a contiguous region or SWAPBLK_NONE if space could
 *			not be allocated.
 */

swblk_t
blist_alloc(blist_t bl, swblk_t count)
{
	swblk_t blk = SWAPBLK_NONE;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, 0, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, 0, 0, count,
			    bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE) {
			bl->bl_free -= count;

			KDASSERT(blk < bl->bl_blocks);
			KDASSERT(bl->bl_free <= bl->bl_blocks);
		}
	}
	return(blk);
}

swblk_t
blist_allocat(blist_t bl, swblk_t count, swblk_t blkat)
{
	swblk_t blk = SWAPBLK_NONE;

	if (bl) {
		KDASSERT(blkat < bl->bl_blocks);
		KDASSERT(blkat + count <= bl->bl_blocks);

		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, blkat, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, blkat, 0, count,
			    bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE) {
			bl->bl_free -= count;

			KDASSERT(blk < bl->bl_blocks);
			KDASSERT(bl->bl_free <= bl->bl_blocks);
		}
	}
	return(blk);
}

/*
 * blist_free() -	free up space in the block bitmap.  Return the base
 *			of a contiguous region.  Panic if an inconsistency is
 *			found.
 */

void
blist_free(blist_t bl, swblk_t blkno, swblk_t count)
{
	if (bl) {
		KDASSERT(blkno < bl->bl_blocks);
		KDASSERT(blkno + count <= bl->bl_blocks);

		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blst_leaf_free(bl->bl_root, blkno, count);
		else
			blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free += count;

		KDASSERT(bl->bl_free <= bl->bl_blocks);
	}
}

/*
 * blist_fill() -	mark a region in the block bitmap as off-limits
 *			to the allocator (i.e. allocate it), ignoring any
 *			existing allocations.  Return the number of blocks
 *			actually filled that were free before the call.
 */
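
/*
 * For instance (illustrative only): if blocks 10 and 11 are already
 * allocated, blist_fill(bl, 8, 8) marks blocks 8-15 allocated and
 * returns 6, the number of those blocks that were still free.
 */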

swblk_t
blist_fill(blist_t bl, swblk_t blkno, swblk_t count)
{
	swblk_t filled;

	if (bl) {
		KDASSERT(blkno < bl->bl_blocks);
		KDASSERT(blkno + count <= bl->bl_blocks);

		if (bl->bl_radix == BLIST_BMAP_RADIX) {
			filled = blst_leaf_fill(bl->bl_root, blkno, count);
		} else {
			filled = blst_meta_fill(bl->bl_root, blkno, count,
			    bl->bl_radix, bl->bl_skip, 0);
		}
		bl->bl_free -= filled;
		KDASSERT(bl->bl_free <= bl->bl_blocks);
		return (filled);
	} else {
		return 0;
	}
}

/*
 * blist_resize() -	resize an existing radix tree to handle the
 *			specified number of blocks.  This will reallocate
 *			the tree and transfer the previous bitmap to the new
 *			one.  When extending the tree you can specify whether
 *			the new blocks are to be left allocated or freed.
 */

void
blist_resize(blist_t *pbl, swblk_t count, int freenew)
{
	blist_t newbl = blist_create(count);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}

#define GAPFIND_FIRSTFREE 0
#define GAPFIND_FIRSTUSED 1

/*
 * blist_gapfind() -	return the largest gap (free pages) in blist.
 *			the blist isn't modified. the returned range
 *			is [maxbp, maxep[ . The size of the gap is
 *			maxep - maxbp. If not found, the size is 0.
 */

void
blist_gapfind(blist_t bl, swblk_t *maxbp, swblk_t *maxep)
{
	int state;
	swblk_t b, e;

	/* initialize gaps (max and current) */
	*maxbp = *maxep = 0;
	b = e = 0;

	/* search the largest gap from block 0 */
	state = blst_radix_gapfind(bl->bl_root, 0, bl->bl_radix, bl->bl_skip,
	    GAPFIND_FIRSTFREE, maxbp, maxep, &b, &e);

	if (state == GAPFIND_FIRSTUSED) {
		e = bl->bl_blocks;
		if (*maxep - *maxbp < e - b) {
			*maxbp = b;
			*maxep = e;
		}
	}

	KDASSERT(*maxbp <= *maxep);
	KDASSERT(*maxbp < bl->bl_blocks);
	KDASSERT(*maxep <= bl->bl_blocks);
}

/*
 * blst_radix_gapfind - search for the largest gap in one pass
 *
 * - search first free block, from X -> set B
 * - search first used block, from B -> set E
 * - if the size (E - B) is larger than max, update it
 * - loop (with X=E) until end of blist
 * - max is the largest free gap
 */
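
/*
 * Illustrative trace (assuming 128 blocks, i.e. four 32-block leaves,
 * where leaf 1 is partially used and the rest are fully free): leaf 0
 * sets B=0, leaf 1 is not fully free so E=32 and [0,32) is recorded,
 * leaf 2 sets B=64, leaf 3 keeps the state.  The scan returns in the
 * FIRSTUSED state, so blist_gapfind() closes the gap at bl_blocks=128
 * and reports the larger gap [64,128).
 */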
static int
blst_radix_gapfind(blmeta_t *scan, swblk_t blk, swblk_t radix, swblk_t skip,
    int state, swblk_t *maxbp, swblk_t *maxep, swblk_t *bp, swblk_t *ep)
{
	swblk_t i;
	swblk_t next_skip;

	if (radix == BLIST_BMAP_RADIX) {
		/* leaf node: we consider only completely free bitmaps as free */
		if (state == GAPFIND_FIRSTFREE) {
			if (scan->u.bmu_bitmap == (u_swblk_t)-1) {
				/* node is fully free */
				*bp = blk;
				return GAPFIND_FIRSTUSED;
			}

			/* it isn't fully free, not found, keep state */
			return state;

		} else if (state == GAPFIND_FIRSTUSED) {
			if (scan->u.bmu_bitmap == (u_swblk_t)-1) {
				/* it is free, not found, keep state */
				return state;
			}

			/* it is (at least partially) used */
			*ep = blk;
			if (*maxep - *maxbp < *ep - *bp) {
				*maxbp = *bp;
				*maxep = *ep;
			}
			return GAPFIND_FIRSTFREE;
		}
	}

	if (scan->u.bmu_avail == 0) {
		/* ALL-ALLOCATED */
		if (state == GAPFIND_FIRSTFREE) {
			/* searching free block, not found, keep state */
			return state;

		} else if (state == GAPFIND_FIRSTUSED) {
			/* searching used block, found */
			*ep = blk;
			if (*maxep - *maxbp < *ep - *bp) {
				*maxbp = *bp;
				*maxep = *ep;
			}
			return GAPFIND_FIRSTFREE;
		}
	}

	if (scan->u.bmu_avail == radix) {
		/* ALL-FREE */
		if (state == GAPFIND_FIRSTFREE) {
			/* searching free block, found */
			*bp = blk;
			return GAPFIND_FIRSTUSED;

		} else if (state == GAPFIND_FIRSTUSED) {
			/* searching used block, not found, keep state */
			return state;
		}
	}

	radix /= BLIST_META_RADIX;
	next_skip = (skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1)
			/* Terminator */
			break;

		state = blst_radix_gapfind(&scan[i], blk, radix, next_skip - 1,
		    state, maxbp, maxep, bp, ep);

		blk += radix;
	}

	return state;
}

#if defined(BLIST_DEBUG) || defined(DDB)

/*
 * blist_print() - dump radix tree
 */

void
blist_print(blist_t bl)
{
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}

#endif

/************************************************************************
 *			ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 *
 */

/*
 * blist_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the 1 block
 *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
 *	somewhat slower.  The 1 block allocation case is log2 and extremely
 *	quick.
 */
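
/*
 * Worked example of the single-block case below (illustrative only,
 * assuming BLIST_BMAP_RADIX = 32): the loop is a binary search for the
 * lowest set (free) bit.  Starting with j = 16 and a mask covering the
 * low 16 bits, each pass that finds the masked low half empty shifts
 * the remaining bits down by j and adds j to r; once j reaches 0, r is
 * the index of the lowest free bit, which is cleared and returned as
 * blk + r.
 */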

static swblk_t
blst_leaf_alloc(blmeta_t *scan, swblk_t blkat __unused, swblk_t blk,
    swblk_t count)
{
	u_swblk_t orig = scan->u.bmu_bitmap;

	if (orig == 0) {
		/*
		 * Optimize bitmap all-allocated case.  Also, count = 1
		 * case assumes at least 1 bit is free in the bitmap, so
		 * we have to take care of this case here.
		 */
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}
	if (count == 1) {
		/*
		 * Optimized code to allocate one bit out of the bitmap
		 */
		u_swblk_t mask;
		int j = BLIST_BMAP_RADIX/2;
		int r = 0;

		mask = (u_swblk_t)-1 >> (BLIST_BMAP_RADIX/2);

		while (j) {
			if ((orig & mask) == 0) {
				r += j;
				orig >>= j;
			}
			j >>= 1;
			mask >>= j;
		}
		scan->u.bmu_bitmap &= ~((u_swblk_t)1 << r);
		return(blk + r);
	}
	if (count <= BLIST_BMAP_RADIX) {
		/*
		 * non-optimized code to allocate N bits out of the bitmap.
		 * The more bits, the faster the code runs.  It will run
		 * the slowest allocating 2 bits, but since there aren't any
		 * memory ops in the core loop (or shouldn't be, anyway),
		 * you probably won't notice the difference.
		 */
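		/*
		 * For instance (illustrative only): count = 3 gives a
		 * 3-bit mask that is slid left one position per pass;
		 * the first offset j at which all three masked bits are
		 * still free is returned as blk + j.
		 */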
		int j;
		int n = (int)(BLIST_BMAP_RADIX - count);
		u_swblk_t mask;

		mask = (u_swblk_t)-1 >> n;

		for (j = 0; j <= n; ++j) {
			if ((orig & mask) == mask) {
				scan->u.bmu_bitmap &= ~mask;
				return(blk + j);
			}
			mask = (mask << 1);
		}
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	scan->bm_bighint = count - 1;

	return(SWAPBLK_NONE);
}

/*
 * blist_meta_alloc() -	allocate at a meta in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */
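
/*
 * Sketch of the index arithmetic used below (illustrative only): after
 * radix has been divided by BLIST_META_RADIX, child k of this node
 * lives at &scan[1 + k * next_skip] and covers blocks
 * [blk + k * radix, blk + (k + 1) * radix).  A bm_bighint of
 * (swblk_t)-1 marks a terminator past the allocated part of the array.
 */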
static swblk_t
blst_meta_alloc(blmeta_t *scan, swblk_t blkat,
    swblk_t blk, swblk_t count,
    swblk_t radix, swblk_t skip)
{
	int hintok = (blk >= blkat);
	swblk_t next_skip = ((swblk_t)skip / BLIST_META_RADIX);
	swblk_t i;

#ifndef _KERNEL
	printf("blist_meta_alloc blkat %lu blk %lu count %lu radix %lu\n",
	    blkat, blk, count, radix);
#endif

	/*
	 * ALL-ALLOCATED special case
	 */
	if (scan->u.bmu_avail == 0) {
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}

	/*
	 * ALL-FREE special case, initialize uninitialized
	 * sublevel.
	 *
	 * NOTE: radix may exceed 32 bits until first division.
	 */
	if (scan->u.bmu_avail == radix) {
		scan->bm_bighint = radix;

		radix /= BLIST_META_RADIX;
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (swblk_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_swblk_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = (swblk_t)radix;
				scan[i].u.bmu_avail = (swblk_t)radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1) {
			/*
			 * Terminator
			 *
			 * note: check it first, as swblk_t may be unsigned.
			 * otherwise, the second if() might match and the
			 * Terminator will be ignored.
			 */
			break;
		}

		if (count <= scan[i].bm_bighint &&
		    blk + (swblk_t)radix > blkat) {
			/*
			 * count fits in object
			 */
			swblk_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blkat,
				    blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blkat,
				    blk, count,
				    radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
			/* bighint was updated by recursion */
		} else if (count > (swblk_t)radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("%s: allocation too large %lu/%lu",
			    __func__, count, radix);
		}
		blk += (swblk_t)radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (hintok && scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}

/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 */
static void
blst_leaf_free(blmeta_t *scan, swblk_t blk, swblk_t count)
{
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	    \_________/\__/
	 *	         v       n
	 */
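	/*
	 * For example (illustrative only): offset n = 4 and count = 3
	 * build mask = (-1 << 4) & (-1 >> (BLIST_BMAP_RADIX - 7)), i.e.
	 * bits 4-6; the code checks those bits are currently 0
	 * (allocated) and then sets them to mark the blocks free.
	 */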
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_swblk_t mask;

	mask = ((u_swblk_t)-1 << n) &
	    ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("%s: freeing free block", __func__);
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}

/*
 * BLST_META_FREE() - free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */

static void
blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count,
    swblk_t radix, swblk_t skip, swblk_t blk)
{
	swblk_t i;
	swblk_t next_skip = ((swblk_t)skip / BLIST_META_RADIX);

#if 0
	printf("FREE (%04lx,%lu) FROM (%04lx,%lu)\n",
	    freeBlk, count,
	    blk, radix
	);
#endif

	/*
	 * ALL-ALLOCATED special case, initialize for recursion.
	 *
	 * We will short-cut the ALL-ALLOCATED -> ALL-FREE case.
	 */
	if (scan->u.bmu_avail == 0) {
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (swblk_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 *
	 * Set bighint for higher levels to snoop.
	 */
	if (scan->u.bmu_avail == radix) {
		scan->bm_bighint = radix;
		return;
	}

	/*
	 * Break the free down into its components
	 */
	if (scan->u.bmu_avail > radix) {
		panic("%s: freeing already "
		    "free blocks (%lu) %lu/%lu",
		    __func__, count, (long)scan->u.bmu_avail, radix);
	}

	radix /= BLIST_META_RADIX;

	i = (freeBlk - blk) / (swblk_t)radix;
	blk += i * (swblk_t)radix;
	i = i * next_skip + 1;
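
	/*
	 * Index arithmetic, for clarity (illustrative values only): with
	 * sub-radix 512 and next_skip 17, freeing from block 1300 under a
	 * node starting at blk 0 selects child i = 1300 / 512 = 2, i.e.
	 * array slot 2 * 17 + 1 = 35, and rebases blk to 1024.
	 */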

	while (i <= skip && blk < freeBlk + count) {
		swblk_t v;

		v = blk + (swblk_t)radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (swblk_t)-1)
			panic("%s: freeing unexpected range", __func__);

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v,
			    radix, next_skip - 1, blk);
		}

		/*
		 * After having dealt with the becomes-all-free case any
		 * partial free will not be able to bring us to the
		 * becomes-all-free state.
		 *
		 * We can raise bighint to at least the sub-segment's
		 * bighint.
		 */
		if (scan->bm_bighint < scan[i].bm_bighint) {
			scan->bm_bighint = scan[i].bm_bighint;
		}
		count -= v;
		freeBlk += v;
		blk += (swblk_t)radix;
		i += next_skip;
	}
}

/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	Allocates all blocks in the specified range regardless of
 *	any existing allocations in that range.  Returns the number
 *	of blocks allocated by the call.
 */
static swblk_t
blst_leaf_fill(blmeta_t *scan, swblk_t blk, swblk_t count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	swblk_t nblks;
	u_swblk_t mask, bitmap;

	mask = ((u_swblk_t)-1 << n) &
	    ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* Count the number of blocks we're about to allocate */
	bitmap = scan->u.bmu_bitmap & mask;
	for (nblks = 0; bitmap != 0; nblks++)
		bitmap &= bitmap - 1;
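	/*
	 * (the loop above clears the lowest set bit per pass, so nblks
	 * ends up as the population count of the free blocks under mask)
	 */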

	scan->u.bmu_bitmap &= ~mask;
	return (nblks);
}

/*
 * BLST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	Allocates the specified range of blocks, regardless of
 *	any existing allocations in the range.  The range must
 *	be within the extent of this node.  Returns the number
 *	of blocks allocated by the call.
 */
static swblk_t
blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count,
    swblk_t radix, swblk_t skip, swblk_t blk)
{
	swblk_t i;
	swblk_t next_skip = ((swblk_t)skip / BLIST_META_RADIX);
	swblk_t nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return (nblks);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize sublevel
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (swblk_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_swblk_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = (swblk_t)radix;
				scan[i].u.bmu_avail = (swblk_t)radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > (swblk_t)radix)
		panic("%s: allocation too large", __func__);

	i = (fillBlk - blk) / (swblk_t)radix;
	blk += i * (swblk_t)radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < fillBlk + count) {
		swblk_t v;

		v = blk + (swblk_t)radix - fillBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (swblk_t)-1)
			panic("%s: filling unexpected range", __func__);

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], fillBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], fillBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		fillBlk += v;
		blk += (swblk_t)radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return (nblks);
}

/*
 * BLIST_RADIX_COPY() - copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the destination
 *	tree.  The space may not already be free in the destination.
 */

static void
blst_copy(blmeta_t *scan, swblk_t blk, swblk_t radix,
    swblk_t skip, blist_t dest, swblk_t count)
{
	swblk_t next_skip;
	swblk_t i;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		u_swblk_t v = scan->u.bmu_bitmap;

		if (v == (u_swblk_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & ((swblk_t)1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}

	/*
	 * Meta node
	 */

	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, (swblk_t)radix);
		return;
	}


	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);

	for (i = 1; count && i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1)
			break;

		if (count >= (swblk_t)radix) {
			blst_copy(
			    &scan[i],
			    blk,
			    radix,
			    next_skip - 1,
			    dest,
			    (swblk_t)radix
			);
			count -= (swblk_t)radix;
		} else {
			if (count) {
				blst_copy(
				    &scan[i],
				    blk,
				    radix,
				    next_skip - 1,
				    dest,
				    count
				);
			}
			count = 0;
		}
		blk += (swblk_t)radix;
	}
}

/*
 * BLST_RADIX_INIT() - initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */
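
/*
 * Rough sizing example (illustrative only, assuming BLIST_BMAP_RADIX=32
 * and BLIST_META_RADIX=16): blist_create(1000) builds a radix-8192 tree
 * but only walks far enough to cover 1000 blocks, so bl_rootblks works
 * out to about 36 blmeta_t slots (root, two full 512-block subtrees of
 * 17 slots each, and a terminator slot) rather than the ~273 slots a
 * fully populated radix-8192 tree would need.
 */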

static swblk_t
blst_radix_init(blmeta_t *scan, swblk_t radix, swblk_t skip, swblk_t count)
{
	swblk_t i;
	swblk_t next_skip;
	swblk_t memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= (swblk_t)radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    (swblk_t)radix
			);
			count -= (swblk_t)radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (swblk_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return(memindex);
}

#if defined(BLIST_DEBUG) || defined(DDB)

static void
blst_radix_print(blmeta_t *scan, swblk_t blk, swblk_t radix, swblk_t skip, int tab)
{
	swblk_t i;
	swblk_t next_skip;

	if (radix == BLIST_BMAP_RADIX) {
		printf(
		    "%*.*s(%04lx,%lu): bitmap %0*llx big=%lu\n",
		    tab, tab, "",
		    blk, radix,
		    (int)(1 + (BLIST_BMAP_RADIX - 1) / 4),
		    scan->u.bmu_bitmap,
		    scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		printf(
		    "%*.*s(%04lx,%lu) ALL ALLOCATED\n",
		    tab, tab, "",
		    blk,
		    radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		printf(
		    "%*.*s(%04lx,%lu) ALL FREE\n",
		    tab, tab, "",
		    blk,
		    radix
		);
		return;
	}

	printf(
	    "%*.*s(%04lx,%lu): subtree (%lu/%lu) big=%lu {\n",
	    tab, tab, "",
	    blk, radix,
	    scan->u.bmu_avail,
	    radix,
	    scan->bm_bighint
	);

	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);
	tab += 4;

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1) {
			printf(
			    "%*.*s(%04lx,%lu): Terminator\n",
			    tab, tab, "",
			    blk, radix
			);
			break;
		}
		blst_radix_print(
		    &scan[i],
		    blk,
		    radix,
		    next_skip - 1,
		    tab
		);
		blk += (swblk_t)radix;
	}
	tab -= 4;

	printf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif

#if !defined(_KERNEL) && defined(BLIST_DEBUG)

int
main(int ac, char **av)
{
	swblk_t size = 1024;
	swblk_t i;
	blist_t bl;

	for (i = 1; i < (swblk_t)ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		swblk_t da = 0;
		swblk_t count = 0;
		swblk_t blkat;


		printf("%lu/%lu/%lu> ",
		    bl->bl_free, size, bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case '#':
			continue;
		case 'r':
			if (sscanf(buf + 1, "%li", &count) == 1) {
				blist_resize(&bl, count, 1);
				size = count;
			} else {
				printf("?\n");
			}
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%li %li", &count, &blkat) == 1) {
				printf("count %lu\n", count);
				swblk_t blk = blist_alloc(bl, count);
				if (blk == SWAPBLK_NONE)
					printf("    R=SWAPBLK_NONE\n");
				else
					printf("    R=%04lx\n", blk);
			} else if (sscanf(buf + 1, "%li %li", &count, &blkat) == 2) {
				swblk_t blk = blist_allocat(bl, count, blkat);
				if (blk == SWAPBLK_NONE)
					printf("    R=SWAPBLK_NONE\n");
				else
					printf("    R=%04lx\n", blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%li %li", &da, &count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'g': {
			swblk_t b, e;
			blist_gapfind(bl, &b, &e);
			printf("gapfind: begin=%04lx end=%04lx size=%lu\n",
			    b, e, e-b);
			break;
		}
		case 'l':
			if (sscanf(buf + 1, "%li %li", &da, &count) == 2) {
				printf("    n=%lu\n",
				    blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %li      -allocate\n"
			    "f %li %li  -free\n"
			    "l %li %li  -fill\n"
			    "g          -gapfind\n"
			    "r %li      -resize\n"
			    "h/?        -help\n"
			    "    hex may be specified with 0x prefix\n"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return(0);
}

#endif