1 /* 2 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting 3 * 4 * Copyright (c) 1998,2004 The DragonFly Project. All rights reserved. 5 * 6 * This code is derived from software contributed to The DragonFly Project 7 * by Matthew Dillon <dillon@backplane.com> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * 37 * This module implements a general bitmap allocator/deallocator. The 38 * allocator eats around 2 bits per 'block'. The module does not 39 * try to interpret the meaning of a 'block' other then to return 40 * SWAPBLK_NONE on an allocation failure. 41 * 42 * A radix tree is used to maintain the bitmap. Two radix constants are 43 * involved: One for the bitmaps contained in the leaf nodes (typically 44 * 32), and one for the meta nodes (typically 16). Both meta and leaf 45 * nodes have a hint field. This field gives us a hint as to the largest 46 * free contiguous range of blocks under the node. It may contain a 47 * value that is too high, but will never contain a value that is too 48 * low. When the radix tree is searched, allocation failures in subtrees 49 * update the hint. 50 * 51 * The radix tree also implements two collapsed states for meta nodes: 52 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is 53 * in either of these two states, all information contained underneath 54 * the node is considered stale. These states are used to optimize 55 * allocation and freeing operations. 56 * 57 * The hinting greatly increases code efficiency for allocations while 58 * the general radix structure optimizes both allocations and frees. The 59 * radix tree should be able to operate well no matter how much 60 * fragmentation there is and no matter how large a bitmap is used. 61 * 62 * Unlike the rlist code, the blist code wires all necessary memory at 63 * creation time. Neither allocations nor frees require interaction with 64 * the memory subsystem. In contrast, the rlist code may allocate memory 65 * on an rlist_free() call. The non-blocking features of the blist code 66 * are used to great advantage in the swap code (vm/nswap_pager.c). The 67 * rlist code uses a little less overall memory then the blist code (but 68 * due to swap interleaving not all that much less), but the blist code 69 * scales much, much better. 70 * 71 * LAYOUT: The radix tree is layed out recursively using a 72 * linear array. Each meta node is immediately followed (layed out 73 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This 74 * is a recursive structure but one that can be easily scanned through 75 * a very simple 'skip' calculation. In order to support large radixes, 76 * portions of the tree may reside outside our memory allocation. We 77 * handle this with an early-termination optimization (when bighint is 78 * set to -1) on the scan. The memory allocation is only large enough 79 * to cover the number of blocks requested at creation time even if it 80 * must be encompassed in larger root-node radix. 81 * 82 * NOTE: The allocator cannot currently allocate more then 83 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too 84 * large' if you try. This is an area that could use improvement. The 85 * radix is large enough that this restriction does not effect the swap 86 * system, though. Currently only the allocation code is effected by 87 * this algorithmic unfeature. The freeing code can handle arbitrary 88 * ranges. 89 * 90 * NOTE: The radix may exceed 32 bits in order to support up to 2^31 91 * blocks. The first divison will drop the radix down and fit 92 * it within a signed 32 bit integer. 93 * 94 * This code can be compiled stand-alone for debugging. 95 */ 96 97 #ifdef _KERNEL 98 99 #include <sys/param.h> 100 #include <sys/systm.h> 101 #include <sys/lock.h> 102 #include <sys/kernel.h> 103 #include <sys/blist.h> 104 #include <sys/malloc.h> 105 106 #else 107 108 #ifndef BLIST_NO_DEBUG 109 #define BLIST_DEBUG 110 #endif 111 112 #define SWAPBLK_NONE ((swblk_t)-1) 113 114 #include <sys/types.h> 115 #include <stdio.h> 116 #include <string.h> 117 #include <stdlib.h> 118 #include <stdarg.h> 119 120 #define kmalloc(a,b,c) malloc(a) 121 #define kfree(a,b) free(a) 122 #define kprintf printf 123 #define KKASSERT(exp) 124 125 #include <sys/blist.h> 126 127 void panic(const char *ctl, ...); 128 129 #endif 130 131 /* 132 * static support functions 133 */ 134 135 static swblk_t blst_leaf_alloc(blmeta_t *scan, swblk_t blkat, 136 swblk_t blk, int count); 137 static swblk_t blst_meta_alloc(blmeta_t *scan, swblk_t blkat, 138 swblk_t blk, swblk_t count, 139 int64_t radix, int skip); 140 static void blst_leaf_free(blmeta_t *scan, swblk_t relblk, int count); 141 static void blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count, 142 int64_t radix, int skip, swblk_t blk); 143 static swblk_t blst_leaf_fill(blmeta_t *scan, swblk_t blk, int count); 144 static swblk_t blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count, 145 int64_t radix, int skip, swblk_t blk); 146 static void blst_copy(blmeta_t *scan, swblk_t blk, int64_t radix, 147 swblk_t skip, blist_t dest, swblk_t count); 148 static swblk_t blst_radix_init(blmeta_t *scan, int64_t radix, 149 int skip, swblk_t count); 150 #ifndef _KERNEL 151 static void blst_radix_print(blmeta_t *scan, swblk_t blk, 152 int64_t radix, int skip, int tab); 153 #endif 154 155 #ifdef _KERNEL 156 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space"); 157 #endif 158 159 /* 160 * blist_create() - create a blist capable of handling up to the specified 161 * number of blocks 162 * 163 * blocks must be greater then 0 164 * 165 * The smallest blist consists of a single leaf node capable of 166 * managing BLIST_BMAP_RADIX blocks. 167 */ 168 169 blist_t 170 blist_create(swblk_t blocks) 171 { 172 blist_t bl; 173 int64_t radix; 174 int skip = 0; 175 176 /* 177 * Calculate radix and skip field used for scanning. 178 * 179 * Radix can exceed 32 bits even if swblk_t is limited to 32 bits. 180 */ 181 radix = BLIST_BMAP_RADIX; 182 183 while (radix < blocks) { 184 radix *= BLIST_META_RADIX; 185 skip = (skip + 1) * BLIST_META_RADIX; 186 KKASSERT(skip > 0); 187 } 188 189 bl = kmalloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO); 190 191 bl->bl_blocks = blocks; 192 bl->bl_radix = radix; 193 bl->bl_skip = skip; 194 bl->bl_rootblks = 1 + 195 blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks); 196 bl->bl_root = kmalloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK); 197 198 #if defined(BLIST_DEBUG) 199 kprintf( 200 "BLIST representing %d blocks (%d MB of swap)" 201 ", requiring %dK of ram\n", 202 bl->bl_blocks, 203 bl->bl_blocks * 4 / 1024, 204 (bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024 205 ); 206 kprintf("BLIST raw radix tree contains %d records\n", bl->bl_rootblks); 207 #endif 208 blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks); 209 210 return(bl); 211 } 212 213 void 214 blist_destroy(blist_t bl) 215 { 216 kfree(bl->bl_root, M_SWAP); 217 kfree(bl, M_SWAP); 218 } 219 220 /* 221 * blist_alloc() - reserve space in the block bitmap. Return the base 222 * of a contiguous region or SWAPBLK_NONE if space could 223 * not be allocated. 224 */ 225 226 swblk_t 227 blist_alloc(blist_t bl, swblk_t count) 228 { 229 swblk_t blk = SWAPBLK_NONE; 230 231 if (bl) { 232 if (bl->bl_radix == BLIST_BMAP_RADIX) 233 blk = blst_leaf_alloc(bl->bl_root, 0, 0, count); 234 else 235 blk = blst_meta_alloc(bl->bl_root, 0, 0, count, 236 bl->bl_radix, bl->bl_skip); 237 if (blk != SWAPBLK_NONE) 238 bl->bl_free -= count; 239 } 240 return(blk); 241 } 242 243 swblk_t 244 blist_allocat(blist_t bl, swblk_t count, swblk_t blkat) 245 { 246 swblk_t blk = SWAPBLK_NONE; 247 248 if (bl) { 249 if (bl->bl_radix == BLIST_BMAP_RADIX) 250 blk = blst_leaf_alloc(bl->bl_root, blkat, 0, count); 251 else 252 blk = blst_meta_alloc(bl->bl_root, blkat, 0, count, 253 bl->bl_radix, bl->bl_skip); 254 if (blk != SWAPBLK_NONE) 255 bl->bl_free -= count; 256 } 257 return(blk); 258 } 259 260 /* 261 * blist_free() - free up space in the block bitmap. Return the base 262 * of a contiguous region. Panic if an inconsistancy is 263 * found. 264 */ 265 266 void 267 blist_free(blist_t bl, swblk_t blkno, swblk_t count) 268 { 269 if (bl) { 270 if (bl->bl_radix == BLIST_BMAP_RADIX) 271 blst_leaf_free(bl->bl_root, blkno, count); 272 else 273 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0); 274 bl->bl_free += count; 275 } 276 } 277 278 /* 279 * blist_fill() - mark a region in the block bitmap as off-limits 280 * to the allocator (i.e. allocate it), ignoring any 281 * existing allocations. Return the number of blocks 282 * actually filled that were free before the call. 283 */ 284 285 swblk_t 286 blist_fill(blist_t bl, swblk_t blkno, swblk_t count) 287 { 288 swblk_t filled; 289 290 if (bl) { 291 if (bl->bl_radix == BLIST_BMAP_RADIX) { 292 filled = blst_leaf_fill(bl->bl_root, blkno, count); 293 } else { 294 filled = blst_meta_fill(bl->bl_root, blkno, count, 295 bl->bl_radix, bl->bl_skip, 0); 296 } 297 bl->bl_free -= filled; 298 return (filled); 299 } else { 300 return 0; 301 } 302 } 303 304 /* 305 * blist_resize() - resize an existing radix tree to handle the 306 * specified number of blocks. This will reallocate 307 * the tree and transfer the previous bitmap to the new 308 * one. When extending the tree you can specify whether 309 * the new blocks are to left allocated or freed. 310 */ 311 312 void 313 blist_resize(blist_t *pbl, swblk_t count, int freenew) 314 { 315 blist_t newbl = blist_create(count); 316 blist_t save = *pbl; 317 318 *pbl = newbl; 319 if (count > save->bl_blocks) 320 count = save->bl_blocks; 321 blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count); 322 323 /* 324 * If resizing upwards, should we free the new space or not? 325 */ 326 if (freenew && count < newbl->bl_blocks) { 327 blist_free(newbl, count, newbl->bl_blocks - count); 328 } 329 blist_destroy(save); 330 } 331 332 #ifdef BLIST_DEBUG 333 334 /* 335 * blist_print() - dump radix tree 336 */ 337 338 void 339 blist_print(blist_t bl) 340 { 341 kprintf("BLIST {\n"); 342 blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4); 343 kprintf("}\n"); 344 } 345 346 #endif 347 348 /************************************************************************ 349 * ALLOCATION SUPPORT FUNCTIONS * 350 ************************************************************************ 351 * 352 * These support functions do all the actual work. They may seem 353 * rather longish, but that's because I've commented them up. The 354 * actual code is straight forward. 355 * 356 */ 357 358 /* 359 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 360 * 361 * This is the core of the allocator and is optimized for the 1 block 362 * and the BLIST_BMAP_RADIX block allocation cases. Other cases are 363 * somewhat slower. The 1 block allocation case is log2 and extremely 364 * quick. 365 */ 366 367 static swblk_t 368 blst_leaf_alloc(blmeta_t *scan, swblk_t blkat __unused, swblk_t blk, int count) 369 { 370 u_swblk_t orig = scan->u.bmu_bitmap; 371 372 if (orig == 0) { 373 /* 374 * Optimize bitmap all-allocated case. Also, count = 1 375 * case assumes at least 1 bit is free in the bitmap, so 376 * we have to take care of this case here. 377 */ 378 scan->bm_bighint = 0; 379 return(SWAPBLK_NONE); 380 } 381 if (count == 1) { 382 /* 383 * Optimized code to allocate one bit out of the bitmap 384 */ 385 u_swblk_t mask; 386 int j = BLIST_BMAP_RADIX/2; 387 int r = 0; 388 389 mask = (u_swblk_t)-1 >> (BLIST_BMAP_RADIX/2); 390 391 while (j) { 392 if ((orig & mask) == 0) { 393 r += j; 394 orig >>= j; 395 } 396 j >>= 1; 397 mask >>= j; 398 } 399 scan->u.bmu_bitmap &= ~(1 << r); 400 return(blk + r); 401 } 402 if (count <= BLIST_BMAP_RADIX) { 403 /* 404 * non-optimized code to allocate N bits out of the bitmap. 405 * The more bits, the faster the code runs. It will run 406 * the slowest allocating 2 bits, but since there aren't any 407 * memory ops in the core loop (or shouldn't be, anyway), 408 * you probably won't notice the difference. 409 */ 410 int j; 411 int n = BLIST_BMAP_RADIX - count; 412 u_swblk_t mask; 413 414 mask = (u_swblk_t)-1 >> n; 415 416 for (j = 0; j <= n; ++j) { 417 if ((orig & mask) == mask) { 418 scan->u.bmu_bitmap &= ~mask; 419 return(blk + j); 420 } 421 mask = (mask << 1); 422 } 423 } 424 /* 425 * We couldn't allocate count in this subtree, update bighint. 426 */ 427 scan->bm_bighint = count - 1; 428 return(SWAPBLK_NONE); 429 } 430 431 /* 432 * blist_meta_alloc() - allocate at a meta in the radix tree. 433 * 434 * Attempt to allocate at a meta node. If we can't, we update 435 * bighint and return a failure. Updating bighint optimize future 436 * calls that hit this node. We have to check for our collapse cases 437 * and we have a few optimizations strewn in as well. 438 */ 439 static swblk_t 440 blst_meta_alloc(blmeta_t *scan, swblk_t blkat, 441 swblk_t blk, swblk_t count, 442 int64_t radix, int skip) 443 { 444 int i; 445 int next_skip = ((u_int)skip / BLIST_META_RADIX); 446 int hintok = (blk >= blkat); 447 448 /* 449 * ALL-ALLOCATED special case 450 */ 451 if (scan->u.bmu_avail == 0) { 452 scan->bm_bighint = 0; 453 return(SWAPBLK_NONE); 454 } 455 456 /* 457 * ALL-FREE special case, initialize uninitialized 458 * sublevel. 459 * 460 * NOTE: radix may exceed 32 bits until first division. 461 */ 462 if (scan->u.bmu_avail == radix) { 463 scan->bm_bighint = radix; 464 465 radix /= BLIST_META_RADIX; 466 for (i = 1; i <= skip; i += next_skip) { 467 if (scan[i].bm_bighint == (swblk_t)-1) 468 break; 469 if (next_skip == 1) { 470 scan[i].u.bmu_bitmap = (u_swblk_t)-1; 471 scan[i].bm_bighint = BLIST_BMAP_RADIX; 472 } else { 473 scan[i].bm_bighint = (swblk_t)radix; 474 scan[i].u.bmu_avail = (swblk_t)radix; 475 } 476 } 477 } else { 478 radix /= BLIST_META_RADIX; 479 } 480 481 for (i = 1; i <= skip; i += next_skip) { 482 if (count <= scan[i].bm_bighint && 483 blk + (swblk_t)radix > blkat) { 484 /* 485 * count fits in object 486 */ 487 swblk_t r; 488 if (next_skip == 1) { 489 r = blst_leaf_alloc(&scan[i], blkat, 490 blk, count); 491 } else { 492 r = blst_meta_alloc(&scan[i], blkat, 493 blk, count, 494 radix, next_skip - 1); 495 } 496 if (r != SWAPBLK_NONE) { 497 scan->u.bmu_avail -= count; 498 if (scan->bm_bighint > scan->u.bmu_avail) 499 scan->bm_bighint = scan->u.bmu_avail; 500 return(r); 501 } 502 /* bighint was updated by recursion */ 503 } else if (scan[i].bm_bighint == (swblk_t)-1) { 504 /* 505 * Terminator 506 */ 507 break; 508 } else if (count > (swblk_t)radix) { 509 /* 510 * count does not fit in object even if it were 511 * complete free. 512 */ 513 panic("blist_meta_alloc: allocation too large"); 514 } 515 blk += (swblk_t)radix; 516 } 517 518 /* 519 * We couldn't allocate count in this subtree, update bighint. 520 */ 521 if (hintok && scan->bm_bighint >= count) 522 scan->bm_bighint = count - 1; 523 return(SWAPBLK_NONE); 524 } 525 526 /* 527 * BLST_LEAF_FREE() - free allocated block from leaf bitmap 528 */ 529 static void 530 blst_leaf_free(blmeta_t *scan, swblk_t blk, int count) 531 { 532 /* 533 * free some data in this bitmap 534 * 535 * e.g. 536 * 0000111111111110000 537 * \_________/\__/ 538 * v n 539 */ 540 int n = blk & (BLIST_BMAP_RADIX - 1); 541 u_swblk_t mask; 542 543 mask = ((u_swblk_t)-1 << n) & 544 ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 545 546 if (scan->u.bmu_bitmap & mask) 547 panic("blst_radix_free: freeing free block"); 548 scan->u.bmu_bitmap |= mask; 549 550 /* 551 * We could probably do a better job here. We are required to make 552 * bighint at least as large as the biggest contiguous block of 553 * data. If we just shoehorn it, a little extra overhead will 554 * be incured on the next allocation (but only that one typically). 555 */ 556 scan->bm_bighint = BLIST_BMAP_RADIX; 557 } 558 559 /* 560 * BLST_META_FREE() - free allocated blocks from radix tree meta info 561 * 562 * This support routine frees a range of blocks from the bitmap. 563 * The range must be entirely enclosed by this radix node. If a 564 * meta node, we break the range down recursively to free blocks 565 * in subnodes (which means that this code can free an arbitrary 566 * range whereas the allocation code cannot allocate an arbitrary 567 * range). 568 */ 569 570 static void 571 blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count, 572 int64_t radix, int skip, swblk_t blk) 573 { 574 int i; 575 int next_skip = ((u_int)skip / BLIST_META_RADIX); 576 577 #if 0 578 kprintf("FREE (%x,%d) FROM (%x,%lld)\n", 579 freeBlk, count, 580 blk, (long long)radix 581 ); 582 #endif 583 584 /* 585 * ALL-ALLOCATED special case, initialize for recursion. 586 * 587 * We will short-cut the ALL-ALLOCATED -> ALL-FREE case. 588 */ 589 if (scan->u.bmu_avail == 0) { 590 scan->u.bmu_avail = count; 591 scan->bm_bighint = count; 592 593 if (count != radix) { 594 for (i = 1; i <= skip; i += next_skip) { 595 if (scan[i].bm_bighint == (swblk_t)-1) 596 break; 597 scan[i].bm_bighint = 0; 598 if (next_skip == 1) { 599 scan[i].u.bmu_bitmap = 0; 600 } else { 601 scan[i].u.bmu_avail = 0; 602 } 603 } 604 /* fall through */ 605 } 606 } else { 607 scan->u.bmu_avail += count; 608 /* scan->bm_bighint = radix; */ 609 } 610 611 /* 612 * ALL-FREE special case. 613 * 614 * Set bighint for higher levels to snoop. 615 */ 616 if (scan->u.bmu_avail == radix) { 617 scan->bm_bighint = radix; 618 return; 619 } 620 621 /* 622 * Break the free down into its components 623 */ 624 if (scan->u.bmu_avail > radix) { 625 panic("blst_meta_free: freeing already " 626 "free blocks (%d) %d/%lld", 627 count, scan->u.bmu_avail, (long long)radix); 628 } 629 630 radix /= BLIST_META_RADIX; 631 632 i = (freeBlk - blk) / (swblk_t)radix; 633 blk += i * (swblk_t)radix; 634 i = i * next_skip + 1; 635 636 while (i <= skip && blk < freeBlk + count) { 637 swblk_t v; 638 639 v = blk + (swblk_t)radix - freeBlk; 640 if (v > count) 641 v = count; 642 643 if (scan->bm_bighint == (swblk_t)-1) 644 panic("blst_meta_free: freeing unexpected range"); 645 646 if (next_skip == 1) { 647 blst_leaf_free(&scan[i], freeBlk, v); 648 } else { 649 blst_meta_free(&scan[i], freeBlk, v, 650 radix, next_skip - 1, blk); 651 } 652 653 /* 654 * After having dealt with the becomes-all-free case any 655 * partial free will not be able to bring us to the 656 * becomes-all-free state. 657 * 658 * We can raise bighint to at least the sub-segment's 659 * bighint. 660 */ 661 if (scan->bm_bighint < scan[i].bm_bighint) { 662 scan->bm_bighint = scan[i].bm_bighint; 663 } 664 count -= v; 665 freeBlk += v; 666 blk += (swblk_t)radix; 667 i += next_skip; 668 } 669 } 670 671 /* 672 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap 673 * 674 * Allocates all blocks in the specified range regardless of 675 * any existing allocations in that range. Returns the number 676 * of blocks allocated by the call. 677 */ 678 static swblk_t 679 blst_leaf_fill(blmeta_t *scan, swblk_t blk, int count) 680 { 681 int n = blk & (BLIST_BMAP_RADIX - 1); 682 swblk_t nblks; 683 u_swblk_t mask, bitmap; 684 685 mask = ((u_swblk_t)-1 << n) & 686 ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n)); 687 688 /* Count the number of blocks we're about to allocate */ 689 bitmap = scan->u.bmu_bitmap & mask; 690 for (nblks = 0; bitmap != 0; nblks++) 691 bitmap &= bitmap - 1; 692 693 scan->u.bmu_bitmap &= ~mask; 694 return (nblks); 695 } 696 697 /* 698 * BLST_META_FILL() - allocate specific blocks at a meta node 699 * 700 * Allocates the specified range of blocks, regardless of 701 * any existing allocations in the range. The range must 702 * be within the extent of this node. Returns the number 703 * of blocks allocated by the call. 704 */ 705 static swblk_t 706 blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count, 707 int64_t radix, int skip, swblk_t blk) 708 { 709 int i; 710 int next_skip = ((u_int)skip / BLIST_META_RADIX); 711 swblk_t nblks = 0; 712 713 if (count == radix || scan->u.bmu_avail == 0) { 714 /* 715 * ALL-ALLOCATED special case 716 */ 717 nblks = scan->u.bmu_avail; 718 scan->u.bmu_avail = 0; 719 scan->bm_bighint = count; 720 return (nblks); 721 } 722 723 if (scan->u.bmu_avail == radix) { 724 radix /= BLIST_META_RADIX; 725 726 /* 727 * ALL-FREE special case, initialize sublevel 728 */ 729 for (i = 1; i <= skip; i += next_skip) { 730 if (scan[i].bm_bighint == (swblk_t)-1) 731 break; 732 if (next_skip == 1) { 733 scan[i].u.bmu_bitmap = (u_swblk_t)-1; 734 scan[i].bm_bighint = BLIST_BMAP_RADIX; 735 } else { 736 scan[i].bm_bighint = (swblk_t)radix; 737 scan[i].u.bmu_avail = (swblk_t)radix; 738 } 739 } 740 } else { 741 radix /= BLIST_META_RADIX; 742 } 743 744 if (count > (swblk_t)radix) 745 panic("blst_meta_fill: allocation too large"); 746 747 i = (fillBlk - blk) / (swblk_t)radix; 748 blk += i * (swblk_t)radix; 749 i = i * next_skip + 1; 750 751 while (i <= skip && blk < fillBlk + count) { 752 swblk_t v; 753 754 v = blk + (swblk_t)radix - fillBlk; 755 if (v > count) 756 v = count; 757 758 if (scan->bm_bighint == (swblk_t)-1) 759 panic("blst_meta_fill: filling unexpected range"); 760 761 if (next_skip == 1) { 762 nblks += blst_leaf_fill(&scan[i], fillBlk, v); 763 } else { 764 nblks += blst_meta_fill(&scan[i], fillBlk, v, 765 radix, next_skip - 1, blk); 766 } 767 count -= v; 768 fillBlk += v; 769 blk += (swblk_t)radix; 770 i += next_skip; 771 } 772 scan->u.bmu_avail -= nblks; 773 return (nblks); 774 } 775 776 /* 777 * BLIST_RADIX_COPY() - copy one radix tree to another 778 * 779 * Locates free space in the source tree and frees it in the destination 780 * tree. The space may not already be free in the destination. 781 */ 782 783 static void 784 blst_copy(blmeta_t *scan, swblk_t blk, int64_t radix, 785 swblk_t skip, blist_t dest, swblk_t count) 786 { 787 int next_skip; 788 int i; 789 790 /* 791 * Leaf node 792 */ 793 794 if (radix == BLIST_BMAP_RADIX) { 795 u_swblk_t v = scan->u.bmu_bitmap; 796 797 if (v == (u_swblk_t)-1) { 798 blist_free(dest, blk, count); 799 } else if (v != 0) { 800 int i; 801 802 for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { 803 if (v & (1 << i)) 804 blist_free(dest, blk + i, 1); 805 } 806 } 807 return; 808 } 809 810 /* 811 * Meta node 812 */ 813 814 if (scan->u.bmu_avail == 0) { 815 /* 816 * Source all allocated, leave dest allocated 817 */ 818 return; 819 } 820 if (scan->u.bmu_avail == radix) { 821 /* 822 * Source all free, free entire dest 823 */ 824 if (count < radix) 825 blist_free(dest, blk, count); 826 else 827 blist_free(dest, blk, (swblk_t)radix); 828 return; 829 } 830 831 832 radix /= BLIST_META_RADIX; 833 next_skip = ((u_int)skip / BLIST_META_RADIX); 834 835 for (i = 1; count && i <= skip; i += next_skip) { 836 if (scan[i].bm_bighint == (swblk_t)-1) 837 break; 838 839 if (count >= (swblk_t)radix) { 840 blst_copy( 841 &scan[i], 842 blk, 843 radix, 844 next_skip - 1, 845 dest, 846 (swblk_t)radix 847 ); 848 count -= (swblk_t)radix; 849 } else { 850 if (count) { 851 blst_copy( 852 &scan[i], 853 blk, 854 radix, 855 next_skip - 1, 856 dest, 857 count 858 ); 859 } 860 count = 0; 861 } 862 blk += (swblk_t)radix; 863 } 864 } 865 866 /* 867 * BLST_RADIX_INIT() - initialize radix tree 868 * 869 * Initialize our meta structures and bitmaps and calculate the exact 870 * amount of space required to manage 'count' blocks - this space may 871 * be considerably less then the calculated radix due to the large 872 * RADIX values we use. 873 */ 874 875 static swblk_t 876 blst_radix_init(blmeta_t *scan, int64_t radix, int skip, swblk_t count) 877 { 878 int i; 879 int next_skip; 880 swblk_t memindex = 0; 881 882 /* 883 * Leaf node 884 */ 885 886 if (radix == BLIST_BMAP_RADIX) { 887 if (scan) { 888 scan->bm_bighint = 0; 889 scan->u.bmu_bitmap = 0; 890 } 891 return(memindex); 892 } 893 894 /* 895 * Meta node. If allocating the entire object we can special 896 * case it. However, we need to figure out how much memory 897 * is required to manage 'count' blocks, so we continue on anyway. 898 */ 899 900 if (scan) { 901 scan->bm_bighint = 0; 902 scan->u.bmu_avail = 0; 903 } 904 905 radix /= BLIST_META_RADIX; 906 next_skip = ((u_int)skip / BLIST_META_RADIX); 907 908 for (i = 1; i <= skip; i += next_skip) { 909 if (count >= (swblk_t)radix) { 910 /* 911 * Allocate the entire object 912 */ 913 memindex = i + blst_radix_init( 914 ((scan) ? &scan[i] : NULL), 915 radix, 916 next_skip - 1, 917 (swblk_t)radix 918 ); 919 count -= (swblk_t)radix; 920 } else if (count > 0) { 921 /* 922 * Allocate a partial object 923 */ 924 memindex = i + blst_radix_init( 925 ((scan) ? &scan[i] : NULL), 926 radix, 927 next_skip - 1, 928 count 929 ); 930 count = 0; 931 } else { 932 /* 933 * Add terminator and break out 934 */ 935 if (scan) 936 scan[i].bm_bighint = (swblk_t)-1; 937 break; 938 } 939 } 940 if (memindex < i) 941 memindex = i; 942 return(memindex); 943 } 944 945 #ifdef BLIST_DEBUG 946 947 static void 948 blst_radix_print(blmeta_t *scan, swblk_t blk, int64_t radix, int skip, int tab) 949 { 950 int i; 951 int next_skip; 952 953 if (radix == BLIST_BMAP_RADIX) { 954 kprintf( 955 "%*.*s(%04x,%lld): bitmap %08x big=%d\n", 956 tab, tab, "", 957 blk, (long long)radix, 958 scan->u.bmu_bitmap, 959 scan->bm_bighint 960 ); 961 return; 962 } 963 964 if (scan->u.bmu_avail == 0) { 965 kprintf( 966 "%*.*s(%04x,%lld) ALL ALLOCATED\n", 967 tab, tab, "", 968 blk, 969 (long long)radix 970 ); 971 return; 972 } 973 if (scan->u.bmu_avail == radix) { 974 kprintf( 975 "%*.*s(%04x,%lld) ALL FREE\n", 976 tab, tab, "", 977 blk, 978 (long long)radix 979 ); 980 return; 981 } 982 983 kprintf( 984 "%*.*s(%04x,%lld): subtree (%d/%lld) big=%d {\n", 985 tab, tab, "", 986 blk, (long long)radix, 987 scan->u.bmu_avail, 988 (long long)radix, 989 scan->bm_bighint 990 ); 991 992 radix /= BLIST_META_RADIX; 993 next_skip = ((u_int)skip / BLIST_META_RADIX); 994 tab += 4; 995 996 for (i = 1; i <= skip; i += next_skip) { 997 if (scan[i].bm_bighint == (swblk_t)-1) { 998 kprintf( 999 "%*.*s(%04x,%lld): Terminator\n", 1000 tab, tab, "", 1001 blk, (long long)radix 1002 ); 1003 break; 1004 } 1005 blst_radix_print( 1006 &scan[i], 1007 blk, 1008 radix, 1009 next_skip - 1, 1010 tab 1011 ); 1012 blk += (swblk_t)radix; 1013 } 1014 tab -= 4; 1015 1016 kprintf( 1017 "%*.*s}\n", 1018 tab, tab, "" 1019 ); 1020 } 1021 1022 #endif 1023 1024 #ifdef BLIST_DEBUG 1025 1026 int 1027 main(int ac, char **av) 1028 { 1029 int size = 1024; 1030 int i; 1031 blist_t bl; 1032 1033 for (i = 1; i < ac; ++i) { 1034 const char *ptr = av[i]; 1035 if (*ptr != '-') { 1036 size = strtol(ptr, NULL, 0); 1037 continue; 1038 } 1039 ptr += 2; 1040 fprintf(stderr, "Bad option: %s\n", ptr - 2); 1041 exit(1); 1042 } 1043 bl = blist_create(size); 1044 blist_free(bl, 0, size); 1045 1046 for (;;) { 1047 char buf[1024]; 1048 swblk_t da = 0; 1049 swblk_t count = 0; 1050 swblk_t blkat; 1051 1052 1053 kprintf("%d/%d/%lld> ", 1054 bl->bl_free, size, (long long)bl->bl_radix); 1055 fflush(stdout); 1056 if (fgets(buf, sizeof(buf), stdin) == NULL) 1057 break; 1058 switch(buf[0]) { 1059 case 'r': 1060 if (sscanf(buf + 1, "%d", &count) == 1) { 1061 blist_resize(&bl, count, 1); 1062 size = count; 1063 } else { 1064 kprintf("?\n"); 1065 } 1066 case 'p': 1067 blist_print(bl); 1068 break; 1069 case 'a': 1070 if (sscanf(buf + 1, "%d %d", &count, &blkat) == 1) { 1071 swblk_t blk = blist_alloc(bl, count); 1072 kprintf(" R=%04x\n", blk); 1073 } else if (sscanf(buf + 1, "%d %d", &count, &blkat) == 2) { 1074 swblk_t blk = blist_allocat(bl, count, blkat); 1075 kprintf(" R=%04x\n", blk); 1076 } else { 1077 kprintf("?\n"); 1078 } 1079 break; 1080 case 'f': 1081 if (sscanf(buf + 1, "%x %d", &da, &count) == 2) { 1082 blist_free(bl, da, count); 1083 } else { 1084 kprintf("?\n"); 1085 } 1086 break; 1087 case 'l': 1088 if (sscanf(buf + 1, "%x %d", &da, &count) == 2) { 1089 printf(" n=%d\n", 1090 blist_fill(bl, da, count)); 1091 } else { 1092 kprintf("?\n"); 1093 } 1094 break; 1095 case '?': 1096 case 'h': 1097 puts( 1098 "p -print\n" 1099 "a %d -allocate\n" 1100 "f %x %d -free\n" 1101 "l %x %d -fill\n" 1102 "r %d -resize\n" 1103 "h/? -help" 1104 ); 1105 break; 1106 default: 1107 kprintf("?\n"); 1108 break; 1109 } 1110 } 1111 return(0); 1112 } 1113 1114 void 1115 panic(const char *ctl, ...) 1116 { 1117 __va_list va; 1118 1119 __va_start(va, ctl); 1120 vfprintf(stderr, ctl, va); 1121 fprintf(stderr, "\n"); 1122 __va_end(va); 1123 exit(1); 1124 } 1125 1126 #endif 1127 1128