/*
 * BLIST.C -	Bitmap allocator/deallocator, using a radix tree with hinting
 *
 * Copyright (c) 1998,2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * This module implements a general bitmap allocator/deallocator.  The
 * allocator eats around 2 bits per 'block'.  The module does not
 * try to interpret the meaning of a 'block' other than to return
 * SWAPBLK_NONE on an allocation failure.
 *
 * A radix tree is used to maintain the bitmap.  Two radix constants are
 * involved: one for the bitmaps contained in the leaf nodes (typically
 * 32), and one for the meta nodes (typically 16).  Both meta and leaf
 * nodes have a hint field.  This field gives us a hint as to the largest
 * free contiguous range of blocks under the node.  It may contain a
 * value that is too high, but will never contain a value that is too
 * low.  When the radix tree is searched, allocation failures in subtrees
 * update the hint.
 *
 * The radix tree also implements two collapsed states for meta nodes:
 * the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is
 * in either of these two states, all information contained underneath
 * the node is considered stale.  These states are used to optimize
 * allocation and freeing operations.
 *
 * The hinting greatly increases code efficiency for allocations while
 * the general radix structure optimizes both allocations and frees.  The
 * radix tree should be able to operate well no matter how much
 * fragmentation there is and no matter how large a bitmap is used.
 *
 * Unlike the rlist code, the blist code wires all necessary memory at
 * creation time.  Neither allocations nor frees require interaction with
 * the memory subsystem.  In contrast, the rlist code may allocate memory
 * on an rlist_free() call.
 * The non-blocking features of the blist code are used to great
 * advantage in the swap code (vm/nswap_pager.c).  The rlist code uses
 * a little less overall memory than the blist code (but due to swap
 * interleaving not all that much less), but the blist code scales much,
 * much better.
 *
 * LAYOUT: The radix tree is laid out recursively using a linear array.
 * Each meta node is immediately followed (laid out sequentially in
 * memory) by BLIST_META_RADIX lower-level nodes.  This is a recursive
 * structure but one that can be easily scanned through a very simple
 * 'skip' calculation.  In order to support large radixes, portions of
 * the tree may reside outside our memory allocation.  We handle this
 * with an early-termination optimization (when bighint is set to -1) on
 * the scan.  The memory allocation is only large enough to cover the
 * number of blocks requested at creation time even if it must be
 * encompassed by a larger root-node radix.
 *
 * NOTE: The allocator cannot currently allocate more than
 * BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too
 * large' if you try.  This is an area that could use improvement.  The
 * radix is large enough that this restriction does not affect the swap
 * system, though.  Currently only the allocation code is affected by
 * this algorithmic unfeature.  The freeing code can handle arbitrary
 * ranges.
 *
 * NOTE: The radix may exceed BLIST_BMAP_RADIX bits in order to support
 * up to 2^(BLIST_BMAP_RADIX-1) blocks.  The first division will
 * drop the radix down and fit it within a signed BLIST_BMAP_RADIX
 * bit integer.
 *
 * This code can be compiled stand-alone for debugging.
 */

#ifdef _KERNEL

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>

#else

#ifndef BLIST_NO_DEBUG
#define BLIST_DEBUG
#endif

#define SWAPBLK_NONE	((swblk_t)-1)

#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>

#define kmalloc(a,b,c)	malloc(a)
#define kfree(a,b)	free(a)
#define kprintf		printf
#define KKASSERT(exp)

#include <sys/blist.h>

void panic(const char *ctl, ...);

#endif

/*
 * static support functions
 */

static swblk_t blst_leaf_alloc(blmeta_t *scan, swblk_t blkat,
				swblk_t blk, swblk_t count);
static swblk_t blst_meta_alloc(blmeta_t *scan, swblk_t blkat,
				swblk_t blk, swblk_t count,
				int64_t radix, swblk_t skip);
static void blst_leaf_free(blmeta_t *scan, swblk_t relblk, swblk_t count);
static void blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count,
				int64_t radix, swblk_t skip,
				swblk_t blk);
static swblk_t blst_leaf_fill(blmeta_t *scan, swblk_t blk, swblk_t count);
static swblk_t blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count,
				int64_t radix, swblk_t skip,
				swblk_t blk);
static void blst_copy(blmeta_t *scan, swblk_t blk, int64_t radix,
				swblk_t skip, blist_t dest, swblk_t count);
static swblk_t blst_radix_init(blmeta_t *scan, int64_t radix,
				swblk_t skip, swblk_t count);
#ifndef _KERNEL
static void blst_radix_print(blmeta_t *scan, swblk_t blk,
				int64_t radix, swblk_t skip, int tab);
#endif

#ifdef _KERNEL
static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
#endif
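
/*
 * Worked example (a sketch, assuming the typical radix constants of
 * BLIST_BMAP_RADIX = 32 and BLIST_META_RADIX = 16 mentioned above):
 * for blist_create(1000) the sizing loop below runs twice,
 *
 *	radix:	32 -> 512 -> 8192	(radix *= BLIST_META_RADIX)
 *	skip:	 0 ->  16 ->  272	(skip = (skip + 1) * BLIST_META_RADIX)
 *
 * so the root becomes a meta node covering 8192 blocks.  With
 * next_skip = skip / BLIST_META_RADIX = 17, the root's 16 children sit
 * at bl_root[1], bl_root[18], ..., bl_root[256], and each child's own
 * 16 leaves immediately follow it in the array.  Only the subtrees
 * needed for the requested 1000 blocks are actually allocated; the
 * remainder of any scan is cut short by the bighint == -1 terminator.
 */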

/*
 * blist_create() - create a blist capable of handling up to the specified
 *		    number of blocks
 *
 *	blocks must be greater than 0
 *
 *	The smallest blist consists of a single leaf node capable of
 *	managing BLIST_BMAP_RADIX blocks.
 */
blist_t
blist_create(swblk_t blocks)
{
	blist_t bl;
	int64_t radix;
	swblk_t skip = 0;

	/*
	 * Calculate radix and skip field used for scanning.
	 *
	 * Radix can exceed BLIST_BMAP_RADIX bits even if swblk_t is limited
	 * to BLIST_BMAP_RADIX bits.
	 */
	radix = BLIST_BMAP_RADIX;

	while (radix < blocks) {
		radix *= BLIST_META_RADIX;
		skip = (skip + 1) * BLIST_META_RADIX;
		KKASSERT(skip > 0);
	}

	bl = kmalloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO);

	bl->bl_blocks = blocks;
	bl->bl_radix = radix;
	bl->bl_skip = skip;
	bl->bl_rootblks = 1 +
	    blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
	bl->bl_root = kmalloc(sizeof(blmeta_t) * bl->bl_rootblks,
			      M_SWAP, M_WAITOK);

#if defined(BLIST_DEBUG)
	kprintf(
		"BLIST representing %d blocks (%d MB of swap)"
		", requiring %dK of ram\n",
		bl->bl_blocks,
		bl->bl_blocks * 4 / 1024,
		(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
	);
	kprintf("BLIST raw radix tree contains %d records\n", bl->bl_rootblks);
#endif
	blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);

	return(bl);
}

void
blist_destroy(blist_t bl)
{
	kfree(bl->bl_root, M_SWAP);
	kfree(bl, M_SWAP);
}

/*
 * blist_alloc() -	reserve space in the block bitmap.  Return the base
 *			of a contiguous region or SWAPBLK_NONE if space
 *			could not be allocated.
 */
swblk_t
blist_alloc(blist_t bl, swblk_t count)
{
	swblk_t blk = SWAPBLK_NONE;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, 0, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, 0, 0, count,
					      bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE)
			bl->bl_free -= count;
	}
	return(blk);
}

swblk_t
blist_allocat(blist_t bl, swblk_t count, swblk_t blkat)
{
	swblk_t blk = SWAPBLK_NONE;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blk = blst_leaf_alloc(bl->bl_root, blkat, 0, count);
		else
			blk = blst_meta_alloc(bl->bl_root, blkat, 0, count,
					      bl->bl_radix, bl->bl_skip);
		if (blk != SWAPBLK_NONE)
			bl->bl_free -= count;
	}
	return(blk);
}
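
/*
 * Usage sketch (illustrative only, not compiled): a typical consumer
 * creates a blist, frees the range it wants to make available (a new
 * blist starts fully allocated), reserves and releases ranges, and
 * finally destroys it.  The sizes and block numbers below are arbitrary
 * example values, not anything this file requires.
 */
#if 0
static void
blist_usage_example(void)
{
	blist_t bl = blist_create(8192);	/* manage 8192 blocks */
	swblk_t blk;

	blist_free(bl, 0, 8192);		/* make the whole range free */
	blk = blist_alloc(bl, 16);		/* reserve 16 contiguous blocks */
	if (blk != SWAPBLK_NONE)
		blist_free(bl, blk, 16);	/* release them again */
	blist_fill(bl, 100, 4);			/* force blocks 100-103 allocated */
	blist_destroy(bl);
}
#endif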

/*
 * blist_free() -	free up space in the block bitmap.  Return the base
 *			of a contiguous region.  Panic if an inconsistency
 *			is found.
 */
void
blist_free(blist_t bl, swblk_t blkno, swblk_t count)
{
	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX)
			blst_leaf_free(bl->bl_root, blkno, count);
		else
			blst_meta_free(bl->bl_root, blkno, count,
				       bl->bl_radix, bl->bl_skip, 0);
		bl->bl_free += count;
	}
}

/*
 * blist_fill() -	mark a region in the block bitmap as off-limits
 *			to the allocator (i.e. allocate it), ignoring any
 *			existing allocations.  Return the number of blocks
 *			actually filled that were free before the call.
 */
swblk_t
blist_fill(blist_t bl, swblk_t blkno, swblk_t count)
{
	swblk_t filled;

	if (bl) {
		if (bl->bl_radix == BLIST_BMAP_RADIX) {
			filled = blst_leaf_fill(bl->bl_root, blkno, count);
		} else {
			filled = blst_meta_fill(bl->bl_root, blkno, count,
						bl->bl_radix, bl->bl_skip, 0);
		}
		bl->bl_free -= filled;
		return (filled);
	} else {
		return 0;
	}
}

/*
 * blist_resize() -	resize an existing radix tree to handle the
 *			specified number of blocks.  This will reallocate
 *			the tree and transfer the previous bitmap to the
 *			new one.  When extending the tree you can specify
 *			whether the new blocks are to be left allocated or
 *			freed.
 */
void
blist_resize(blist_t *pbl, swblk_t count, int freenew)
{
	blist_t newbl = blist_create(count);
	blist_t save = *pbl;

	*pbl = newbl;
	if (count > save->bl_blocks)
		count = save->bl_blocks;
	blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);

	/*
	 * If resizing upwards, should we free the new space or not?
	 */
	if (freenew && count < newbl->bl_blocks) {
		blist_free(newbl, count, newbl->bl_blocks - count);
	}
	blist_destroy(save);
}

#ifdef BLIST_DEBUG

/*
 * blist_print()    - dump radix tree
 */
void
blist_print(blist_t bl)
{
	kprintf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	kprintf("}\n");
}

#endif

/************************************************************************
 *			ALLOCATION SUPPORT FUNCTIONS			*
 ************************************************************************
 *
 *	These support functions do all the actual work.  They may seem
 *	rather longish, but that's because I've commented them up.  The
 *	actual code is straightforward.
 */

/*
 * blst_leaf_alloc() -	allocate at a leaf in the radix tree (a bitmap).
 *
 *	This is the core of the allocator and is optimized for the 1 block
 *	and the BLIST_BMAP_RADIX block allocation cases.  Other cases are
 *	somewhat slower.  The 1 block allocation case is log2 and extremely
 *	quick.
 */
static swblk_t
blst_leaf_alloc(blmeta_t *scan, swblk_t blkat __unused, swblk_t blk,
		swblk_t count)
{
	u_swblk_t orig = scan->u.bmu_bitmap;

	if (orig == 0) {
		/*
		 * Optimize bitmap all-allocated case.  Also, count = 1
		 * case assumes at least 1 bit is free in the bitmap, so
		 * we have to take care of this case here.
		 */
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}
	if (count == 1) {
		/*
		 * Optimized code to allocate one bit out of the bitmap
		 */
		u_swblk_t mask;
		int j = BLIST_BMAP_RADIX/2;
		int r = 0;

		mask = (u_swblk_t)-1 >> (BLIST_BMAP_RADIX/2);

		while (j) {
			if ((orig & mask) == 0) {
				r += j;
				orig >>= j;
			}
			j >>= 1;
			mask >>= j;
		}
		scan->u.bmu_bitmap &= ~((swblk_t)1 << r);
		return(blk + r);
	}
	if (count <= BLIST_BMAP_RADIX) {
		/*
		 * Non-optimized code to allocate N bits out of the bitmap.
		 * The more bits, the faster the code runs.  It will run
		 * the slowest allocating 2 bits, but since there aren't any
		 * memory ops in the core loop (or shouldn't be, anyway),
		 * you probably won't notice the difference.
		 */
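		/*
		 * Illustration (a sketch, using a hypothetical 8-bit
		 * bitmap for brevity): for count = 3, n = 5 and the
		 * initial mask is 00000111.  Each pass tests whether all
		 * three bits under the mask are free in 'orig' and, if
		 * not, shifts the mask left by one; positions 0..n are
		 * tried before giving up.
		 */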
		int j;
		int n = (int)(BLIST_BMAP_RADIX - count);
		u_swblk_t mask;

		mask = (u_swblk_t)-1 >> n;

		for (j = 0; j <= n; ++j) {
			if ((orig & mask) == mask) {
				scan->u.bmu_bitmap &= ~mask;
				return(blk + j);
			}
			mask = (mask << 1);
		}
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	scan->bm_bighint = count - 1;

	return(SWAPBLK_NONE);
}

/*
 * blst_meta_alloc() -	allocate at a meta node in the radix tree.
 *
 *	Attempt to allocate at a meta node.  If we can't, we update
 *	bighint and return a failure.  Updating bighint optimizes future
 *	calls that hit this node.  We have to check for our collapse cases
 *	and we have a few optimizations strewn in as well.
 */
static swblk_t
blst_meta_alloc(blmeta_t *scan, swblk_t blkat,
		swblk_t blk, swblk_t count,
		int64_t radix, swblk_t skip)
{
	int hintok = (blk >= blkat);
	swblk_t next_skip = ((u_int)skip / BLIST_META_RADIX);
	swblk_t i;

	/*
	 * ALL-ALLOCATED special case
	 */
	if (scan->u.bmu_avail == 0) {
		scan->bm_bighint = 0;
		return(SWAPBLK_NONE);
	}

	/*
	 * ALL-FREE special case, initialize uninitialized
	 * sublevel.
	 *
	 * NOTE: radix may exceed 32 bits until first division.
	 */
	if (scan->u.bmu_avail == radix) {
		scan->bm_bighint = radix;

		radix /= BLIST_META_RADIX;
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (swblk_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_swblk_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = (swblk_t)radix;
				scan[i].u.bmu_avail = (swblk_t)radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	for (i = 1; i <= skip; i += next_skip) {
		if (count <= scan[i].bm_bighint &&
		    blk + (swblk_t)radix > blkat) {
			/*
			 * count fits in object
			 */
			swblk_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blkat,
						    blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blkat,
						    blk, count,
						    radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
			/* bighint was updated by recursion */
		} else if (scan[i].bm_bighint == (swblk_t)-1) {
			/*
			 * Terminator
			 */
			break;
		} else if (count > (swblk_t)radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("blist_meta_alloc: allocation too large");
		}
		blk += (swblk_t)radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (hintok && scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}
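
/*
 * Note on the hint updates above (a sketch, not part of the algorithm):
 * if an allocation of, say, 8 blocks fails inside a subtree, its
 * bm_bighint is lowered to 7.  A later request for 8 or more blocks
 * compares count against that hint and skips the subtree without
 * descending into it, while a smaller request still searches it.  The
 * hint is only an upper bound, so it may later be raised again by
 * blst_leaf_free()/blst_meta_free() when space is returned.
 */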

/*
 * BLST_LEAF_FREE() -	free allocated block from leaf bitmap
 */
static void
blst_leaf_free(blmeta_t *scan, swblk_t blk, swblk_t count)
{
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	    \_________/\__/
	 *		v        n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_swblk_t mask;

	mask = ((u_swblk_t)-1 << n) &
	    ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	if (scan->u.bmu_bitmap & mask)
		panic("blst_radix_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incurred on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}

/*
 * BLST_META_FREE() -	free allocated blocks from radix tree meta info
 *
 *	This support routine frees a range of blocks from the bitmap.
 *	The range must be entirely enclosed by this radix node.  If a
 *	meta node, we break the range down recursively to free blocks
 *	in subnodes (which means that this code can free an arbitrary
 *	range whereas the allocation code cannot allocate an arbitrary
 *	range).
 */
static void
blst_meta_free(blmeta_t *scan, swblk_t freeBlk, swblk_t count,
	       int64_t radix, swblk_t skip, swblk_t blk)
{
	swblk_t i;
	swblk_t next_skip = ((u_int)skip / BLIST_META_RADIX);

#if 0
	kprintf("FREE (%x,%d) FROM (%x,%lld)\n",
	    freeBlk, count,
	    blk, (long long)radix
	);
#endif

	/*
	 * ALL-ALLOCATED special case, initialize for recursion.
	 *
	 * We will short-cut the ALL-ALLOCATED -> ALL-FREE case.
	 */
	if (scan->u.bmu_avail == 0) {
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix) {
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (swblk_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 *
	 * Set bighint for higher levels to snoop.
	 */
	if (scan->u.bmu_avail == radix) {
		scan->bm_bighint = radix;
		return;
	}

	/*
	 * Break the free down into its components
	 */
	if (scan->u.bmu_avail > radix) {
		panic("blst_meta_free: freeing already "
		      "free blocks (%ld) %ld/%lld",
		      (long)count, (long)scan->u.bmu_avail, (long long)radix);
	}

	radix /= BLIST_META_RADIX;

	i = (freeBlk - blk) / (swblk_t)radix;
	blk += i * (swblk_t)radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		swblk_t v;

		v = blk + (swblk_t)radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (swblk_t)-1)
			panic("blst_meta_free: freeing unexpected range");

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v,
				       radix, next_skip - 1, blk);
		}

		/*
		 * After having dealt with the becomes-all-free case any
		 * partial free will not be able to bring us to the
		 * becomes-all-free state.
		 *
		 * We can raise bighint to at least the sub-segment's
		 * bighint.
		 */
		if (scan->bm_bighint < scan[i].bm_bighint) {
			scan->bm_bighint = scan[i].bm_bighint;
		}
		count -= v;
		freeBlk += v;
		blk += (swblk_t)radix;
		i += next_skip;
	}
}
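
/*
 * Illustration of the child-index arithmetic used by blst_meta_free()
 * above and blst_meta_fill() below (a sketch, assuming the typical
 * radix constants): with a parent of radix 8192 and skip 272, each
 * child covers radix / BLIST_META_RADIX = 512 blocks and the stride
 * between children in the array is next_skip = 17.  Freeing block 1300
 * relative to a parent starting at block 0 gives
 *
 *	i   = (1300 - 0) / 512 = 2	(third child)
 *	blk = 2 * 512 = 1024		(child's first block)
 *	i   = 2 * 17 + 1 = 35		(child's array index)
 *
 * which matches the layout produced by blst_radix_init().
 */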

/*
 * BLST_LEAF_FILL() -	allocate specific blocks in leaf bitmap
 *
 *	Allocates all blocks in the specified range regardless of
 *	any existing allocations in that range.  Returns the number
 *	of blocks allocated by the call.
 */
static swblk_t
blst_leaf_fill(blmeta_t *scan, swblk_t blk, swblk_t count)
{
	int n = blk & (BLIST_BMAP_RADIX - 1);
	swblk_t nblks;
	u_swblk_t mask, bitmap;

	mask = ((u_swblk_t)-1 << n) &
	    ((u_swblk_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* Count the number of blocks we're about to allocate */
	bitmap = scan->u.bmu_bitmap & mask;
	for (nblks = 0; bitmap != 0; nblks++)
		bitmap &= bitmap - 1;

	scan->u.bmu_bitmap &= ~mask;
	return (nblks);
}

/*
 * BLST_META_FILL() -	allocate specific blocks at a meta node
 *
 *	Allocates the specified range of blocks, regardless of
 *	any existing allocations in the range.  The range must
 *	be within the extent of this node.  Returns the number
 *	of blocks allocated by the call.
 */
static swblk_t
blst_meta_fill(blmeta_t *scan, swblk_t fillBlk, swblk_t count,
	       int64_t radix, swblk_t skip, swblk_t blk)
{
	swblk_t i;
	swblk_t next_skip = ((u_int)skip / BLIST_META_RADIX);
	swblk_t nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return (nblks);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize sublevel
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (swblk_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_swblk_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = (swblk_t)radix;
				scan[i].u.bmu_avail = (swblk_t)radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > (swblk_t)radix)
		panic("blst_meta_fill: allocation too large");

	i = (fillBlk - blk) / (swblk_t)radix;
	blk += i * (swblk_t)radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < fillBlk + count) {
		swblk_t v;

		v = blk + (swblk_t)radix - fillBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (swblk_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], fillBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], fillBlk, v,
						radix, next_skip - 1, blk);
		}
		count -= v;
		fillBlk += v;
		blk += (swblk_t)radix;
		i += next_skip;
	}
	scan->u.bmu_avail -= nblks;
	return (nblks);
}
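
/*
 * Fill semantics, worked example (a sketch with arbitrary block
 * numbers): if blocks 10-13 are already allocated and the caller does
 * blist_fill(bl, 8, 8), the whole range 8-15 ends up allocated but the
 * return value is 4, since only blocks 8, 9, 14 and 15 were free before
 * the call.  The leaf-level count comes from the bitmap &= bitmap - 1
 * loop in blst_leaf_fill(), which clears one set (free) bit per
 * iteration.
 */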

/*
 * BLIST_RADIX_COPY() -	copy one radix tree to another
 *
 *	Locates free space in the source tree and frees it in the
 *	destination tree.  The space must not already be free in the
 *	destination.
 */
static void
blst_copy(blmeta_t *scan, swblk_t blk, int64_t radix,
	  swblk_t skip, blist_t dest, swblk_t count)
{
	swblk_t next_skip;
	swblk_t i;

	/*
	 * Leaf node
	 */
	if (radix == BLIST_BMAP_RADIX) {
		u_swblk_t v = scan->u.bmu_bitmap;

		if (v == (u_swblk_t)-1) {
			blist_free(dest, blk, count);
		} else if (v != 0) {
			swblk_t i;

			for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
				if (v & ((swblk_t)1 << i))
					blist_free(dest, blk + i, 1);
			}
		}
		return;
	}

	/*
	 * Meta node
	 */
	if (scan->u.bmu_avail == 0) {
		/*
		 * Source all allocated, leave dest allocated
		 */
		return;
	}
	if (scan->u.bmu_avail == radix) {
		/*
		 * Source all free, free entire dest
		 */
		if (count < radix)
			blist_free(dest, blk, count);
		else
			blist_free(dest, blk, (swblk_t)radix);
		return;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);

	for (i = 1; count && i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1)
			break;

		if (count >= (swblk_t)radix) {
			blst_copy(
			    &scan[i],
			    blk,
			    radix,
			    next_skip - 1,
			    dest,
			    (swblk_t)radix
			);
			count -= (swblk_t)radix;
		} else {
			if (count) {
				blst_copy(
				    &scan[i],
				    blk,
				    radix,
				    next_skip - 1,
				    dest,
				    count
				);
			}
			count = 0;
		}
		blk += (swblk_t)radix;
	}
}

/*
 * BLST_RADIX_INIT() -	initialize radix tree
 *
 *	Initialize our meta structures and bitmaps and calculate the exact
 *	amount of space required to manage 'count' blocks - this space may
 *	be considerably less than the calculated radix due to the large
 *	RADIX values we use.
 */
static swblk_t
blst_radix_init(blmeta_t *scan, int64_t radix, swblk_t skip, swblk_t count)
{
	swblk_t i;
	swblk_t next_skip;
	swblk_t memindex = 0;

	/*
	 * Leaf node
	 */
	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 */
	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= (swblk_t)radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    (swblk_t)radix
			);
			count -= (swblk_t)radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out
			 */
			if (scan)
				scan[i].bm_bighint = (swblk_t)-1;
			break;
		}
	}
	if (memindex < i)
		memindex = i;
	return(memindex);
}

#ifdef BLIST_DEBUG

static void
blst_radix_print(blmeta_t *scan, swblk_t blk, int64_t radix, swblk_t skip,
		 int tab)
{
	swblk_t i;
	swblk_t next_skip;

	if (radix == BLIST_BMAP_RADIX) {
		kprintf(
		    "%*.*s(%04x,%lld): bitmap %08x big=%d\n",
		    tab, tab, "",
		    blk, (long long)radix,
		    scan->u.bmu_bitmap,
		    scan->bm_bighint
		);
		return;
	}

	if (scan->u.bmu_avail == 0) {
		kprintf(
		    "%*.*s(%04x,%lld) ALL ALLOCATED\n",
		    tab, tab, "",
		    blk,
		    (long long)radix
		);
		return;
	}
	if (scan->u.bmu_avail == radix) {
		kprintf(
		    "%*.*s(%04x,%lld) ALL FREE\n",
		    tab, tab, "",
		    blk,
		    (long long)radix
		);
		return;
	}

	kprintf(
	    "%*.*s(%04x,%lld): subtree (%d/%lld) big=%d {\n",
	    tab, tab, "",
	    blk, (long long)radix,
	    scan->u.bmu_avail,
	    (long long)radix,
	    scan->bm_bighint
	);

	radix /= BLIST_META_RADIX;
	next_skip = ((u_swblk_t)skip / BLIST_META_RADIX);
	tab += 4;

	for (i = 1; i <= skip; i += next_skip) {
		if (scan[i].bm_bighint == (swblk_t)-1) {
			kprintf(
			    "%*.*s(%04x,%lld): Terminator\n",
			    tab, tab, "",
			    blk, (long long)radix
			);
			break;
		}
		blst_radix_print(
		    &scan[i],
		    blk,
		    radix,
		    next_skip - 1,
		    tab
		);
		blk += (swblk_t)radix;
	}
	tab -= 4;

	kprintf(
	    "%*.*s}\n",
	    tab, tab, ""
	);
}

#endif

#ifdef BLIST_DEBUG

int
main(int ac, char **av)
{
	swblk_t size = 1024;
	swblk_t i;
	blist_t bl;

	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	bl = blist_create(size);
	blist_free(bl, 0, size);

	for (;;) {
		char buf[1024];
		swblk_t da = 0;
		swblk_t count = 0;
		swblk_t blkat;

		kprintf("%d/%d/%lld> ",
			bl->bl_free, size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%d", &count) == 1) {
				blist_resize(&bl, count, 1);
				size = count;
			} else {
				kprintf("?\n");
			}
			/* fall through - print the tree after resizing */
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%d %d", &count, &blkat) == 1) {
				swblk_t blk = blist_alloc(bl, count);
				kprintf("    R=%04x\n", blk);
			} else if (sscanf(buf + 1, "%d %d",
					  &count, &blkat) == 2) {
				swblk_t blk = blist_allocat(bl, count, blkat);
				kprintf("    R=%04x\n", blk);
			} else {
				kprintf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%x %d", &da, &count) == 2) {
				blist_free(bl, da, count);
			} else {
				kprintf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%x %d", &da, &count) == 2) {
				printf("    n=%d\n",
				       blist_fill(bl, da, count));
			} else {
				kprintf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p          -print\n"
			    "a %d       -allocate\n"
			    "f %x %d    -free\n"
			    "l %x %d    -fill\n"
			    "r %d       -resize\n"
-help" 1112 ); 1113 break; 1114 default: 1115 kprintf("?\n"); 1116 break; 1117 } 1118 } 1119 return(0); 1120 } 1121 1122 void 1123 panic(const char *ctl, ...) 1124 { 1125 __va_list va; 1126 1127 __va_start(va, ctl); 1128 vfprintf(stderr, ctl, va); 1129 fprintf(stderr, "\n"); 1130 __va_end(va); 1131 exit(1); 1132 } 1133 1134 #endif 1135 1136