1 /* 2 * DMALLOC.C - Dillon's malloc 3 * 4 * Copyright (c) 2011 The DragonFly Project. All rights reserved. 5 * 6 * This code is derived from software contributed to The DragonFly Project 7 * by Matthew Dillon <dillon@backplane.com>. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 /* 37 * This module implements a modified slab allocator drop-in replacement for 38 * the libc malloc(). The slab algorithm has been adjusted to support dynamic 39 * sizing of slabs which effectively allows slabs to be used for allocations of 40 * any size. Because of this we neither have a small-block allocator or a 41 * big-block allocator and the code paths are simplified to the point where 42 * allocations, caching, and freeing, is screaming fast. 43 * 44 * There is very little interaction between threads. A global depot accessed 45 * via atomic cmpxchg instructions (only! no spinlocks!) is used as a 46 * catch-all and to deal with thread exits and such. 47 * 48 * To support dynamic slab sizing available user virtual memory is broken 49 * down into ~1024 regions. Each region has fixed slab size whos value is 50 * set when the region is opened up for use. The free() path simply applies 51 * a mask based on the region to the pointer to acquire the base of the 52 * governing slab structure. 53 * 54 * Regions[NREGIONS] (1024) 55 * 56 * Slab management and locking is done on a per-zone basis. 57 * 58 * Alloc Size Chunking Number of zones 59 * 0-127 8 16 60 * 128-255 16 8 61 * 256-511 32 8 62 * 512-1023 64 8 63 * 1024-2047 128 8 64 * 2048-4095 256 8 65 * 4096-8191 512 8 66 * 8192-16383 1024 8 67 * 16384-32767 2048 8 68 * 32768-65535 4096 8 69 * ... continues unlimited ... 4 zones 70 * 71 * For a 2^63 memory space each doubling >= 64K is broken down into 72 * 4 chunking zones, so we support 88 + (48 * 4) = 280 zones. 73 * 74 * API FEATURES AND SIDE EFFECTS 75 * 76 * + power-of-2 sized allocations up to a page will be power-of-2 aligned. 77 * Above that power-of-2 sized allocations are page-aligned. Non 78 * power-of-2 sized allocations are aligned the same as the chunk 79 * size for their zone. 80 * + malloc(0) returns a special non-NULL value 81 * + ability to allocate arbitrarily large chunks of memory 82 * + realloc will reuse the passed pointer if possible, within the 83 * limitations of the zone chunking. 84 * 85 * FUTURE FEATURES 86 * 87 * + [better] garbage collection 88 * + better initial sizing. 89 * 90 * TUNING 91 * 92 * The value of the environment variable MALLOC_OPTIONS is a character string 93 * containing various flags to tune nmalloc. Upper case letters enabled 94 * or increase the feature, lower case disables or decreases the feature. 95 * 96 * U Enable UTRACE for all operations, observable with ktrace. 97 * Diasbled by default. 98 * 99 * Z Zero out allocations, otherwise allocations (except for 100 * calloc) will contain garbage. 101 * Disabled by default. 102 * 103 * H Pass a hint with madvise() about unused pages. 104 * Disabled by default. 105 * Not currently implemented. 106 * 107 * F Disable local per-thread caching. 108 * Disabled by default. 109 * 110 * C Increase (decrease) how much excess cache to retain. 111 * Set to 4 by default. 112 */ 113 114 /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o dmalloc.so dmalloc.c */ 115 116 #ifndef STANDALONE_DEBUG 117 #include "libc_private.h" 118 #endif 119 120 #include <sys/param.h> 121 #include <sys/types.h> 122 #include <sys/mman.h> 123 #include <sys/queue.h> 124 #include <sys/uio.h> 125 #include <sys/ktrace.h> 126 #include <stdio.h> 127 #include <stdint.h> 128 #include <stdlib.h> 129 #include <stdarg.h> 130 #include <stddef.h> 131 #include <unistd.h> 132 #include <string.h> 133 #include <fcntl.h> 134 #include <errno.h> 135 #include <pthread.h> 136 #include <limits.h> 137 138 #include <machine/atomic.h> 139 #include <machine/cpufunc.h> 140 141 #ifdef STANDALONE_DEBUG 142 void _nmalloc_thr_init(void); 143 #else 144 #include "spinlock.h" 145 #include "un-namespace.h" 146 #endif 147 148 #ifndef MAP_SIZEALIGN 149 #define MAP_SIZEALIGN 0 150 #endif 151 152 #if SSIZE_MAX == 0x7FFFFFFF 153 #define ADDRBITS 32 154 #define UVM_BITS 32 /* worst case */ 155 #else 156 #define ADDRBITS 64 157 #define UVM_BITS 48 /* worst case XXX */ 158 #endif 159 160 #if LONG_MAX == 0x7FFFFFFF 161 #define LONG_BITS 32 162 #define LONG_BITS_SHIFT 5 163 #else 164 #define LONG_BITS 64 165 #define LONG_BITS_SHIFT 6 166 #endif 167 168 #define LOCKEDPTR ((void *)(intptr_t)-1) 169 170 /* 171 * Regions[] 172 */ 173 #define NREGIONS_BITS 10 174 #define NREGIONS (1 << NREGIONS_BITS) 175 #define NREGIONS_MASK (NREGIONS - 1) 176 #define NREGIONS_SHIFT (UVM_BITS - NREGIONS_BITS) 177 #define NREGIONS_SIZE (1LU << NREGIONS_SHIFT) 178 179 typedef struct region *region_t; 180 typedef struct slglobaldata *slglobaldata_t; 181 typedef struct slab *slab_t; 182 183 struct region { 184 uintptr_t mask; 185 slab_t slab; /* conditional out of band slab */ 186 }; 187 188 static struct region Regions[NREGIONS]; 189 190 /* 191 * Number of chunking zones available 192 */ 193 #define CHUNKFACTOR 8 194 #if ADDRBITS == 32 195 #define NZONES (16 + 9 * CHUNKFACTOR + 16 * CHUNKFACTOR) 196 #else 197 #define NZONES (16 + 9 * CHUNKFACTOR + 48 * CHUNKFACTOR) 198 #endif 199 200 static int MaxChunks[NZONES]; 201 202 #define NDEPOTS 8 /* must be power of 2 */ 203 204 /* 205 * Maximum number of chunks per slab, governed by the allocation bitmap in 206 * each slab. The maximum is reduced for large chunk sizes. 207 */ 208 #define MAXCHUNKS (LONG_BITS * LONG_BITS) 209 #define MAXCHUNKS_BITS (LONG_BITS_SHIFT * LONG_BITS_SHIFT) 210 #define LITSLABSIZE (32 * 1024) 211 #define NOMSLABSIZE (2 * 1024 * 1024) 212 #define BIGSLABSIZE (128 * 1024 * 1024) 213 214 #define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ 215 216 TAILQ_HEAD(slab_list, slab); 217 218 /* 219 * A slab structure 220 */ 221 struct slab { 222 struct slab *next; /* slabs with available space */ 223 TAILQ_ENTRY(slab) entry; 224 int32_t magic; /* magic number for sanity check */ 225 u_int navail; /* number of free elements available */ 226 u_int nmax; 227 u_int free_bit; /* free hint bitno */ 228 u_int free_index; /* free hint index */ 229 u_long bitmap[LONG_BITS]; /* free chunks */ 230 size_t slab_size; /* size of entire slab */ 231 size_t chunk_size; /* chunk size for validation */ 232 int zone_index; 233 enum { UNKNOWN, AVAIL, EMPTY, FULL } state; 234 int flags; 235 region_t region; /* related region */ 236 char *chunks; /* chunk base */ 237 slglobaldata_t slgd; 238 }; 239 240 /* 241 * per-thread data 242 */ 243 struct slglobaldata { 244 struct zoneinfo { 245 slab_t avail_base; 246 slab_t empty_base; 247 int best_region; 248 int empty_count; 249 } zone[NZONES]; 250 struct slab_list full_zones; /* via entry */ 251 int masked; 252 int biggest_index; 253 size_t nslabs; 254 slglobaldata_t sldepot; 255 }; 256 257 #define SLAB_ZEROD 0x0001 258 259 /* 260 * Misc constants. Note that allocations that are exact multiples of 261 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 262 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 263 */ 264 #define MIN_CHUNK_SIZE 8 /* in bytes */ 265 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 266 267 #define SAFLAG_ZERO 0x00000001 268 269 /* 270 * The WEIRD_ADDR is used as known text to copy into free objects to 271 * try to create deterministic failure cases if the data is accessed after 272 * free. 273 * 274 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 275 * not be larger then 64. 276 */ 277 #define WEIRD_ADDR 0xdeadc0de 278 #define MAX_COPY sizeof(weirdary) 279 280 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 281 282 /* 283 * Thread control 284 */ 285 286 #define MASSERT(exp) do { if (__predict_false(!(exp))) \ 287 _mpanic("assertion: %s in %s", \ 288 #exp, __func__); \ 289 } while (0) 290 291 /* With this attribute set, do not require a function call for accessing 292 * this variable when the code is compiled -fPIC */ 293 #define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec"))); 294 295 static __thread struct slglobaldata slglobal TLS_ATTRIBUTE; 296 static pthread_key_t thread_malloc_key; 297 static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT; 298 static struct slglobaldata sldepots[NDEPOTS]; 299 300 static int opt_madvise = 0; 301 static int opt_free = 0; 302 static int opt_cache = 4; 303 static int opt_utrace = 0; 304 static int g_malloc_flags = 0; 305 static int malloc_panic; 306 static int malloc_started = 0; 307 308 static const int32_t weirdary[16] = { 309 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, 310 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, 311 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, 312 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR 313 }; 314 315 static void *memalloc(size_t size, int flags); 316 static void *memrealloc(void *ptr, size_t size); 317 static void memfree(void *ptr, int); 318 static slab_t slaballoc(int zi, size_t chunking, size_t chunk_size); 319 static void slabfree(slab_t slab); 320 static void slabterm(slglobaldata_t slgd, slab_t slab); 321 static void *_vmem_alloc(int ri, size_t slab_size); 322 static void _vmem_free(void *ptr, size_t slab_size); 323 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 324 #ifndef STANDALONE_DEBUG 325 static void malloc_init(void) __constructor(0); 326 #else 327 static void malloc_init(void) __constructor(101); 328 #endif 329 330 331 struct nmalloc_utrace { 332 void *p; 333 size_t s; 334 void *r; 335 }; 336 337 #define UTRACE(a, b, c) \ 338 if (opt_utrace) { \ 339 struct nmalloc_utrace ut = { \ 340 .p = (a), \ 341 .s = (b), \ 342 .r = (c) \ 343 }; \ 344 utrace(&ut, sizeof(ut)); \ 345 } 346 347 #ifdef INVARIANTS 348 /* 349 * If enabled any memory allocated without M_ZERO is initialized to -1. 350 */ 351 static int use_malloc_pattern; 352 #endif 353 354 static void 355 malloc_init(void) 356 { 357 const char *p = NULL; 358 static spinlock_t malloc_init_lock; 359 360 if (malloc_started) 361 return; 362 363 if (__isthreaded) { 364 _SPINLOCK(&malloc_init_lock); 365 if (malloc_started) { 366 _SPINUNLOCK(&malloc_init_lock); 367 return; 368 } 369 } 370 371 Regions[0].mask = -1; /* disallow activity in lowest region */ 372 373 if (issetugid() == 0) 374 p = getenv("MALLOC_OPTIONS"); 375 376 for (; p != NULL && *p != '\0'; p++) { 377 switch(*p) { 378 case 'u': 379 opt_utrace = 0; 380 break; 381 case 'U': 382 opt_utrace = 1; 383 break; 384 case 'h': 385 opt_madvise = 0; 386 break; 387 case 'H': 388 opt_madvise = 1; 389 break; 390 case 'c': 391 if (opt_cache > 0) 392 --opt_cache; 393 break; 394 case 'C': 395 ++opt_cache; 396 break; 397 case 'f': 398 opt_free = 0; 399 break; 400 case 'F': 401 opt_free = 1; 402 break; 403 case 'z': 404 g_malloc_flags = 0; 405 break; 406 case 'Z': 407 g_malloc_flags = SAFLAG_ZERO; 408 break; 409 default: 410 break; 411 } 412 } 413 414 UTRACE((void *) -1, 0, NULL); 415 _nmalloc_thr_init(); 416 malloc_started = 1; 417 418 if (__isthreaded) 419 _SPINUNLOCK(&malloc_init_lock); 420 } 421 422 /* 423 * We have to install a handler for nmalloc thread teardowns when 424 * the thread is created. We cannot delay this because destructors in 425 * sophisticated userland programs can call malloc() for the first time 426 * during their thread exit. 427 * 428 * This routine is called directly from pthreads. 429 */ 430 static void _nmalloc_thr_init_once(void); 431 static void _nmalloc_thr_destructor(void *thrp); 432 433 void 434 _nmalloc_thr_init(void) 435 { 436 static int did_init; 437 static int SLGI; 438 int slgi; 439 440 slgi = SLGI++; 441 cpu_ccfence(); 442 TAILQ_INIT(&slglobal.full_zones); 443 slglobal.sldepot = &sldepots[slgi & (NDEPOTS - 1)]; 444 445 if (slglobal.masked) 446 return; 447 448 slglobal.masked = 1; 449 if (did_init == 0) { 450 did_init = 1; 451 pthread_once(&thread_malloc_once, _nmalloc_thr_init_once); 452 } 453 pthread_setspecific(thread_malloc_key, &slglobal); 454 slglobal.masked = 0; 455 } 456 457 /* 458 * Called just once 459 */ 460 static void 461 _nmalloc_thr_init_once(void) 462 { 463 int error; 464 465 error = pthread_key_create(&thread_malloc_key, _nmalloc_thr_destructor); 466 if (error) 467 abort(); 468 } 469 470 /* 471 * Called for each thread undergoing exit 472 * 473 * Move all of the thread's slabs into a depot. 474 */ 475 static void 476 _nmalloc_thr_destructor(void *thrp) 477 { 478 slglobaldata_t slgd = thrp; 479 slab_t slab; 480 int i; 481 482 slgd->masked = 1; 483 484 for (i = 0; i <= slgd->biggest_index; i++) { 485 while ((slab = slgd->zone[i].empty_base) != NULL) { 486 slgd->zone[i].empty_base = slab->next; 487 slabterm(slgd, slab); 488 } 489 490 while ((slab = slgd->zone[i].avail_base) != NULL) { 491 slgd->zone[i].avail_base = slab->next; 492 slabterm(slgd, slab); 493 } 494 495 while ((slab = TAILQ_FIRST(&slgd->full_zones)) != NULL) { 496 TAILQ_REMOVE(&slgd->full_zones, slab, entry); 497 slabterm(slgd, slab); 498 } 499 } 500 } 501 502 /* 503 * Calculate the zone index for the allocation request size and set the 504 * allocation request size to that particular zone's chunk size. 505 */ 506 static __inline int 507 zoneindex(size_t *bytes, size_t *chunking) 508 { 509 size_t n = (size_t)*bytes; 510 size_t x; 511 size_t c; 512 int i; 513 514 if (n < 128) { 515 *bytes = n = (n + 7) & ~7; 516 *chunking = 8; 517 return(n / 8); /* 8 byte chunks, 16 zones */ 518 } 519 if (n < 4096) { 520 x = 256; 521 c = x / (CHUNKFACTOR * 2); 522 i = 16; 523 } else { 524 x = 8192; 525 c = x / (CHUNKFACTOR * 2); 526 i = 16 + CHUNKFACTOR * 5; /* 256->512,1024,2048,4096,8192 */ 527 } 528 while (n >= x) { 529 x <<= 1; 530 c <<= 1; 531 i += CHUNKFACTOR; 532 if (x == 0) 533 _mpanic("slaballoc: byte value too high"); 534 } 535 *bytes = n = (n + c - 1) & ~(c - 1); 536 *chunking = c; 537 return (i + n / c - CHUNKFACTOR); 538 #if 0 539 *bytes = n = (n + c - 1) & ~(c - 1); 540 *chunking = c; 541 return (n / c + i); 542 543 if (n < 256) { 544 *bytes = n = (n + 15) & ~15; 545 *chunking = 16; 546 return(n / (CHUNKINGLO*2) + CHUNKINGLO*1 - 1); 547 } 548 if (n < 8192) { 549 if (n < 512) { 550 *bytes = n = (n + 31) & ~31; 551 *chunking = 32; 552 return(n / (CHUNKINGLO*4) + CHUNKINGLO*2 - 1); 553 } 554 if (n < 1024) { 555 *bytes = n = (n + 63) & ~63; 556 *chunking = 64; 557 return(n / (CHUNKINGLO*8) + CHUNKINGLO*3 - 1); 558 } 559 if (n < 2048) { 560 *bytes = n = (n + 127) & ~127; 561 *chunking = 128; 562 return(n / (CHUNKINGLO*16) + CHUNKINGLO*4 - 1); 563 } 564 if (n < 4096) { 565 *bytes = n = (n + 255) & ~255; 566 *chunking = 256; 567 return(n / (CHUNKINGLO*32) + CHUNKINGLO*5 - 1); 568 } 569 *bytes = n = (n + 511) & ~511; 570 *chunking = 512; 571 return(n / (CHUNKINGLO*64) + CHUNKINGLO*6 - 1); 572 } 573 if (n < 16384) { 574 *bytes = n = (n + 1023) & ~1023; 575 *chunking = 1024; 576 return(n / (CHUNKINGLO*128) + CHUNKINGLO*7 - 1); 577 } 578 if (n < 32768) { /* 16384-32767 */ 579 *bytes = n = (n + 2047) & ~2047; 580 *chunking = 2048; 581 return(n / (CHUNKINGLO*256) + CHUNKINGLO*8 - 1); 582 } 583 if (n < 65536) { 584 *bytes = n = (n + 4095) & ~4095; /* 32768-65535 */ 585 *chunking = 4096; 586 return(n / (CHUNKINGLO*512) + CHUNKINGLO*9 - 1); 587 } 588 589 x = 131072; 590 c = 8192; 591 i = CHUNKINGLO*10 - 1; 592 593 while (n >= x) { 594 x <<= 1; 595 c <<= 1; 596 i += CHUNKINGHI; 597 if (x == 0) 598 _mpanic("slaballoc: byte value too high"); 599 } 600 *bytes = n = (n + c - 1) & ~(c - 1); 601 *chunking = c; 602 return (n / c + i); 603 #endif 604 } 605 606 /* 607 * malloc() - call internal slab allocator 608 */ 609 void * 610 malloc(size_t size) 611 { 612 void *ptr; 613 614 ptr = memalloc(size, 0); 615 if (ptr == NULL) 616 errno = ENOMEM; 617 else 618 UTRACE(0, size, ptr); 619 return(ptr); 620 } 621 622 /* 623 * calloc() - call internal slab allocator 624 */ 625 void * 626 calloc(size_t number, size_t size) 627 { 628 void *ptr; 629 630 ptr = memalloc(number * size, SAFLAG_ZERO); 631 if (ptr == NULL) 632 errno = ENOMEM; 633 else 634 UTRACE(0, number * size, ptr); 635 return(ptr); 636 } 637 638 /* 639 * realloc() (SLAB ALLOCATOR) 640 * 641 * We do not attempt to optimize this routine beyond reusing the same 642 * pointer if the new size fits within the chunking of the old pointer's 643 * zone. 644 */ 645 void * 646 realloc(void *ptr, size_t size) 647 { 648 void *ret; 649 650 if (ptr == NULL) 651 ret = memalloc(size, 0); 652 else 653 ret = memrealloc(ptr, size); 654 if (ret == NULL) 655 errno = ENOMEM; 656 else 657 UTRACE(ptr, size, ret); 658 return(ret); 659 } 660 661 /* 662 * posix_memalign() 663 * 664 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 665 * is a power of 2 >= sizeof(void *). 666 * 667 * The slab allocator will allocate on power-of-2 boundaries up to at least 668 * PAGE_SIZE. Otherwise we use the zoneindex mechanic to find a zone 669 * matching the requirements. 670 */ 671 int 672 posix_memalign(void **memptr, size_t alignment, size_t size) 673 { 674 size_t chunking; 675 int zi; 676 677 /* 678 * OpenGroup spec issue 6 checks 679 */ 680 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 681 *memptr = NULL; 682 return(EINVAL); 683 } 684 if (alignment < sizeof(void *)) { 685 *memptr = NULL; 686 return(EINVAL); 687 } 688 689 /* 690 * XXX for now just find the nearest power of 2 >= size and also 691 * >= alignment and allocate that. 692 */ 693 while (alignment < size) { 694 alignment <<= 1; 695 if (alignment == 0) 696 _mpanic("posix_memalign: byte value too high"); 697 } 698 *memptr = memalloc(alignment, 0); 699 return(*memptr ? 0 : ENOMEM); 700 } 701 702 /* 703 * free() (SLAB ALLOCATOR) - do the obvious 704 */ 705 void 706 free(void *ptr) 707 { 708 if (ptr) { 709 UTRACE(ptr, 0, 0); 710 memfree(ptr, 0); 711 } 712 } 713 714 /* 715 * memalloc() (SLAB ALLOCATOR) 716 * 717 * Allocate memory via the slab allocator. 718 */ 719 static void * 720 memalloc(size_t size, int flags) 721 { 722 slglobaldata_t slgd; 723 struct zoneinfo *zinfo; 724 slab_t slab; 725 size_t chunking; 726 int bmi; 727 int bno; 728 u_long *bmp; 729 int zi; 730 #ifdef INVARIANTS 731 int i; 732 #endif 733 size_t off; 734 char *obj; 735 736 if (!malloc_started) 737 malloc_init(); 738 739 /* 740 * If 0 bytes is requested we have to return a unique pointer, allocate 741 * at least one byte. 742 */ 743 if (size == 0) 744 size = 1; 745 746 /* Capture global flags */ 747 flags |= g_malloc_flags; 748 749 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 750 zi = zoneindex(&size, &chunking); 751 MASSERT(zi < NZONES); 752 if (size == 0) 753 return(NULL); 754 755 /* 756 * Locate a slab with available space. If no slabs are available 757 * back-off to the empty list and if we still come up dry allocate 758 * a new slab (which will try the depot first). 759 */ 760 retry: 761 slgd = &slglobal; 762 zinfo = &slgd->zone[zi]; 763 if ((slab = zinfo->avail_base) == NULL) { 764 if ((slab = zinfo->empty_base) == NULL) { 765 /* 766 * Still dry 767 */ 768 slab = slaballoc(zi, chunking, size); 769 if (slab == NULL) 770 return(NULL); 771 slab->next = zinfo->avail_base; 772 zinfo->avail_base = slab; 773 slab->state = AVAIL; 774 if (slgd->biggest_index < zi) 775 slgd->biggest_index = zi; 776 ++slgd->nslabs; 777 } else { 778 /* 779 * Pulled from empty list 780 */ 781 zinfo->empty_base = slab->next; 782 slab->next = zinfo->avail_base; 783 zinfo->avail_base = slab; 784 slab->state = AVAIL; 785 --zinfo->empty_count; 786 } 787 } 788 789 /* 790 * Allocate a chunk out of the slab. HOT PATH 791 * 792 * Only the thread owning the slab can allocate out of it. 793 * 794 * NOTE: The last bit in the bitmap is always marked allocated so 795 * we cannot overflow here. 796 */ 797 bno = slab->free_bit; 798 bmi = slab->free_index; 799 bmp = &slab->bitmap[bmi]; 800 if (*bmp & (1LU << bno)) { 801 atomic_clear_long(bmp, 1LU << bno); 802 obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * size; 803 slab->free_bit = (bno + 1) & (LONG_BITS - 1); 804 atomic_add_int(&slab->navail, -1); 805 if (flags & SAFLAG_ZERO) 806 bzero(obj, size); 807 return (obj); 808 } 809 810 /* 811 * Allocate a chunk out of a slab. COLD PATH 812 */ 813 if (slab->navail == 0) { 814 zinfo->avail_base = slab->next; 815 slab->state = FULL; 816 TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry); 817 goto retry; 818 } 819 820 while (bmi < LONG_BITS) { 821 bmp = &slab->bitmap[bmi]; 822 if (*bmp) { 823 bno = bsflong(*bmp); 824 atomic_clear_long(bmp, 1LU << bno); 825 obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * 826 size; 827 slab->free_index = bmi; 828 slab->free_bit = (bno + 1) & (LONG_BITS - 1); 829 atomic_add_int(&slab->navail, -1); 830 if (flags & SAFLAG_ZERO) 831 bzero(obj, size); 832 return (obj); 833 } 834 ++bmi; 835 } 836 bmi = 0; 837 while (bmi < LONG_BITS) { 838 bmp = &slab->bitmap[bmi]; 839 if (*bmp) { 840 bno = bsflong(*bmp); 841 atomic_clear_long(bmp, 1LU << bno); 842 obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * 843 size; 844 slab->free_index = bmi; 845 slab->free_bit = (bno + 1) & (LONG_BITS - 1); 846 atomic_add_int(&slab->navail, -1); 847 if (flags & SAFLAG_ZERO) 848 bzero(obj, size); 849 return (obj); 850 } 851 ++bmi; 852 } 853 _mpanic("slaballoc: corrupted zone: navail %d", slab->navail); 854 /* not reached */ 855 return NULL; 856 } 857 858 /* 859 * Reallocate memory within the chunk 860 */ 861 static void * 862 memrealloc(void *ptr, size_t nsize) 863 { 864 region_t region; 865 slglobaldata_t slgd; 866 slab_t slab; 867 size_t osize; 868 char *obj; 869 int flags; 870 871 /* 872 * If 0 bytes is requested we have to return a unique pointer, allocate 873 * at least one byte. 874 */ 875 if (nsize == 0) 876 nsize = 1; 877 878 /* Capture global flags */ 879 flags |= g_malloc_flags; 880 881 /* 882 * Locate the zone by looking up the dynamic slab size mask based 883 * on the memory region the allocation resides in. 884 */ 885 region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK]; 886 if ((slab = region->slab) == NULL) 887 slab = (void *)((uintptr_t)ptr & region->mask); 888 MASSERT(slab->magic == ZALLOC_SLAB_MAGIC); 889 osize = slab->chunk_size; 890 if (nsize <= osize) { 891 if (osize < 32 || nsize >= osize / 2) { 892 obj = ptr; 893 if ((flags & SAFLAG_ZERO) && nsize < osize) 894 bzero(obj + nsize, osize - nsize); 895 return(obj); 896 } 897 } 898 899 /* 900 * Otherwise resize the object 901 */ 902 obj = memalloc(nsize, 0); 903 if (obj) { 904 if (nsize > osize) 905 nsize = osize; 906 bcopy(ptr, obj, nsize); 907 memfree(ptr, 0); 908 } 909 return (obj); 910 } 911 912 /* 913 * free (SLAB ALLOCATOR) 914 * 915 * Free a memory block previously allocated by malloc. 916 * 917 * MPSAFE 918 */ 919 static void 920 memfree(void *ptr, int flags) 921 { 922 region_t region; 923 slglobaldata_t slgd; 924 slab_t slab; 925 slab_t stmp; 926 slab_t *slabp; 927 char *obj; 928 int bmi; 929 int bno; 930 u_long *bmp; 931 932 /* 933 * Locate the zone by looking up the dynamic slab size mask based 934 * on the memory region the allocation resides in. 935 * 936 * WARNING! The slab may be owned by another thread! 937 */ 938 region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK]; 939 if ((slab = region->slab) == NULL) 940 slab = (void *)((uintptr_t)ptr & region->mask); 941 MASSERT(slab != NULL); 942 MASSERT(slab->magic == ZALLOC_SLAB_MAGIC); 943 944 #ifdef INVARIANTS 945 /* 946 * Put weird data into the memory to detect modifications after 947 * freeing, illegal pointer use after freeing (we should fault on 948 * the odd address), and so forth. 949 */ 950 if (slab->chunk_size < sizeof(weirdary)) 951 bcopy(weirdary, ptr, slab->chunk_size); 952 else 953 bcopy(weirdary, ptr, sizeof(weirdary)); 954 #endif 955 956 bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size; 957 bmi = bno >> LONG_BITS_SHIFT; 958 bno &= (LONG_BITS - 1); 959 bmp = &slab->bitmap[bmi]; 960 961 MASSERT(bmi >= 0 && bmi < slab->nmax); 962 MASSERT((*bmp & (1LU << bno)) == 0); 963 atomic_set_long(bmp, 1LU << bno); 964 atomic_add_int(&slab->navail, 1); 965 966 /* 967 * We can only do the following if we own the slab 968 */ 969 slgd = &slglobal; 970 if (slab->slgd == slgd) { 971 struct zoneinfo *zinfo; 972 973 if (slab->free_index > bmi) { 974 slab->free_index = bmi; 975 slab->free_bit = bno; 976 } else if (slab->free_index == bmi && 977 slab->free_bit > bno) { 978 slab->free_bit = bno; 979 } 980 zinfo = &slgd->zone[slab->zone_index]; 981 982 /* 983 * Freeing an object from a full slab will move it to the 984 * available list. If the available list already has a 985 * slab we terminate the full slab instead, moving it to 986 * the depot. 987 */ 988 if (slab->state == FULL) { 989 TAILQ_REMOVE(&slgd->full_zones, slab, entry); 990 if (zinfo->avail_base == NULL) { 991 slab->state = AVAIL; 992 stmp = zinfo->avail_base; 993 slab->next = stmp; 994 zinfo->avail_base = slab; 995 } else { 996 slabterm(slgd, slab); 997 goto done; 998 } 999 } 1000 1001 /* 1002 * If the slab becomes completely empty dispose of it in 1003 * some manner. By default each thread caches up to 4 1004 * empty slabs. Only small slabs are cached. 1005 */ 1006 if (slab->navail == slab->nmax && slab->state == AVAIL) { 1007 /* 1008 * Remove slab from available queue 1009 */ 1010 slabp = &zinfo->avail_base; 1011 while ((stmp = *slabp) != slab) 1012 slabp = &stmp->next; 1013 *slabp = slab->next; 1014 1015 if (opt_free || opt_cache == 0) { 1016 /* 1017 * If local caching is disabled cache the 1018 * slab in the depot (or free it). 1019 */ 1020 slabterm(slgd, slab); 1021 } else if (slab->slab_size > BIGSLABSIZE) { 1022 /* 1023 * We do not try to retain large slabs 1024 * in per-thread caches. 1025 */ 1026 slabterm(slgd, slab); 1027 } else if (zinfo->empty_count > opt_cache) { 1028 /* 1029 * We have too many slabs cached, but 1030 * instead of freeing this one free 1031 * an empty slab that's been idle longer. 1032 * 1033 * (empty_count does not change) 1034 */ 1035 stmp = zinfo->empty_base; 1036 slab->state = EMPTY; 1037 slab->next = stmp->next; 1038 zinfo->empty_base = slab; 1039 slabterm(slgd, stmp); 1040 } else { 1041 /* 1042 * Cache the empty slab in our thread local 1043 * empty list. 1044 */ 1045 ++zinfo->empty_count; 1046 slab->state = EMPTY; 1047 slab->next = zinfo->empty_base; 1048 zinfo->empty_base = slab; 1049 } 1050 } 1051 } 1052 done: 1053 ; 1054 } 1055 1056 /* 1057 * Allocate a new slab holding objects of size chunk_size. 1058 */ 1059 static slab_t 1060 slaballoc(int zi, size_t chunking, size_t chunk_size) 1061 { 1062 slglobaldata_t slgd; 1063 slglobaldata_t sldepot; 1064 struct zoneinfo *zinfo; 1065 region_t region; 1066 void *save; 1067 slab_t slab; 1068 slab_t stmp; 1069 size_t slab_desire; 1070 size_t slab_size; 1071 size_t region_mask; 1072 uintptr_t chunk_offset; 1073 ssize_t maxchunks; 1074 ssize_t tmpchunks; 1075 int ispower2; 1076 int power; 1077 int ri; 1078 int rx; 1079 int nswath; 1080 int j; 1081 1082 /* 1083 * First look in the depot. Any given zone in the depot may be 1084 * locked by being set to -1. We have to do this instead of simply 1085 * removing the entire chain because removing the entire chain can 1086 * cause racing threads to allocate local slabs for large objects, 1087 * resulting in a large VSZ. 1088 */ 1089 slgd = &slglobal; 1090 sldepot = slgd->sldepot; 1091 zinfo = &sldepot->zone[zi]; 1092 1093 while ((slab = zinfo->avail_base) != NULL) { 1094 if ((void *)slab == LOCKEDPTR) { 1095 cpu_pause(); 1096 continue; 1097 } 1098 if (atomic_cmpset_ptr(&zinfo->avail_base, slab, LOCKEDPTR)) { 1099 MASSERT(slab->slgd == NULL); 1100 slab->slgd = slgd; 1101 zinfo->avail_base = slab->next; 1102 return(slab); 1103 } 1104 } 1105 1106 /* 1107 * Nothing in the depot, allocate a new slab by locating or assigning 1108 * a region and then using the system virtual memory allocator. 1109 */ 1110 slab = NULL; 1111 1112 /* 1113 * Calculate the start of the data chunks relative to the start 1114 * of the slab. 1115 */ 1116 if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) { 1117 ispower2 = 1; 1118 chunk_offset = (sizeof(*slab) + (chunk_size - 1)) & 1119 ~(chunk_size - 1); 1120 } else { 1121 ispower2 = 0; 1122 chunk_offset = sizeof(*slab) + chunking - 1; 1123 chunk_offset -= chunk_offset % chunking; 1124 } 1125 1126 /* 1127 * Calculate a reasonable number of chunks for the slab. 1128 * 1129 * Once initialized the MaxChunks[] array can only ever be 1130 * reinitialized to the same value. 1131 */ 1132 maxchunks = MaxChunks[zi]; 1133 if (maxchunks == 0) { 1134 /* 1135 * First calculate how many chunks would fit in 1/1024 1136 * available memory. This is around 2MB on a 32 bit 1137 * system and 128G on a 64-bit (48-bits available) system. 1138 */ 1139 maxchunks = (ssize_t)(NREGIONS_SIZE - chunk_offset) / 1140 (ssize_t)chunk_size; 1141 if (maxchunks <= 0) 1142 maxchunks = 1; 1143 1144 /* 1145 * A slab cannot handle more than MAXCHUNKS chunks, but 1146 * limit us to approximately MAXCHUNKS / 2 here because 1147 * we may have to expand maxchunks when we calculate the 1148 * actual power-of-2 slab. 1149 */ 1150 if (maxchunks > MAXCHUNKS / 2) 1151 maxchunks = MAXCHUNKS / 2; 1152 1153 /* 1154 * Try to limit the slabs to BIGSLABSIZE (~128MB). Larger 1155 * slabs will be created if the allocation does not fit. 1156 */ 1157 if (chunk_offset + chunk_size * maxchunks > BIGSLABSIZE) { 1158 tmpchunks = (ssize_t)(BIGSLABSIZE - chunk_offset) / 1159 (ssize_t)chunk_size; 1160 if (tmpchunks <= 0) 1161 tmpchunks = 1; 1162 if (maxchunks > tmpchunks) 1163 maxchunks = tmpchunks; 1164 } 1165 1166 /* 1167 * If the slab calculates to greater than 2MB see if we 1168 * can cut it down to ~2MB. This controls VSZ but has 1169 * no effect on run-time size or performance. 1170 * 1171 * This is very important in case you core dump and also 1172 * important to reduce unnecessary region allocations. 1173 */ 1174 if (chunk_offset + chunk_size * maxchunks > NOMSLABSIZE) { 1175 tmpchunks = (ssize_t)(NOMSLABSIZE - chunk_offset) / 1176 (ssize_t)chunk_size; 1177 if (tmpchunks < 1) 1178 tmpchunks = 1; 1179 if (maxchunks > tmpchunks) 1180 maxchunks = tmpchunks; 1181 } 1182 1183 /* 1184 * If the slab calculates to greater than 128K see if we 1185 * can cut it down to ~128K while still maintaining a 1186 * reasonably large number of chunks in each slab. This 1187 * controls VSZ but has no effect on run-time size or 1188 * performance. 1189 * 1190 * This is very important in case you core dump and also 1191 * important to reduce unnecessary region allocations. 1192 */ 1193 if (chunk_offset + chunk_size * maxchunks > LITSLABSIZE) { 1194 tmpchunks = (ssize_t)(LITSLABSIZE - chunk_offset) / 1195 (ssize_t)chunk_size; 1196 if (tmpchunks < 32) 1197 tmpchunks = 32; 1198 if (maxchunks > tmpchunks) 1199 maxchunks = tmpchunks; 1200 } 1201 1202 MaxChunks[zi] = maxchunks; 1203 } 1204 MASSERT(maxchunks > 0 && maxchunks <= MAXCHUNKS); 1205 1206 /* 1207 * Calculate the actual slab size. maxchunks will be recalculated 1208 * a little later. 1209 */ 1210 slab_desire = chunk_offset + chunk_size * maxchunks; 1211 slab_size = 8 * MAXCHUNKS; 1212 power = 3 + MAXCHUNKS_BITS; 1213 while (slab_size < slab_desire) { 1214 slab_size <<= 1; 1215 ++power; 1216 } 1217 1218 /* 1219 * Do a quick recalculation based on the actual slab size but not 1220 * yet dealing with whether the slab header is in-band or out-of-band. 1221 * The purpose here is to see if we can reasonably reduce slab_size 1222 * to a power of 4 to allow more chunk sizes to use the same slab 1223 * size. 1224 */ 1225 if ((power & 1) && slab_size > 32768) { 1226 maxchunks = (slab_size - chunk_offset) / chunk_size; 1227 if (maxchunks >= MAXCHUNKS / 8) { 1228 slab_size >>= 1; 1229 --power; 1230 } 1231 } 1232 if ((power & 2) && slab_size > 32768 * 4) { 1233 maxchunks = (slab_size - chunk_offset) / chunk_size; 1234 if (maxchunks >= MAXCHUNKS / 4) { 1235 slab_size >>= 2; 1236 power -= 2; 1237 } 1238 } 1239 /* 1240 * This case occurs when the slab_size is larger than 1/1024 available 1241 * UVM. 1242 */ 1243 nswath = slab_size / NREGIONS_SIZE; 1244 if (nswath > NREGIONS) 1245 return (NULL); 1246 1247 1248 /* 1249 * Try to allocate from our current best region for this zi 1250 */ 1251 region_mask = ~(slab_size - 1); 1252 ri = slgd->zone[zi].best_region; 1253 if (Regions[ri].mask == region_mask) { 1254 if ((slab = _vmem_alloc(ri, slab_size)) != NULL) 1255 goto found; 1256 } 1257 1258 /* 1259 * Try to find an existing region to allocate from. The normal 1260 * case will be for allocations that are less than 1/1024 available 1261 * UVM, which fit into a single Regions[] entry. 1262 */ 1263 while (slab_size <= NREGIONS_SIZE) { 1264 rx = -1; 1265 for (ri = 0; ri < NREGIONS; ++ri) { 1266 if (rx < 0 && Regions[ri].mask == 0) 1267 rx = ri; 1268 if (Regions[ri].mask == region_mask) { 1269 slab = _vmem_alloc(ri, slab_size); 1270 if (slab) { 1271 slgd->zone[zi].best_region = ri; 1272 goto found; 1273 } 1274 } 1275 } 1276 1277 if (rx < 0) 1278 return(NULL); 1279 1280 /* 1281 * This can fail, retry either way 1282 */ 1283 atomic_cmpset_ptr((void **)&Regions[rx].mask, 1284 NULL, 1285 (void *)region_mask); 1286 } 1287 1288 for (;;) { 1289 rx = -1; 1290 for (ri = 0; ri < NREGIONS; ri += nswath) { 1291 if (Regions[ri].mask == region_mask) { 1292 slab = _vmem_alloc(ri, slab_size); 1293 if (slab) { 1294 slgd->zone[zi].best_region = ri; 1295 goto found; 1296 } 1297 } 1298 if (rx < 0) { 1299 for (j = nswath - 1; j >= 0; --j) { 1300 if (Regions[ri+j].mask != 0) 1301 break; 1302 } 1303 if (j < 0) 1304 rx = ri; 1305 } 1306 } 1307 1308 /* 1309 * We found a candidate, try to allocate it backwards so 1310 * another thread racing a slaballoc() does not see the 1311 * mask in the base index position until we are done. 1312 * 1313 * We can safely zero-out any partial allocations because 1314 * the mask is only accessed from the base index. Any other 1315 * threads racing us will fail prior to us clearing the mask. 1316 */ 1317 if (rx < 0) 1318 return(NULL); 1319 for (j = nswath - 1; j >= 0; --j) { 1320 if (!atomic_cmpset_ptr((void **)&Regions[rx+j].mask, 1321 NULL, (void *)region_mask)) { 1322 while (++j < nswath) 1323 Regions[rx+j].mask = 0; 1324 break; 1325 } 1326 } 1327 /* retry */ 1328 } 1329 1330 /* 1331 * Fill in the new slab in region ri. If the slab_size completely 1332 * fills one or more region slots we move the slab structure out of 1333 * band which should optimize the chunking (particularly for a power 1334 * of 2). 1335 */ 1336 found: 1337 region = &Regions[ri]; 1338 MASSERT(region->slab == NULL); 1339 if (slab_size >= NREGIONS_SIZE) { 1340 save = slab; 1341 slab = memalloc(sizeof(*slab), 0); 1342 bzero(slab, sizeof(*slab)); 1343 slab->chunks = save; 1344 for (j = 0; j < nswath; ++j) 1345 region[j].slab = slab; 1346 chunk_offset = 0; 1347 } else { 1348 bzero(slab, sizeof(*slab)); 1349 slab->chunks = (char *)slab + chunk_offset; 1350 } 1351 1352 /* 1353 * Calculate the start of the chunks memory and recalculate the 1354 * actual number of chunks the slab can hold. 1355 */ 1356 maxchunks = (slab_size - chunk_offset) / chunk_size; 1357 if (maxchunks > MAXCHUNKS) 1358 maxchunks = MAXCHUNKS; 1359 1360 /* 1361 * And fill in the rest 1362 */ 1363 slab->magic = ZALLOC_SLAB_MAGIC; 1364 slab->navail = maxchunks; 1365 slab->nmax = maxchunks; 1366 slab->slab_size = slab_size; 1367 slab->chunk_size = chunk_size; 1368 slab->zone_index = zi; 1369 slab->slgd = slgd; 1370 slab->state = UNKNOWN; 1371 slab->region = region; 1372 1373 for (ri = 0; ri < maxchunks; ri += LONG_BITS) { 1374 if (ri + LONG_BITS <= maxchunks) 1375 slab->bitmap[ri >> LONG_BITS_SHIFT] = ULONG_MAX; 1376 else 1377 slab->bitmap[ri >> LONG_BITS_SHIFT] = 1378 (1LU << (maxchunks - ri)) - 1; 1379 } 1380 return (slab); 1381 } 1382 1383 /* 1384 * Free a slab. 1385 */ 1386 static void 1387 slabfree(slab_t slab) 1388 { 1389 int nswath; 1390 int j; 1391 1392 if (slab->region->slab == slab) { 1393 /* 1394 * Out-of-band slab. 1395 */ 1396 nswath = slab->slab_size / NREGIONS_SIZE; 1397 for (j = 0; j < nswath; ++j) 1398 slab->region[j].slab = NULL; 1399 slab->magic = 0; 1400 _vmem_free(slab->chunks, slab->slab_size); 1401 memfree(slab, 0); 1402 } else { 1403 /* 1404 * In-band slab. 1405 */ 1406 slab->magic = 0; 1407 _vmem_free(slab, slab->slab_size); 1408 } 1409 } 1410 1411 /* 1412 * Terminate a slab's use in the current thread. The slab may still have 1413 * outstanding allocations and thus not be deallocatable. 1414 */ 1415 static void 1416 slabterm(slglobaldata_t slgd, slab_t slab) 1417 { 1418 slglobaldata_t sldepot = slgd->sldepot; 1419 struct zoneinfo *zinfo; 1420 slab_t dnext; 1421 int zi = slab->zone_index; 1422 1423 slab->slgd = NULL; 1424 --slgd->nslabs; 1425 zinfo = &sldepot->zone[zi]; 1426 1427 /* 1428 * If the slab can be freed and the depot is either locked or not 1429 * empty, then free the slab. 1430 */ 1431 if (slab->navail == slab->nmax && zinfo->avail_base) { 1432 slab->state = UNKNOWN; 1433 slabfree(slab); 1434 return; 1435 } 1436 slab->state = AVAIL; 1437 1438 /* 1439 * Link the slab into the depot 1440 */ 1441 for (;;) { 1442 dnext = zinfo->avail_base; 1443 cpu_ccfence(); 1444 if ((void *)dnext == LOCKEDPTR) { 1445 cpu_pause(); 1446 continue; 1447 } 1448 slab->next = dnext; 1449 if (atomic_cmpset_ptr(&zinfo->avail_base, dnext, slab)) 1450 break; 1451 } 1452 } 1453 1454 /* 1455 * _vmem_alloc() 1456 * 1457 * Directly map memory in PAGE_SIZE'd chunks with the specified 1458 * alignment. 1459 * 1460 * Alignment must be a multiple of PAGE_SIZE. 1461 * 1462 * Size must be >= alignment. 1463 */ 1464 static void * 1465 _vmem_alloc(int ri, size_t slab_size) 1466 { 1467 char *baddr = (void *)((uintptr_t)ri << NREGIONS_SHIFT); 1468 char *eaddr; 1469 char *addr; 1470 char *save; 1471 uintptr_t excess; 1472 1473 if (slab_size < NREGIONS_SIZE) 1474 eaddr = baddr + NREGIONS_SIZE; 1475 else 1476 eaddr = baddr + slab_size; 1477 1478 /* 1479 * This usually just works but might not. 1480 */ 1481 addr = mmap(baddr, slab_size, PROT_READ|PROT_WRITE, 1482 MAP_PRIVATE | MAP_ANON | MAP_SIZEALIGN, -1, 0); 1483 if (addr == MAP_FAILED) { 1484 return (NULL); 1485 } 1486 if (addr < baddr || addr + slab_size > eaddr) { 1487 munmap(addr, slab_size); 1488 return (NULL); 1489 } 1490 1491 /* 1492 * Check alignment. The misaligned offset is also the excess 1493 * amount. If misaligned unmap the excess so we have a chance of 1494 * mapping at the next alignment point and recursively try again. 1495 * 1496 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB block alignment 1497 * aaaaaaaaa aaaaaaaaaaa aa mis-aligned allocation 1498 * xxxxxxxxx final excess calculation 1499 * ^ returned address 1500 */ 1501 excess = (uintptr_t)addr & (slab_size - 1); 1502 while (excess) { 1503 excess = slab_size - excess; 1504 save = addr; 1505 1506 munmap(save + excess, slab_size - excess); 1507 addr = _vmem_alloc(ri, slab_size); 1508 munmap(save, excess); 1509 if (addr == NULL) 1510 return (NULL); 1511 if (addr < baddr || addr + slab_size > eaddr) { 1512 munmap(addr, slab_size); 1513 return (NULL); 1514 } 1515 excess = (uintptr_t)addr & (slab_size - 1); 1516 } 1517 return (addr); 1518 } 1519 1520 /* 1521 * _vmem_free() 1522 * 1523 * Free a chunk of memory allocated with _vmem_alloc() 1524 */ 1525 static void 1526 _vmem_free(void *ptr, size_t size) 1527 { 1528 munmap(ptr, size); 1529 } 1530 1531 /* 1532 * Panic on fatal conditions 1533 */ 1534 static void 1535 _mpanic(const char *ctl, ...) 1536 { 1537 va_list va; 1538 1539 if (malloc_panic == 0) { 1540 malloc_panic = 1; 1541 va_start(va, ctl); 1542 vfprintf(stderr, ctl, va); 1543 fprintf(stderr, "\n"); 1544 fflush(stderr); 1545 va_end(va); 1546 } 1547 abort(); 1548 } 1549