/*
 * DMALLOC.C	- Dillon's malloc
 *
 * Copyright (c) 2011 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a modified slab allocator drop-in replacement for
 * the libc malloc().  The slab algorithm has been adjusted to support dynamic
 * sizing of slabs, which effectively allows slabs to be used for allocations
 * of any size.  Because of this we have neither a small-block allocator nor a
 * big-block allocator, and the code paths are simplified to the point where
 * allocations, caching, and freeing are screaming fast.
 *
 * There is very little interaction between threads.  A global depot accessed
 * via atomic cmpxchg instructions (only! no spinlocks!) is used as a
 * catch-all and to deal with thread exits and such.
 *
 * To support dynamic slab sizing, available user virtual memory is broken
 * down into ~1024 regions.  Each region has a fixed slab size whose value is
 * set when the region is opened up for use.  The free() path simply applies
 * a mask based on the region to the pointer to acquire the base of the
 * governing slab structure.
 *
 * Regions[NREGIONS]	(1024)
 *
 * Slab management and locking are done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *	32768-65535	4096		8
 *	... continues unlimited ...	4 zones
 *
 *	For a 2^63 memory space each doubling >= 64K is broken down into
 *	4 chunking zones, so we support 88 + (48 * 4) = 280 zones.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that, power-of-2 sized allocations are page-aligned.
 *	Non power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
 *
 * FUTURE FEATURES
 *
 *    + [better] garbage collection
 *    + better initial sizing.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.  Upper case letters enable or
 * increase a feature, lower case letters disable or decrease it.
 *
 * U		Enable UTRACE for all operations, observable with ktrace.
 *		Disabled by default.
 *
 * Z		Zero out allocations, otherwise allocations (except for
 *		calloc) will contain garbage.
 *		Disabled by default.
 *
 * H		Pass a hint with madvise() about unused pages.
 *		Disabled by default.
 *		Not currently implemented.
 *
 * F		Disable local per-thread caching.
 *		Disabled by default.
 *
 * C		Increase (decrease) how much excess cache to retain.
 *		Set to 4 by default.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o dmalloc.so dmalloc.c */

#ifndef STANDALONE_DEBUG
#include "libc_private.h"
#endif

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <limits.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>

#ifdef STANDALONE_DEBUG
void _nmalloc_thr_init(void);
#else
#include "spinlock.h"
#include "un-namespace.h"
#endif

#ifndef MAP_SIZEALIGN
#define MAP_SIZEALIGN	0
#endif

#if SSIZE_MAX == 0x7FFFFFFF
#define ADDRBITS	32
#define UVM_BITS	32	/* worst case */
#else
#define ADDRBITS	64
#define UVM_BITS	48	/* worst case XXX */
#endif

#if LONG_MAX == 0x7FFFFFFF
#define LONG_BITS	32
#define LONG_BITS_SHIFT	5
#else
#define LONG_BITS	64
#define LONG_BITS_SHIFT	6
#endif

#define LOCKEDPTR	((void *)(intptr_t)-1)

/*
 * Regions[]
 */
#define NREGIONS_BITS	10
#define NREGIONS	(1 << NREGIONS_BITS)
#define NREGIONS_MASK	(NREGIONS - 1)
#define NREGIONS_SHIFT	(UVM_BITS - NREGIONS_BITS)
#define NREGIONS_SIZE	(1LU << NREGIONS_SHIFT)

typedef struct region *region_t;
typedef struct slglobaldata *slglobaldata_t;
typedef struct slab *slab_t;

struct region {
	uintptr_t	mask;
	slab_t		slab;	/* conditional out of band slab */
};

static struct region Regions[NREGIONS];

/*
 * Number of chunking zones available
 */
#define CHUNKFACTOR	8
#if ADDRBITS == 32
#define NZONES		(16 + 9 * CHUNKFACTOR + 16 * CHUNKFACTOR)
#else
#define NZONES		(16 + 9 * CHUNKFACTOR + 48 * CHUNKFACTOR)
#endif

static int MaxChunks[NZONES];

#define NDEPOTS		8		/* must be power of 2 */
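
/*
 * Illustrative sketch (not compiled): how the free() path described in the
 * header recovers the governing slab from a raw pointer.  The high
 * NREGIONS_BITS bits of the user virtual address select a Regions[] slot;
 * the region's mask (set when the region was opened for a particular slab
 * size) then truncates the pointer down to the slab base.  The helper name
 * ptr_to_slab_example() is hypothetical and exists only for this sketch;
 * the real lookups live in memfree() and memrealloc() below.
 */
#if 0
static slab_t
ptr_to_slab_example(void *ptr)
{
	region_t region;
	slab_t slab;

	/* pick the ~1/1024th of the address space the pointer lives in */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];

	/*
	 * Out-of-band slabs record themselves directly in the region;
	 * otherwise masking the pointer with the region's slab-size mask
	 * yields the in-band slab header.
	 */
	if ((slab = region->slab) == NULL)
		slab = (slab_t)((uintptr_t)ptr & region->mask);
	return (slab);
}
#endif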

/*
 * Maximum number of chunks per slab, governed by the allocation bitmap in
 * each slab.  The maximum is reduced for large chunk sizes.
 */
#define MAXCHUNKS	(LONG_BITS * LONG_BITS)
#define MAXCHUNKS_BITS	(LONG_BITS_SHIFT * LONG_BITS_SHIFT)
#define LITSLABSIZE	(32 * 1024)
#define NOMSLABSIZE	(2 * 1024 * 1024)
#define BIGSLABSIZE	(128 * 1024 * 1024)

#define ZALLOC_SLAB_MAGIC 0x736c6162	/* magic sanity */

TAILQ_HEAD(slab_list, slab);

/*
 * A slab structure
 */
struct slab {
	struct slab	*next;		/* slabs with available space */
	TAILQ_ENTRY(slab) entry;
	int32_t		magic;		/* magic number for sanity check */
	u_int		navail;		/* number of free elements available */
	u_int		nmax;
	u_int		free_bit;	/* free hint bitno */
	u_int		free_index;	/* free hint index */
	u_long		bitmap[LONG_BITS]; /* free chunks */
	size_t		slab_size;	/* size of entire slab */
	size_t		chunk_size;	/* chunk size for validation */
	int		zone_index;
	enum { UNKNOWN, AVAIL, EMPTY, FULL } state;
	int		flags;
	region_t	region;		/* related region */
	char		*chunks;	/* chunk base */
	slglobaldata_t	slgd;
};

/*
 * per-thread data
 */
struct slglobaldata {
	struct zoneinfo {
		slab_t	avail_base;
		slab_t	empty_base;
		int	best_region;
		int	empty_count;
	} zone[NZONES];
	struct slab_list full_zones;	/* via entry */
	int		masked;
	int		biggest_index;
	size_t		nslabs;
	slglobaldata_t	sldepot;
};

#define SLAB_ZEROD	0x0001

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE	8		/* in bytes */
#define MIN_CHUNK_MASK	(MIN_CHUNK_SIZE - 1)

#define SAFLAG_ZERO	0x00000001
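
/*
 * Illustrative sketch (not compiled): how a chunk maps onto the two-level
 * free bitmap in struct slab.  With LONG_BITS == 64 a slab can track at
 * most LONG_BITS * LONG_BITS == 4096 chunks (MAXCHUNKS); a linear chunk
 * number splits into a bitmap word index (bmi) and a bit number (bno).
 * The helper name chunk_to_bit_example() is hypothetical; the same
 * arithmetic appears in memfree() below.
 */
#if 0
static void
chunk_to_bit_example(slab_t slab, void *ptr, int *bmip, int *bnop)
{
	int cno;

	/* linear chunk number within the slab */
	cno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size;

	/* high bits pick the bitmap word, low bits pick the bit */
	*bmip = cno >> LONG_BITS_SHIFT;
	*bnop = cno & (LONG_BITS - 1);
}
#endif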

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

/*
 * Thread control
 */

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			} while (0)

/*
 * With this attribute set, do not require a function call for accessing
 * this variable when the code is compiled with -fPIC.
 */
#define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")))

static __thread struct slglobaldata slglobal TLS_ATTRIBUTE;
static pthread_key_t thread_malloc_key;
static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT;
static struct slglobaldata sldepots[NDEPOTS];

static int opt_madvise = 0;
static int opt_free = 0;
static int opt_cache = 4;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static int malloc_panic;
static int malloc_started = 0;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static void *memalloc(size_t size, int flags);
static void *memrealloc(void *ptr, size_t size);
static void memfree(void *ptr, int);
static slab_t slaballoc(int zi, size_t chunking, size_t chunk_size);
static void slabfree(slab_t slab);
static void slabterm(slglobaldata_t slgd, slab_t slab);
static void *_vmem_alloc(int ri, size_t slab_size);
static void _vmem_free(void *ptr, size_t slab_size);
static void _mpanic(const char *ctl, ...) __printflike(1, 2);
#ifndef STANDALONE_DEBUG
static void malloc_init(void) __constructor(101);
#else
static void malloc_init(void) __constructor(101);
#endif

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}
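
/*
 * Example (hedged): with the 'U' option enabled, the UTRACE() records
 * emitted by the macro above can typically be captured and inspected
 * with ktrace/kdump, e.g.
 *
 *	env MALLOC_OPTIONS=U ktrace -tu ./prog
 *	kdump
 *
 * Each record carries the (ptr, size, result) triple filled in by the
 * macro.  The exact ktrace flags may differ between systems.
 */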

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

static void
malloc_init(void)
{
	const char *p = NULL;
	static spinlock_t malloc_init_lock;

	if (malloc_started)
		return;

	if (__isthreaded) {
		_SPINLOCK(&malloc_init_lock);
		if (malloc_started) {
			_SPINUNLOCK(&malloc_init_lock);
			return;
		}
	}

	Regions[0].mask = -1;	/* disallow activity in lowest region */

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':
			opt_utrace = 0;
			break;
		case 'U':
			opt_utrace = 1;
			break;
		case 'h':
			opt_madvise = 0;
			break;
		case 'H':
			opt_madvise = 1;
			break;
		case 'c':
			if (opt_cache > 0)
				--opt_cache;
			break;
		case 'C':
			++opt_cache;
			break;
		case 'f':
			opt_free = 0;
			break;
		case 'F':
			opt_free = 1;
			break;
		case 'z':
			g_malloc_flags = 0;
			break;
		case 'Z':
			g_malloc_flags = SAFLAG_ZERO;
			break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
	_nmalloc_thr_init();
	malloc_started = 1;

	if (__isthreaded)
		_SPINUNLOCK(&malloc_init_lock);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
static void _nmalloc_thr_init_once(void);
static void _nmalloc_thr_destructor(void *thrp);

void
_nmalloc_thr_init(void)
{
	static int did_init;
	static int SLGI;
	int slgi;

	slgi = SLGI++;
	cpu_ccfence();
	TAILQ_INIT(&slglobal.full_zones);
	slglobal.sldepot = &sldepots[slgi & (NDEPOTS - 1)];

	if (slglobal.masked)
		return;

	slglobal.masked = 1;
	if (did_init == 0) {
		did_init = 1;
		pthread_once(&thread_malloc_once, _nmalloc_thr_init_once);
	}
	pthread_setspecific(thread_malloc_key, &slglobal);
	slglobal.masked = 0;
}

/*
 * Called just once
 */
static void
_nmalloc_thr_init_once(void)
{
	int error;

	error = pthread_key_create(&thread_malloc_key, _nmalloc_thr_destructor);
	if (error)
		abort();
}

/*
 * Called for each thread undergoing exit
 *
 * Move all of the thread's slabs into a depot.
 */
static void
_nmalloc_thr_destructor(void *thrp)
{
	slglobaldata_t slgd = thrp;
	slab_t slab;
	int i;

	slgd->masked = 1;

	for (i = 0; i <= slgd->biggest_index; i++) {
		while ((slab = slgd->zone[i].empty_base) != NULL) {
			slgd->zone[i].empty_base = slab->next;
			slabterm(slgd, slab);
		}

		while ((slab = slgd->zone[i].avail_base) != NULL) {
			slgd->zone[i].avail_base = slab->next;
			slabterm(slgd, slab);
		}

		while ((slab = TAILQ_FIRST(&slgd->full_zones)) != NULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			slabterm(slgd, slab);
		}
	}
}
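
/*
 * Example (hedged): MALLOC_OPTIONS is parsed by malloc_init() above one
 * character at a time, so options simply concatenate.  For instance,
 *
 *	env MALLOC_OPTIONS=ZCC ./prog
 *
 * would request zeroed allocations and raise the per-thread excess slab
 * cache from its default of 4 to 6, while
 *
 *	env MALLOC_OPTIONS=F ./prog
 *
 * would disable local per-thread caching entirely.
 */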

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (size_t)*bytes;
	size_t x;
	size_t c;
	int i;

	if (n < 128) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8);			/* 8 byte chunks, 16 zones */
	}
	if (n < 4096) {
		x = 256;
		c = x / (CHUNKFACTOR * 2);
		i = 16;
	} else {
		x = 8192;
		c = x / (CHUNKFACTOR * 2);
		i = 16 + CHUNKFACTOR * 5;	/* 256->512,1024,2048,4096,8192 */
	}
	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKFACTOR;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = roundup2(n, c);
	*chunking = c;
	return (i + n / c - CHUNKFACTOR);
#if 0
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);

	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / (CHUNKINGLO*2) + CHUNKINGLO*1 - 1);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / (CHUNKINGLO*4) + CHUNKINGLO*2 - 1);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / (CHUNKINGLO*8) + CHUNKINGLO*3 - 1);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / (CHUNKINGLO*16) + CHUNKINGLO*4 - 1);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / (CHUNKINGLO*32) + CHUNKINGLO*5 - 1);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / (CHUNKINGLO*64) + CHUNKINGLO*6 - 1);
	}
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / (CHUNKINGLO*128) + CHUNKINGLO*7 - 1);
	}
	if (n < 32768) {				/* 16384-32767 */
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / (CHUNKINGLO*256) + CHUNKINGLO*8 - 1);
	}
	if (n < 65536) {
		*bytes = n = (n + 4095) & ~4095;	/* 32768-65535 */
		*chunking = 4096;
		return(n / (CHUNKINGLO*512) + CHUNKINGLO*9 - 1);
	}

	x = 131072;
	c = 8192;
	i = CHUNKINGLO*10 - 1;

	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKINGHI;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);
#endif
}

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
	void *ptr;

	ptr = memalloc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
	void *ptr;

	/*
	 * Guard against number * size overflowing size_t before handing
	 * the product to the slab allocator.
	 */
	if (size != 0 && number > SIZE_MAX / size) {
		errno = ENOMEM;
		return(NULL);
	}

	ptr = memalloc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
	void *ret;

	if (ptr == NULL)
		ret = memalloc(size, 0);
	else
		ret = memrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}
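
/*
 * Illustrative sketch (not compiled): exercising zoneindex() for a couple
 * of request sizes.  Per the table at the top of the file, sizes below 128
 * round up to a multiple of 8, and each power-of-2 band above that is split
 * into CHUNKFACTOR (8) zones.  zoneindex_example() is hypothetical and only
 * demonstrates the rounding behavior.
 */
#if 0
static void
zoneindex_example(void)
{
	size_t size;
	size_t chunking;
	int zi;

	size = 100;			/* 0-127 band */
	zi = zoneindex(&size, &chunking);
	/* size is now 104, chunking 8 */

	size = 3000;			/* 2048-4095 band */
	zi = zoneindex(&size, &chunking);
	/* size rounds up to a multiple of 256 (3072), chunking 256 */

	(void)zi;
}
#endif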

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to at least
 * PAGE_SIZE.  Otherwise we use the zoneindex mechanic to find a zone
 * matching the requirements.
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * XXX for now just find the nearest power of 2 >= size and also
	 * >= alignment and allocate that.
	 */
	while (alignment < size) {
		alignment <<= 1;
		if (alignment == 0)
			_mpanic("posix_memalign: byte value too high");
	}
	*memptr = memalloc(alignment, 0);
	return(*memptr ? 0 : ENOMEM);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
	if (ptr) {
		UTRACE(ptr, 0, 0);
		memfree(ptr, 0);
	}
}

/*
 * memalloc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.
 */
static void *
memalloc(size_t size, int flags)
{
	slglobaldata_t slgd;
	struct zoneinfo *zinfo;
	slab_t slab;
	size_t chunking;
	int bmi;
	int bno;
	u_long *bmp;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	size_t off;
	char *obj;

	if (!malloc_started)
		malloc_init();

	/*
	 * If 0 bytes is requested we still have to return a unique pointer,
	 * so allocate at least one byte.
	 */
	if (size == 0)
		size = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);
	if (size == 0)
		return(NULL);

	/*
	 * Locate a slab with available space.  If no slabs are available
	 * back-off to the empty list and if we still come up dry allocate
	 * a new slab (which will try the depot first).
	 */
retry:
	slgd = &slglobal;
	zinfo = &slgd->zone[zi];
	if ((slab = zinfo->avail_base) == NULL) {
		if ((slab = zinfo->empty_base) == NULL) {
			/*
			 * Still dry
			 */
			slab = slaballoc(zi, chunking, size);
			if (slab == NULL)
				return(NULL);
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			slab->state = AVAIL;
			if (slgd->biggest_index < zi)
				slgd->biggest_index = zi;
			++slgd->nslabs;
		} else {
			/*
			 * Pulled from empty list
			 */
			zinfo->empty_base = slab->next;
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			slab->state = AVAIL;
			--zinfo->empty_count;
		}
	}

	/*
	 * Allocate a chunk out of the slab.  HOT PATH
	 *
	 * Only the thread owning the slab can allocate out of it.
	 *
	 * NOTE: The last bit in the bitmap is always marked allocated so
	 *	 we cannot overflow here.
	 */
	bno = slab->free_bit;
	bmi = slab->free_index;
	bmp = &slab->bitmap[bmi];
	if (*bmp & (1LU << bno)) {
		atomic_clear_long(bmp, 1LU << bno);
		obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * size;
		slab->free_bit = (bno + 1) & (LONG_BITS - 1);
		atomic_add_int(&slab->navail, -1);
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	/*
	 * Allocate a chunk out of a slab.  COLD PATH
	 */
	if (slab->navail == 0) {
		zinfo->avail_base = slab->next;
		slab->state = FULL;
		TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry);
		goto retry;
	}

	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
			      size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	bmi = 0;
	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
			      size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	_mpanic("slaballoc: corrupted zone: navail %d", slab->navail);
	/* not reached */
	return NULL;
}

/*
 * Reallocate memory within the chunk
 */
static void *
memrealloc(void *ptr, size_t nsize)
{
	region_t region;
	slab_t slab;
	size_t osize;
	char *obj;
	int flags = 0;

	/*
	 * If 0 bytes is requested we still have to return a unique pointer,
	 * so allocate at least one byte.
	 */
	if (nsize == 0)
		nsize = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);
	osize = slab->chunk_size;
	if (nsize <= osize) {
		if (osize < 32 || nsize >= osize / 2) {
			obj = ptr;
			if ((flags & SAFLAG_ZERO) && nsize < osize)
				bzero(obj + nsize, osize - nsize);
			return(obj);
		}
	}

	/*
	 * Otherwise resize the object
	 */
	obj = memalloc(nsize, 0);
	if (obj) {
		if (nsize > osize)
			nsize = osize;
		bcopy(ptr, obj, nsize);
		memfree(ptr, 0);
	}
	return (obj);
}
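
/*
 * Illustrative sketch (not compiled): the reuse rule implemented by
 * memrealloc() above.  A 100-byte request rounds up to a 104-byte chunk,
 * so resizing within that chunk (and not below half of it) can return the
 * original pointer; growing past the chunk forces a new allocation and a
 * copy.  realloc_reuse_example() is hypothetical.
 */
#if 0
static void
realloc_reuse_example(void)
{
	void *p;
	void *q;

	p = malloc(100);	/* rounds up to a 104-byte chunk */
	q = realloc(p, 104);	/* still fits the chunk, may return p */
	q = realloc(q, 60);	/* >= half the chunk size, may return p */
	q = realloc(q, 200);	/* larger zone, new chunk plus copy */
	free(q);
}
#endif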

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.
 *
 * MPSAFE
 */
static void
memfree(void *ptr, int flags)
{
	region_t region;
	slglobaldata_t slgd;
	slab_t slab;
	slab_t stmp;
	slab_t *slabp;
	char *obj;
	int bmi;
	int bno;
	u_long *bmp;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 *
	 * WARNING! The slab may be owned by another thread!
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab != NULL);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);

#ifdef INVARIANTS
	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
	if (slab->chunk_size < sizeof(weirdary))
		bcopy(weirdary, ptr, slab->chunk_size);
	else
		bcopy(weirdary, ptr, sizeof(weirdary));
#endif

	bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size;
	bmi = bno >> LONG_BITS_SHIFT;
	bno &= (LONG_BITS - 1);
	bmp = &slab->bitmap[bmi];

	MASSERT(bmi >= 0 && bmi < slab->nmax);
	MASSERT((*bmp & (1LU << bno)) == 0);
	atomic_set_long(bmp, 1LU << bno);
	atomic_add_int(&slab->navail, 1);

	/*
	 * We can only do the following if we own the slab
	 */
	slgd = &slglobal;
	if (slab->slgd == slgd) {
		struct zoneinfo *zinfo;

		if (slab->free_index > bmi) {
			slab->free_index = bmi;
			slab->free_bit = bno;
		} else if (slab->free_index == bmi &&
			   slab->free_bit > bno) {
			slab->free_bit = bno;
		}
		zinfo = &slgd->zone[slab->zone_index];

		/*
		 * Freeing an object from a full slab will move it to the
		 * available list.  If the available list already has a
		 * slab we terminate the full slab instead, moving it to
		 * the depot.
		 */
		if (slab->state == FULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			if (zinfo->avail_base == NULL) {
				slab->state = AVAIL;
				stmp = zinfo->avail_base;
				slab->next = stmp;
				zinfo->avail_base = slab;
			} else {
				slabterm(slgd, slab);
				goto done;
			}
		}

		/*
		 * If the slab becomes completely empty dispose of it in
		 * some manner.  By default each thread caches up to 4
		 * empty slabs.  Only small slabs are cached.
		 */
		if (slab->navail == slab->nmax && slab->state == AVAIL) {
			/*
			 * Remove slab from available queue
			 */
			slabp = &zinfo->avail_base;
			while ((stmp = *slabp) != slab)
				slabp = &stmp->next;
			*slabp = slab->next;

			if (opt_free || opt_cache == 0) {
				/*
				 * If local caching is disabled cache the
				 * slab in the depot (or free it).
				 */
				slabterm(slgd, slab);
			} else if (slab->slab_size > BIGSLABSIZE) {
				/*
				 * We do not try to retain large slabs
				 * in per-thread caches.
				 */
				slabterm(slgd, slab);
			} else if (zinfo->empty_count > opt_cache) {
				/*
				 * We have too many slabs cached, but
				 * instead of freeing this one free
				 * an empty slab that's been idle longer.
				 *
				 * (empty_count does not change)
				 */
				stmp = zinfo->empty_base;
				slab->state = EMPTY;
				slab->next = stmp->next;
				zinfo->empty_base = slab;
				slabterm(slgd, stmp);
			} else {
				/*
				 * Cache the empty slab in our thread local
				 * empty list.
				 */
				++zinfo->empty_count;
				slab->state = EMPTY;
				slab->next = zinfo->empty_base;
				zinfo->empty_base = slab;
			}
		}
	}
done:
	;
}
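
/*
 * Illustrative sketch (not compiled): the lock-free depot protocol used by
 * slaballoc() and slabterm() below.  The depot's per-zone avail_base is a
 * singly-linked LIFO.  A consumer briefly parks the head at LOCKEDPTR while
 * it unlinks the first slab; producers and other consumers simply spin past
 * the sentinel.  The helper names are hypothetical.
 */
#if 0
static slab_t
depot_pop_example(struct zoneinfo *zinfo)
{
	slab_t slab;

	while ((slab = zinfo->avail_base) != NULL) {
		if ((void *)slab == LOCKEDPTR) {
			cpu_pause();		/* another thread holds it */
			continue;
		}
		if (atomic_cmpset_ptr(&zinfo->avail_base, slab, LOCKEDPTR)) {
			zinfo->avail_base = slab->next;	/* unlock, new head */
			return (slab);
		}
	}
	return (NULL);
}

static void
depot_push_example(struct zoneinfo *zinfo, slab_t slab)
{
	slab_t dnext;

	for (;;) {
		dnext = zinfo->avail_base;
		cpu_ccfence();
		if ((void *)dnext == LOCKEDPTR) {
			cpu_pause();
			continue;
		}
		slab->next = dnext;
		if (atomic_cmpset_ptr(&zinfo->avail_base, dnext, slab))
			break;
	}
}
#endif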

/*
 * Allocate a new slab holding objects of size chunk_size.
 */
static slab_t
slaballoc(int zi, size_t chunking, size_t chunk_size)
{
	slglobaldata_t slgd;
	slglobaldata_t sldepot;
	struct zoneinfo *zinfo;
	region_t region;
	void *save;
	slab_t slab;
	slab_t stmp;
	size_t slab_desire;
	size_t slab_size;
	size_t region_mask;
	uintptr_t chunk_offset;
	ssize_t maxchunks;
	ssize_t tmpchunks;
	int ispower2;
	int power;
	int ri;
	int rx;
	int nswath;
	int j;

	/*
	 * First look in the depot.  Any given zone in the depot may be
	 * locked by being set to -1.  We have to do this instead of simply
	 * removing the entire chain because removing the entire chain can
	 * cause racing threads to allocate local slabs for large objects,
	 * resulting in a large VSZ.
	 */
	slgd = &slglobal;
	sldepot = slgd->sldepot;
	zinfo = &sldepot->zone[zi];

	while ((slab = zinfo->avail_base) != NULL) {
		if ((void *)slab == LOCKEDPTR) {
			cpu_pause();
			continue;
		}
		if (atomic_cmpset_ptr(&zinfo->avail_base, slab, LOCKEDPTR)) {
			MASSERT(slab->slgd == NULL);
			slab->slgd = slgd;
			zinfo->avail_base = slab->next;
			return(slab);
		}
	}

	/*
	 * Nothing in the depot, allocate a new slab by locating or assigning
	 * a region and then using the system virtual memory allocator.
	 */
	slab = NULL;

	/*
	 * Calculate the start of the data chunks relative to the start
	 * of the slab.
	 */
	if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) {
		ispower2 = 1;
		chunk_offset = roundup2(sizeof(*slab), chunk_size);
	} else {
		ispower2 = 0;
		chunk_offset = sizeof(*slab) + chunking - 1;
		chunk_offset -= chunk_offset % chunking;
	}

	/*
	 * Calculate a reasonable number of chunks for the slab.
	 *
	 * Once initialized the MaxChunks[] array can only ever be
	 * reinitialized to the same value.
	 */
	maxchunks = MaxChunks[zi];
	if (maxchunks == 0) {
		/*
		 * First calculate how many chunks would fit in 1/1024
		 * available memory.  This is around 2MB on a 32 bit
		 * system and 128G on a 64-bit (48-bits available) system.
		 */
		maxchunks = (ssize_t)(NREGIONS_SIZE - chunk_offset) /
			    (ssize_t)chunk_size;
		if (maxchunks <= 0)
			maxchunks = 1;

		/*
		 * A slab cannot handle more than MAXCHUNKS chunks, but
		 * limit us to approximately MAXCHUNKS / 2 here because
		 * we may have to expand maxchunks when we calculate the
		 * actual power-of-2 slab.
		 */
		if (maxchunks > MAXCHUNKS / 2)
			maxchunks = MAXCHUNKS / 2;

		/*
		 * Try to limit the slabs to BIGSLABSIZE (~128MB).  Larger
		 * slabs will be created if the allocation does not fit.
		 */
		if (chunk_offset + chunk_size * maxchunks > BIGSLABSIZE) {
			tmpchunks = (ssize_t)(BIGSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks <= 0)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than 2MB see if we
		 * can cut it down to ~2MB.  This controls VSZ but has
		 * no effect on run-time size or performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > NOMSLABSIZE) {
			tmpchunks = (ssize_t)(NOMSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 1)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than 128K see if we
		 * can cut it down to ~128K while still maintaining a
		 * reasonably large number of chunks in each slab.  This
		 * controls VSZ but has no effect on run-time size or
		 * performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > LITSLABSIZE) {
			tmpchunks = (ssize_t)(LITSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 32)
				tmpchunks = 32;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		MaxChunks[zi] = maxchunks;
	}
	MASSERT(maxchunks > 0 && maxchunks <= MAXCHUNKS);

	/*
	 * Calculate the actual slab size.  maxchunks will be recalculated
	 * a little later.
	 */
	slab_desire = chunk_offset + chunk_size * maxchunks;
	slab_size = 8 * MAXCHUNKS;
	power = 3 + MAXCHUNKS_BITS;
	while (slab_size < slab_desire) {
		slab_size <<= 1;
		++power;
	}

	/*
	 * Do a quick recalculation based on the actual slab size but not
	 * yet dealing with whether the slab header is in-band or out-of-band.
	 * The purpose here is to see if we can reasonably reduce slab_size
	 * to a power of 4 to allow more chunk sizes to use the same slab
	 * size.
	 */
	if ((power & 1) && slab_size > 32768) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 8) {
			slab_size >>= 1;
			--power;
		}
	}
	if ((power & 2) && slab_size > 32768 * 4) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 4) {
			slab_size >>= 2;
			power -= 2;
		}
	}

	/*
	 * This case occurs when the slab_size is larger than 1/1024 available
	 * UVM.
	 */
	nswath = slab_size / NREGIONS_SIZE;
	if (nswath > NREGIONS)
		return (NULL);

	/*
	 * Try to allocate from our current best region for this zi
	 */
	region_mask = ~(slab_size - 1);
	ri = slgd->zone[zi].best_region;
	if (Regions[ri].mask == region_mask) {
		if ((slab = _vmem_alloc(ri, slab_size)) != NULL)
			goto found;
	}

	/*
	 * Try to find an existing region to allocate from.  The normal
	 * case will be for allocations that are less than 1/1024 available
	 * UVM, which fit into a single Regions[] entry.
	 */
	while (slab_size <= NREGIONS_SIZE) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ++ri) {
			if (rx < 0 && Regions[ri].mask == 0)
				rx = ri;
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
		}

		if (rx < 0)
			return(NULL);

		/*
		 * This can fail, retry either way
		 */
		atomic_cmpset_ptr((void **)&Regions[rx].mask,
				  NULL,
				  (void *)region_mask);
	}

	for (;;) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ri += nswath) {
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
			if (rx < 0) {
				for (j = nswath - 1; j >= 0; --j) {
					if (Regions[ri+j].mask != 0)
						break;
				}
				if (j < 0)
					rx = ri;
			}
		}

		/*
		 * We found a candidate, try to allocate it backwards so
		 * another thread racing a slaballoc() does not see the
		 * mask in the base index position until we are done.
		 *
		 * We can safely zero-out any partial allocations because
		 * the mask is only accessed from the base index.  Any other
		 * threads racing us will fail prior to us clearing the mask.
		 */
		if (rx < 0)
			return(NULL);
		for (j = nswath - 1; j >= 0; --j) {
			if (!atomic_cmpset_ptr((void **)&Regions[rx+j].mask,
					       NULL, (void *)region_mask)) {
				while (++j < nswath)
					Regions[rx+j].mask = 0;
				break;
			}
		}
		/* retry */
	}

	/*
	 * Fill in the new slab in region ri.  If the slab_size completely
	 * fills one or more region slots we move the slab structure out of
	 * band which should optimize the chunking (particularly for a power
	 * of 2).
	 */
found:
	region = &Regions[ri];
	MASSERT(region->slab == NULL);
	if (slab_size >= NREGIONS_SIZE) {
		save = slab;
		slab = memalloc(sizeof(*slab), 0);
		bzero(slab, sizeof(*slab));
		slab->chunks = save;
		for (j = 0; j < nswath; ++j)
			region[j].slab = slab;
		chunk_offset = 0;
	} else {
		bzero(slab, sizeof(*slab));
		slab->chunks = (char *)slab + chunk_offset;
	}

	/*
	 * Calculate the start of the chunks memory and recalculate the
	 * actual number of chunks the slab can hold.
	 */
	maxchunks = (slab_size - chunk_offset) / chunk_size;
	if (maxchunks > MAXCHUNKS)
		maxchunks = MAXCHUNKS;

	/*
	 * And fill in the rest
	 */
	slab->magic = ZALLOC_SLAB_MAGIC;
	slab->navail = maxchunks;
	slab->nmax = maxchunks;
	slab->slab_size = slab_size;
	slab->chunk_size = chunk_size;
	slab->zone_index = zi;
	slab->slgd = slgd;
	slab->state = UNKNOWN;
	slab->region = region;

	for (ri = 0; ri < maxchunks; ri += LONG_BITS) {
		if (ri + LONG_BITS <= maxchunks)
			slab->bitmap[ri >> LONG_BITS_SHIFT] = ULONG_MAX;
		else
			slab->bitmap[ri >> LONG_BITS_SHIFT] =
				(1LU << (maxchunks - ri)) - 1;
	}
	return (slab);
}

/*
 * Free a slab.
 */
static void
slabfree(slab_t slab)
{
	int nswath;
	int j;

	if (slab->region->slab == slab) {
		/*
		 * Out-of-band slab.
		 */
		nswath = slab->slab_size / NREGIONS_SIZE;
		for (j = 0; j < nswath; ++j)
			slab->region[j].slab = NULL;
		slab->magic = 0;
		_vmem_free(slab->chunks, slab->slab_size);
		memfree(slab, 0);
	} else {
		/*
		 * In-band slab.
		 */
		slab->magic = 0;
		_vmem_free(slab, slab->slab_size);
	}
}

/*
 * Terminate a slab's use in the current thread.  The slab may still have
 * outstanding allocations and thus not be deallocatable.
 */
static void
slabterm(slglobaldata_t slgd, slab_t slab)
{
	slglobaldata_t sldepot = slgd->sldepot;
	struct zoneinfo *zinfo;
	slab_t dnext;
	int zi = slab->zone_index;

	slab->slgd = NULL;
	--slgd->nslabs;
	zinfo = &sldepot->zone[zi];

	/*
	 * If the slab can be freed and the depot is either locked or not
	 * empty, then free the slab.
	 */
	if (slab->navail == slab->nmax && zinfo->avail_base) {
		slab->state = UNKNOWN;
		slabfree(slab);
		return;
	}
	slab->state = AVAIL;

	/*
	 * Link the slab into the depot
	 */
	for (;;) {
		dnext = zinfo->avail_base;
		cpu_ccfence();
		if ((void *)dnext == LOCKEDPTR) {
			cpu_pause();
			continue;
		}
		slab->next = dnext;
		if (atomic_cmpset_ptr(&zinfo->avail_base, dnext, slab))
			break;
	}
}

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(int ri, size_t slab_size)
{
	char *baddr = (void *)((uintptr_t)ri << NREGIONS_SHIFT);
	char *eaddr;
	char *addr;
	char *save;
	uintptr_t excess;

	if (slab_size < NREGIONS_SIZE)
		eaddr = baddr + NREGIONS_SIZE;
	else
		eaddr = baddr + slab_size;

	/*
	 * This usually just works but might not.
	 */
	addr = mmap(baddr, slab_size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON | MAP_SIZEALIGN, -1, 0);
	if (addr == MAP_FAILED) {
		return (NULL);
	}
	if (addr < baddr || addr + slab_size > eaddr) {
		munmap(addr, slab_size);
		return (NULL);
	}

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (slab_size - 1);
	while (excess) {
		excess = slab_size - excess;
		save = addr;

		munmap(save + excess, slab_size - excess);
		addr = _vmem_alloc(ri, slab_size);
		munmap(save, excess);
		if (addr == NULL)
			return (NULL);
		if (addr < baddr || addr + slab_size > eaddr) {
			munmap(addr, slab_size);
			return (NULL);
		}
		excess = (uintptr_t)addr & (slab_size - 1);
	}
	return (addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}