/*
 * DMALLOC.C	- Dillon's malloc
 *
 * Copyright (c) 2011,2017 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a modified slab allocator as a drop-in replacement
 * for the libc malloc().  The slab algorithm has been adjusted to support
 * dynamic sizing of slabs which effectively allows slabs to be used for
 * allocations of any size.  Because of this we have neither a small-block
 * allocator nor a big-block allocator and the code paths are simplified.
 *
 * To support dynamic slab sizing, available user virtual memory is broken
 * down into ~1024 regions.  Each region has a fixed slab size whose value
 * is set when the region is opened up for use.  The free() path simply
 * applies a mask based on the region to the pointer to acquire the base of
 * the governing slab structure.
 *
 * Regions[NREGIONS]	(1024)
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *	32768-65535	4096		8
 *	... continues forever ...	4 zones
 *
 *	For a 2^63 memory space each doubling >= 64K is broken down into
 *	4 chunking zones, so we support 88 + (48 * 4) = 280 zones.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *	+ power-of-2 sized allocations up to a page will be power-of-2
 *	  aligned.  Above that power-of-2 sized allocations are page-aligned.
 *	  Non power-of-2 sized allocations are aligned the same as the chunk
 *	  size for their zone.
 *	+ ability to allocate arbitrarily large chunks of memory
 *	+ realloc will reuse the passed pointer if possible, within the
 *	  limitations of the zone chunking.
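 *
 * As an illustrative sketch of the region scheme described above (it
 * mirrors the lookup actually performed by memfree() and memrealloc()
 * further below), the governing slab for a pointer is found roughly as:
 *
 *	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
 *	slab = region->slab ? region->slab
 *			    : (slab_t)((uintptr_t)ptr & region->mask);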
 *
 * On top of the slab allocator we also implement a 16-entry-per-thread
 * magazine cache for allocations <= NOMSLABSIZE.
 *
 * FUTURE FEATURES
 *
 *	+ [better] garbage collection
 *	+ better initial sizing.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.  Upper case letters enable
 * or increase the feature, lower case disables or decreases the feature.
 *
 * U		Enable UTRACE for all operations, observable with ktrace.
 *		Disabled by default.
 *
 * Z		Zero out allocations, otherwise allocations (except for
 *		calloc) will contain garbage.
 *		Disabled by default.
 *
 * H		Pass a hint with madvise() about unused pages.
 *		Disabled by default.
 *		Not currently implemented.
 *
 * F		Disable local per-thread caching.
 *		Disabled by default.
 *
 * C		Increase (decrease) how much excess cache to retain.
 *		Set to 4 by default.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o dmalloc.so dmalloc.c */

#ifndef STANDALONE_DEBUG
#include "libc_private.h"
#endif

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <limits.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>

#ifdef STANDALONE_DEBUG
void _nmalloc_thr_init(void);
#else
#include "spinlock.h"
#include "un-namespace.h"
#endif

#ifndef MAP_SIZEALIGN
#define MAP_SIZEALIGN	0
#endif

#if SSIZE_MAX == 0x7FFFFFFF
#define ADDRBITS	32
#define UVM_BITS	32	/* worst case */
#else
#define ADDRBITS	64
#define UVM_BITS	48	/* worst case XXX */
#endif

#if LONG_MAX == 0x7FFFFFFF
#define LONG_BITS	32
#define LONG_BITS_SHIFT	5
#else
#define LONG_BITS	64
#define LONG_BITS_SHIFT	6
#endif

#define LOCKEDPTR	((void *)(intptr_t)-1)

/*
 * Regions[]
 */
#define NREGIONS_BITS	10
#define NREGIONS	(1 << NREGIONS_BITS)
#define NREGIONS_MASK	(NREGIONS - 1)
#define NREGIONS_SHIFT	(UVM_BITS - NREGIONS_BITS)
#define NREGIONS_SIZE	(1LU << NREGIONS_SHIFT)

typedef struct region *region_t;
typedef struct slglobaldata *slglobaldata_t;
typedef struct slab *slab_t;

struct region {
	uintptr_t	mask;
	slab_t		slab;		/* conditional out of band slab */
};

static struct region Regions[NREGIONS];

/*
 * Number of chunking zones available
 */
#define CHUNKFACTOR	8
#if ADDRBITS == 32
#define NZONES		(16 + 9 * CHUNKFACTOR + 16 * CHUNKFACTOR)
#else
#define NZONES		(16 + 9 * CHUNKFACTOR + 48 * CHUNKFACTOR)
#endif

static int MaxChunks[NZONES];

#define NDEPOTS		8		/* must be power of 2 */

/*
 * Maximum number of chunks per slab, governed by the allocation bitmap in
 * each slab.  The maximum is reduced for large chunk sizes.
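 *
 * For reference, with LONG_BITS = 64 this works out to 64 * 64 = 4096
 * chunks per slab; with LONG_BITS = 32 it is 32 * 32 = 1024.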
 */
#define MAXCHUNKS	(LONG_BITS * LONG_BITS)
#define MAXCHUNKS_BITS	(LONG_BITS_SHIFT * LONG_BITS_SHIFT)
#define LITSLABSIZE	(32 * 1024)
#define NOMSLABSIZE	(2 * 1024 * 1024)
#define BIGSLABSIZE	(128 * 1024 * 1024)

#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */

TAILQ_HEAD(slab_list, slab);

/*
 * A slab structure
 */
struct slab {
	struct slab	*next;		/* slabs with available space */
	TAILQ_ENTRY(slab) entry;
	int32_t		magic;		/* magic number for sanity check */
	u_int		navail;		/* number of free elements available */
	u_int		nmax;
	u_int		free_bit;	/* free hint bitno */
	u_int		free_index;	/* free hint index */
	u_long		bitmap[LONG_BITS]; /* free chunks */
	size_t		slab_size;	/* size of entire slab */
	size_t		chunk_size;	/* chunk size for validation */
	int		zone_index;
	enum { UNKNOWN, AVAIL, EMPTY, FULL } state;
	int		flags;
	region_t	region;		/* related region */
	char		*chunks;	/* chunk base */
	slglobaldata_t	slgd;		/* localized to thread else NULL */
};

/*
 * per-thread data + global depot
 *
 * NOTE: The magazine shortcut is only used for per-thread data.
 */
#define NMAGSHORTCUT	16

struct slglobaldata {
	spinlock_t	lock;		/* only used by slglobaldepot */
	struct zoneinfo {
		slab_t	avail_base;
		slab_t	empty_base;
		int	best_region;
		int	mag_index;
		int	avail_count;
		int	empty_count;
		void	*mag_shortcut[NMAGSHORTCUT];
	} zone[NZONES];
	struct slab_list full_zones;	/* via entry */
	int		masked;
	int		biggest_index;
	size_t		nslabs;
};

#define SLAB_ZEROD	0x0001

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)

#define SAFLAG_ZERO	0x00000001

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#ifdef INVARIANTS
#define WEIRD_ADDR	0xdeadc0de
#endif

/*
 * Thread control
 */

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			} while (0)

/*
 * With this attribute set, do not require a function call for accessing
 * this variable when the code is compiled -fPIC.
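 * (This selects the initial-exec TLS model, which locates the variable at
 * a fixed offset from the thread pointer instead of going through a
 * __tls_get_addr() call.)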
 *
 * Must be empty for libc_rtld (similar to __thread)
 */
#if defined(__LIBC_RTLD)
#define TLS_ATTRIBUTE
#else
#define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")));
#endif

static __thread struct slglobaldata slglobal TLS_ATTRIBUTE;
static pthread_key_t thread_malloc_key;
static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT;
static struct slglobaldata slglobaldepot;

static int opt_madvise = 0;
static int opt_free = 0;
static int opt_cache = 4;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static int malloc_panic;

#ifdef INVARIANTS
static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};
#endif

static void *memalloc(size_t size, int flags);
static void *memrealloc(void *ptr, size_t size);
static void memfree(void *ptr, int);
static int memalign(void **memptr, size_t alignment, size_t size);
static slab_t slaballoc(int zi, size_t chunking, size_t chunk_size);
static void slabfree(slab_t slab);
static void slabterm(slglobaldata_t slgd, slab_t slab);
static void *_vmem_alloc(int ri, size_t slab_size);
static void _vmem_free(void *ptr, size_t slab_size);
static void _mpanic(const char *ctl, ...) __printflike(1, 2);
#ifndef STANDALONE_DEBUG
static void malloc_init(void) __constructor(101);
#else
static void malloc_init(void) __constructor(101);
#endif


struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

static void
malloc_init(void)
{
	const char *p = NULL;

	TAILQ_INIT(&slglobal.full_zones);

	Regions[0].mask = -1;	/* disallow activity in lowest region */

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':
			opt_utrace = 0;
			break;
		case 'U':
			opt_utrace = 1;
			break;
		case 'h':
			opt_madvise = 0;
			break;
		case 'H':
			opt_madvise = 1;
			break;
		case 'c':
			if (opt_cache > 0)
				--opt_cache;
			break;
		case 'C':
			++opt_cache;
			break;
		case 'f':
			opt_free = 0;
			break;
		case 'F':
			opt_free = 1;
			break;
		case 'z':
			g_malloc_flags = 0;
			break;
		case 'Z':
			g_malloc_flags = SAFLAG_ZERO;
			break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
static void _nmalloc_thr_init_once(void);
static void _nmalloc_thr_destructor(void *thrp);

void
_nmalloc_thr_init(void)
{
	static int did_init;

	TAILQ_INIT(&slglobal.full_zones);

	if (slglobal.masked)
		return;

	slglobal.masked = 1;
	if (did_init == 0) {
		did_init = 1;
		pthread_once(&thread_malloc_once, _nmalloc_thr_init_once);
	}
	pthread_setspecific(thread_malloc_key, &slglobal);
	slglobal.masked = 0;
}

void
_nmalloc_thr_prepfork(void)
{
	if (__isthreaded)
		_SPINLOCK(&slglobaldepot.lock);
}

void
_nmalloc_thr_parentfork(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&slglobaldepot.lock);
}

void
_nmalloc_thr_childfork(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&slglobaldepot.lock);
}

/*
 * Called just once
 */
static void
_nmalloc_thr_init_once(void)
{
	int error;

	error = pthread_key_create(&thread_malloc_key,
				   _nmalloc_thr_destructor);
	if (error)
		abort();
}

/*
 * Called for each thread undergoing exit
 *
 * Move all of the thread's slabs into a depot.
 */
static void
_nmalloc_thr_destructor(void *thrp)
{
	slglobaldata_t slgd = thrp;
	struct zoneinfo *zinfo;
	slab_t slab;
	void *ptr;
	int i;
	int j;

	slgd->masked = 1;

	for (i = 0; i <= slgd->biggest_index; i++) {
		zinfo = &slgd->zone[i];

		while ((j = zinfo->mag_index) > 0) {
			--j;
			ptr = zinfo->mag_shortcut[j];
			zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
			zinfo->mag_index = j;
			memfree(ptr, 0);
		}

		while ((slab = zinfo->empty_base) != NULL) {
			zinfo->empty_base = slab->next;
			--zinfo->empty_count;
			slabterm(slgd, slab);
		}

		while ((slab = zinfo->avail_base) != NULL) {
			zinfo->avail_base = slab->next;
			--zinfo->avail_count;
			slabterm(slgd, slab);
		}

		while ((slab = TAILQ_FIRST(&slgd->full_zones)) != NULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			slabterm(slgd, slab);
		}
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 *
 * Minimum alignment is 16 bytes for allocations >= 16 bytes to conform
 * with malloc requirements for intel/amd.
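 *
 * Worked example (illustrative): a 100-byte request falls in the 16-127
 * range, so it is rounded up to 112 bytes with 16-byte chunking and maps
 * to zone index 112/16 + 2 = 9.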
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (size_t)*bytes;
	size_t x;
	size_t c;
	int i;

	if (n < 128) {
		if (n < 16) {
			*bytes = n = (n + 7) & ~7;
			*chunking = 8;
			return(n / 8 - 1);	/* 8 byte chunks, 2 zones */
		} else {
			*bytes = n = (n + 15) & ~15;
			*chunking = 16;
			return(n / 16 + 2);	/* 16 byte chunks, 8 zones */
		}
	}
	if (n < 4096) {
		x = 256;
		c = x / (CHUNKFACTOR * 2);
		i = 16;
	} else {
		x = 8192;
		c = x / (CHUNKFACTOR * 2);
		i = 16 + CHUNKFACTOR * 5;  /* 256->512,1024,2048,4096,8192 */
	}
	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKFACTOR;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = roundup2(n, c);
	*chunking = c;
	return (i + n / c - CHUNKFACTOR);
#if 0
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);

	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / (CHUNKINGLO*2) + CHUNKINGLO*1 - 1);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / (CHUNKINGLO*4) + CHUNKINGLO*2 - 1);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / (CHUNKINGLO*8) + CHUNKINGLO*3 - 1);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / (CHUNKINGLO*16) + CHUNKINGLO*4 - 1);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / (CHUNKINGLO*32) + CHUNKINGLO*5 - 1);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / (CHUNKINGLO*64) + CHUNKINGLO*6 - 1);
	}
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / (CHUNKINGLO*128) + CHUNKINGLO*7 - 1);
	}
	if (n < 32768) {				/* 16384-32767 */
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / (CHUNKINGLO*256) + CHUNKINGLO*8 - 1);
	}
	if (n < 65536) {
		*bytes = n = (n + 4095) & ~4095;	/* 32768-65535 */
		*chunking = 4096;
		return(n / (CHUNKINGLO*512) + CHUNKINGLO*9 - 1);
	}

	x = 131072;
	c = 8192;
	i = CHUNKINGLO*10 - 1;

	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKINGHI;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);
#endif
}

/*
 * malloc() - call internal slab allocator
 */
void *
__malloc(size_t size)
{
	void *ptr;

	ptr = memalloc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
__calloc(size_t number, size_t size)
{
	void *ptr;

	ptr = memalloc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
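 *
 * For example (illustrative): if the original allocation came from a zone
 * with a 1024-byte chunk size, any new size in the range 512-1024 is
 * satisfied in place with the same pointer.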
 */
void *
__realloc(void *ptr, size_t size)
{
	void *ret;

	if (ptr == NULL)
		ret = memalloc(size, 0);
	else
		ret = memrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}

/*
 * aligned_alloc()
 *
 * Allocate (size) bytes with an alignment of (alignment).
 */
void *
__aligned_alloc(size_t alignment, size_t size)
{
	void *ptr;
	int rc;

	ptr = NULL;
	rc = memalign(&ptr, alignment, size);
	if (rc)
		errno = rc;

	return (ptr);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 */
int
__posix_memalign(void **memptr, size_t alignment, size_t size)
{
	int rc;

	/*
	 * OpenGroup spec issue 6 check
	 */
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	rc = memalign(memptr, alignment, size);

	return (rc);
}

/*
 * The slab allocator will allocate on power-of-2 boundaries up to at least
 * PAGE_SIZE.  Otherwise we use the zoneindex mechanic to find a zone
 * matching the requirements.
 */
static int
memalign(void **memptr, size_t alignment, size_t size)
{

	if (alignment < 1) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * OpenGroup spec issue 6 check
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * XXX for now just find the nearest power of 2 >= size and also
	 * >= alignment and allocate that.
	 */
	while (alignment < size) {
		alignment <<= 1;
		if (alignment == 0)
			_mpanic("posix_memalign: byte value too high");
	}
	*memptr = memalloc(alignment, 0);
	return(*memptr ? 0 : ENOMEM);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
__free(void *ptr)
{
	if (ptr) {
		UTRACE(ptr, 0, 0);
		memfree(ptr, 0);
	}
}

/*
 * memalloc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.
 */
static void *
memalloc(size_t size, int flags)
{
	slglobaldata_t slgd;
	struct zoneinfo *zinfo;
	slab_t slab;
	size_t chunking;
	int bmi;
	int bno;
	u_long *bmp;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int j;
	char *obj;

	/*
	 * If 0 bytes is requested we have to return a unique pointer, so
	 * allocate at least one byte.
	 */
	if (size == 0)
		size = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);
	if (size == 0)
		return(NULL);

	/*
	 * Try magazine shortcut first
	 */
	slgd = &slglobal;
	zinfo = &slgd->zone[zi];

	if ((j = zinfo->mag_index) != 0) {
		zinfo->mag_index = --j;
		obj = zinfo->mag_shortcut[j];
		zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return obj;
	}

	/*
	 * Locate a slab with available space.  If no slabs are available
	 * back-off to the empty list and if we still come up dry allocate
	 * a new slab (which will try the depot first).
	 */
retry:
	if ((slab = zinfo->avail_base) == NULL) {
		if ((slab = zinfo->empty_base) == NULL) {
			/*
			 * Still dry
			 */
			slab = slaballoc(zi, chunking, size);
			if (slab == NULL)
				return(NULL);
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			slab->state = AVAIL;
			if (slgd->biggest_index < zi)
				slgd->biggest_index = zi;
			++slgd->nslabs;
		} else {
			/*
			 * Pulled from empty list
			 */
			zinfo->empty_base = slab->next;
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			slab->state = AVAIL;
			--zinfo->empty_count;
		}
	}

	/*
	 * Allocate a chunk out of the slab.  HOT PATH
	 *
	 * Only the thread owning the slab can allocate out of it.
	 *
	 * NOTE: The last bit in the bitmap is always marked allocated so
	 *	 we cannot overflow here.
	 */
	bno = slab->free_bit;
	bmi = slab->free_index;
	bmp = &slab->bitmap[bmi];
	if (*bmp & (1LU << bno)) {
		atomic_clear_long(bmp, 1LU << bno);
		obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * size;
		slab->free_bit = (bno + 1) & (LONG_BITS - 1);
		atomic_add_int(&slab->navail, -1);
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	/*
	 * Allocate a chunk out of a slab.  COLD PATH
	 */
	if (slab->navail == 0) {
		zinfo->avail_base = slab->next;
		--zinfo->avail_count;
		slab->state = FULL;
		TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry);
		goto retry;
	}

	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks +
			      ((bmi << LONG_BITS_SHIFT) + bno) * size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	bmi = 0;
	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks +
			      ((bmi << LONG_BITS_SHIFT) + bno) * size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	_mpanic("slaballoc: corrupted zone: navail %d", slab->navail);
	/* not reached */
	return NULL;
}

/*
 * Reallocate memory within the chunk
 */
static void *
memrealloc(void *ptr, size_t nsize)
{
	region_t region;
	slab_t slab;
	size_t osize;
	char *obj;
	int flags = 0;

	/*
	 * If 0 bytes is requested we have to return a unique pointer, so
	 * allocate at least one byte.
	 */
	if (nsize == 0)
		nsize = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);
	osize = slab->chunk_size;
	if (nsize <= osize) {
		if (osize < 32 || nsize >= osize / 2) {
			obj = ptr;
			if ((flags & SAFLAG_ZERO) && nsize < osize)
				bzero(obj + nsize, osize - nsize);
			return(obj);
		}
	}

	/*
	 * Otherwise resize the object
	 */
	obj = memalloc(nsize, 0);
	if (obj) {
		if (nsize > osize)
			nsize = osize;
		bcopy(ptr, obj, nsize);
		memfree(ptr, 0);
	}
	return (obj);
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.
 *
 * MPSAFE
 */
static void
memfree(void *ptr, int flags)
{
	region_t region;
	slglobaldata_t slgd;
	slab_t slab;
	slab_t stmp;
	slab_t *slabp;
	int bmi;
	int bno;
	int j;
	u_long *bmp;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 *
	 * WARNING! The slab may be owned by another thread!
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab != NULL);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);

#ifdef INVARIANTS
	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
	if (slab->chunk_size < sizeof(weirdary))
		bcopy(weirdary, ptr, slab->chunk_size);
	else
		bcopy(weirdary, ptr, sizeof(weirdary));
#endif
	slgd = &slglobal;

	/*
	 * Use mag_shortcut[] when possible
	 */
	if (slgd->masked == 0 && slab->chunk_size <= NOMSLABSIZE) {
		struct zoneinfo *zinfo;

		zinfo = &slgd->zone[slab->zone_index];
		j = zinfo->mag_index;
		if (j < NMAGSHORTCUT) {
			zinfo->mag_shortcut[j] = ptr;
			zinfo->mag_index = j + 1;
			return;
		}
	}

	/*
	 * Free to slab and increment navail.  We can delay incrementing
	 * navail to prevent the slab from being destroyed out from under
	 * us while we do other optimizations.
	 */
	bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size;
	bmi = bno >> LONG_BITS_SHIFT;
	bno &= (LONG_BITS - 1);
	bmp = &slab->bitmap[bmi];

	MASSERT(bmi >= 0 && bmi < slab->nmax);
	MASSERT((*bmp & (1LU << bno)) == 0);
	atomic_set_long(bmp, 1LU << bno);

	if (slab->slgd == slgd) {
		/*
		 * We can only do the following if we own the slab.  Note
		 * that navail can be incremented by any thread even if
		 * we own the slab.
		 */
		struct zoneinfo *zinfo;

		atomic_add_int(&slab->navail, 1);
		if (slab->free_index > bmi) {
			slab->free_index = bmi;
			slab->free_bit = bno;
		} else if (slab->free_index == bmi &&
			   slab->free_bit > bno) {
			slab->free_bit = bno;
		}
		zinfo = &slgd->zone[slab->zone_index];

		/*
		 * Freeing an object from a full slab makes it less than
		 * full.  The slab must be moved to the available list.
		 *
		 * If the available list has too many slabs, release some
		 * to the depot.
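		 * ("Too many" is governed by opt_cache, i.e. the
		 * MALLOC_OPTIONS 'C'/'c' setting, 4 by default.)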
		 */
		if (slab->state == FULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			slab->state = AVAIL;
			stmp = zinfo->avail_base;
			slab->next = stmp;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			while (zinfo->avail_count > opt_cache) {
				slab = zinfo->avail_base;
				zinfo->avail_base = slab->next;
				--zinfo->avail_count;
				slabterm(slgd, slab);
			}
			goto done;
		}

		/*
		 * If the slab becomes completely empty dispose of it in
		 * some manner.  By default each thread caches up to 4
		 * empty slabs.  Only small slabs are cached.
		 */
		if (slab->navail == slab->nmax && slab->state == AVAIL) {
			/*
			 * Remove slab from available queue
			 */
			slabp = &zinfo->avail_base;
			while ((stmp = *slabp) != slab)
				slabp = &stmp->next;
			*slabp = slab->next;
			--zinfo->avail_count;

			if (opt_free || opt_cache == 0) {
				/*
				 * If local caching is disabled cache the
				 * slab in the depot (or free it).
				 */
				slabterm(slgd, slab);
			} else if (slab->slab_size > BIGSLABSIZE) {
				/*
				 * We do not try to retain large slabs
				 * in per-thread caches.
				 */
				slabterm(slgd, slab);
			} else if (zinfo->empty_count > opt_cache) {
				/*
				 * We have too many slabs cached, but
				 * instead of freeing this one free
				 * an empty slab that's been idle longer.
				 *
				 * (empty_count does not change)
				 */
				stmp = zinfo->empty_base;
				slab->state = EMPTY;
				slab->next = stmp->next;
				zinfo->empty_base = slab;
				slabterm(slgd, stmp);
			} else {
				/*
				 * Cache the empty slab in our thread local
				 * empty list.
				 */
				++zinfo->empty_count;
				slab->state = EMPTY;
				slab->next = zinfo->empty_base;
				zinfo->empty_base = slab;
			}
		}
	} else if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
		slglobaldata_t sldepot;

		/*
		 * If freeing to a slab owned by the global depot, and
		 * the slab becomes completely EMPTY, try to move it to
		 * the correct list.
		 */
		sldepot = &slglobaldepot;
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
		if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
			struct zoneinfo *zinfo;

			/*
			 * Move the slab to the empty list
			 */
			MASSERT(slab->state == AVAIL);
			atomic_add_int(&slab->navail, 1);
			zinfo = &sldepot->zone[slab->zone_index];
			slabp = &zinfo->avail_base;
			while (slab != *slabp)
				slabp = &(*slabp)->next;
			*slabp = slab->next;
			--zinfo->avail_count;

			/*
			 * Clean out excessive empty entries from the
			 * depot.
			 */
			slab->state = EMPTY;
			slab->next = zinfo->empty_base;
			zinfo->empty_base = slab;
			++zinfo->empty_count;
			while (zinfo->empty_count > opt_cache) {
				slab = zinfo->empty_base;
				zinfo->empty_base = slab->next;
				--zinfo->empty_count;
				slab->state = UNKNOWN;
				if (__isthreaded)
					_SPINUNLOCK(&sldepot->lock);
				slabfree(slab);
				if (__isthreaded)
					_SPINLOCK(&sldepot->lock);
			}
		} else {
			atomic_add_int(&slab->navail, 1);
		}
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
	} else {
		/*
		 * We can't act on the slab other than by adjusting navail
		 * (and the bitmap which we did in the common code at the
		 * top).
		 */
		atomic_add_int(&slab->navail, 1);
	}
done:
	;
}

/*
 * Allocate a new slab holding objects of size chunk_size.
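 *
 * (Order of attempts: reuse a slab from the global depot's avail list,
 * otherwise mmap() fresh memory from an existing or newly assigned
 * region via _vmem_alloc().)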
 */
static slab_t
slaballoc(int zi, size_t chunking, size_t chunk_size)
{
	slglobaldata_t slgd;
	slglobaldata_t sldepot;
	struct zoneinfo *zinfo;
	region_t region;
	void *save;
	slab_t slab;
	size_t slab_desire;
	size_t slab_size;
	size_t region_mask;
	uintptr_t chunk_offset;
	ssize_t maxchunks;
	ssize_t tmpchunks;
	int ispower2;
	int power;
	int ri;
	int rx;
	int nswath;
	int j;

	/*
	 * First look in the depot.  Any given zone in the depot may be
	 * locked by being set to -1.  We have to do this instead of simply
	 * removing the entire chain because removing the entire chain can
	 * cause racing threads to allocate local slabs for large objects,
	 * resulting in a large VSZ.
	 */
	slgd = &slglobal;
	sldepot = &slglobaldepot;
	zinfo = &sldepot->zone[zi];

	if (zinfo->avail_base) {
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
		slab = zinfo->avail_base;
		if (slab) {
			MASSERT(slab->slgd == NULL);
			slab->slgd = slgd;
			zinfo->avail_base = slab->next;
			--zinfo->avail_count;
			if (__isthreaded)
				_SPINUNLOCK(&sldepot->lock);
			return slab;
		}
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
	}

	/*
	 * Nothing in the depot, allocate a new slab by locating or assigning
	 * a region and then using the system virtual memory allocator.
	 */
	slab = NULL;

	/*
	 * Calculate the start of the data chunks relative to the start
	 * of the slab.  If chunk_size is a power of 2 we guarantee
	 * power of 2 alignment.  If it is not we guarantee alignment
	 * to the chunk size.
	 */
	if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) {
		ispower2 = 1;
		chunk_offset = roundup2(sizeof(*slab), chunk_size);
	} else {
		ispower2 = 0;
		chunk_offset = sizeof(*slab) + chunking - 1;
		chunk_offset -= chunk_offset % chunking;
	}

	/*
	 * Calculate a reasonable number of chunks for the slab.
	 *
	 * Once initialized the MaxChunks[] array can only ever be
	 * reinitialized to the same value.
	 */
	maxchunks = MaxChunks[zi];
	if (maxchunks == 0) {
		/*
		 * First calculate how many chunks would fit in 1/1024
		 * available memory.  This is around 2MB on a 32 bit
		 * system and 128G on a 64-bit (48-bits available) system.
		 */
		maxchunks = (ssize_t)(NREGIONS_SIZE - chunk_offset) /
			    (ssize_t)chunk_size;
		if (maxchunks <= 0)
			maxchunks = 1;

		/*
		 * A slab cannot handle more than MAXCHUNKS chunks, but
		 * limit us to approximately MAXCHUNKS / 2 here because
		 * we may have to expand maxchunks when we calculate the
		 * actual power-of-2 slab.
		 */
		if (maxchunks > MAXCHUNKS / 2)
			maxchunks = MAXCHUNKS / 2;

		/*
		 * Try to limit the slabs to BIGSLABSIZE (~128MB).  Larger
		 * slabs will be created if the allocation does not fit.
		 */
		if (chunk_offset + chunk_size * maxchunks > BIGSLABSIZE) {
			tmpchunks = (ssize_t)(BIGSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks <= 0)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than 2MB see if we
		 * can cut it down to ~2MB.  This controls VSZ but has
		 * no effect on run-time size or performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > NOMSLABSIZE) {
			tmpchunks = (ssize_t)(NOMSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 1)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than 128K see if we
		 * can cut it down to ~128K while still maintaining a
		 * reasonably large number of chunks in each slab.  This
		 * controls VSZ but has no effect on run-time size or
		 * performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > LITSLABSIZE) {
			tmpchunks = (ssize_t)(LITSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 32)
				tmpchunks = 32;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		MaxChunks[zi] = maxchunks;
	}
	MASSERT(maxchunks > 0 && maxchunks <= MAXCHUNKS);

	/*
	 * Calculate the actual slab size.  maxchunks will be recalculated
	 * a little later.
	 */
	slab_desire = chunk_offset + chunk_size * maxchunks;
	slab_size = 8 * MAXCHUNKS;
	power = 3 + MAXCHUNKS_BITS;
	while (slab_size < slab_desire) {
		slab_size <<= 1;
		++power;
	}

	/*
	 * Do a quick recalculation based on the actual slab size but not
	 * yet dealing with whether the slab header is in-band or out-of-band.
	 * The purpose here is to see if we can reasonably reduce slab_size
	 * to a power of 4 to allow more chunk sizes to use the same slab
	 * size.
	 */
	if ((power & 1) && slab_size > 32768) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 8) {
			slab_size >>= 1;
			--power;
		}
	}
	if ((power & 2) && slab_size > 32768 * 4) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 4) {
			slab_size >>= 2;
			power -= 2;
		}
	}

	/*
	 * This case occurs when the slab_size is larger than 1/1024 available
	 * UVM.
	 */
	nswath = slab_size / NREGIONS_SIZE;
	if (nswath > NREGIONS)
		return (NULL);

	/*
	 * Try to allocate from our current best region for this zi
	 */
	region_mask = ~(slab_size - 1);
	ri = slgd->zone[zi].best_region;
	if (Regions[ri].mask == region_mask) {
		if ((slab = _vmem_alloc(ri, slab_size)) != NULL)
			goto found;
	}

	/*
	 * Try to find an existing region to allocate from.  The normal
	 * case will be for allocations that are less than 1/1024 available
	 * UVM, which fit into a single Regions[] entry.
	 */
	while (slab_size <= NREGIONS_SIZE) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ++ri) {
			if (rx < 0 && Regions[ri].mask == 0)
				rx = ri;
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
		}

		if (rx < 0)
			return(NULL);

		/*
		 * This can fail, retry either way
		 */
		atomic_cmpset_ptr((void **)&Regions[rx].mask,
				  NULL,
				  (void *)region_mask);
	}

	for (;;) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ri += nswath) {
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
			if (rx < 0) {
				for (j = nswath - 1; j >= 0; --j) {
					if (Regions[ri+j].mask != 0)
						break;
				}
				if (j < 0)
					rx = ri;
			}
		}

		/*
		 * We found a candidate, try to allocate it backwards so
		 * another thread racing a slaballoc() does not see the
		 * mask in the base index position until we are done.
		 *
		 * We can safely zero-out any partial allocations because
		 * the mask is only accessed from the base index.  Any other
		 * threads racing us will fail prior to us clearing the mask.
		 */
		if (rx < 0)
			return(NULL);
		for (j = nswath - 1; j >= 0; --j) {
			if (!atomic_cmpset_ptr((void **)&Regions[rx+j].mask,
					       NULL, (void *)region_mask)) {
				while (++j < nswath)
					Regions[rx+j].mask = 0;
				break;
			}
		}
		/* retry */
	}

	/*
	 * Fill in the new slab in region ri.  If the slab_size completely
	 * fills one or more region slots we move the slab structure out of
	 * band which should optimize the chunking (particularly for a power
	 * of 2).
	 */
found:
	region = &Regions[ri];
	MASSERT(region->slab == NULL);
	if (slab_size >= NREGIONS_SIZE) {
		save = slab;
		slab = memalloc(sizeof(*slab), 0);
		bzero(slab, sizeof(*slab));
		slab->chunks = save;
		for (j = 0; j < nswath; ++j)
			region[j].slab = slab;
		chunk_offset = 0;
	} else {
		bzero(slab, sizeof(*slab));
		slab->chunks = (char *)slab + chunk_offset;
	}

	/*
	 * Calculate the start of the chunks memory and recalculate the
	 * actual number of chunks the slab can hold.
	 */
	maxchunks = (slab_size - chunk_offset) / chunk_size;
	if (maxchunks > MAXCHUNKS)
		maxchunks = MAXCHUNKS;

	/*
	 * And fill in the rest
	 */
	slab->magic = ZALLOC_SLAB_MAGIC;
	slab->navail = maxchunks;
	slab->nmax = maxchunks;
	slab->slab_size = slab_size;
	slab->chunk_size = chunk_size;
	slab->zone_index = zi;
	slab->slgd = slgd;
	slab->state = UNKNOWN;
	slab->region = region;

	for (ri = 0; ri < maxchunks; ri += LONG_BITS) {
		if (ri + LONG_BITS <= maxchunks)
			slab->bitmap[ri >> LONG_BITS_SHIFT] = ULONG_MAX;
		else
			slab->bitmap[ri >> LONG_BITS_SHIFT] =
				(1LU << (maxchunks - ri)) - 1;
	}
	return (slab);
}

/*
 * Free a slab.
 */
static void
slabfree(slab_t slab)
{
	int nswath;
	int j;

	if (slab->region->slab == slab) {
		/*
		 * Out-of-band slab.
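		 * (The slab header was allocated separately with memalloc()
		 * and the region slots point back at it; the chunk memory is
		 * released with _vmem_free() and the header with memfree()
		 * below.)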
		 */
		nswath = slab->slab_size / NREGIONS_SIZE;
		for (j = 0; j < nswath; ++j)
			slab->region[j].slab = NULL;
		slab->magic = 0;
		_vmem_free(slab->chunks, slab->slab_size);
		memfree(slab, 0);
	} else {
		/*
		 * In-band slab.
		 */
		slab->magic = 0;
		_vmem_free(slab, slab->slab_size);
	}
}

/*
 * Terminate a slab's use in the current thread.  The slab may still have
 * outstanding allocations and thus not be deallocatable.
 */
static void
slabterm(slglobaldata_t slgd, slab_t slab)
{
	slglobaldata_t sldepot;
	struct zoneinfo *zinfo;
	int zi = slab->zone_index;

	slab->slgd = NULL;
	--slgd->nslabs;
	sldepot = &slglobaldepot;
	zinfo = &sldepot->zone[zi];

	/*
	 * Move the slab to the avail list or the empty list.
	 */
	if (__isthreaded)
		_SPINLOCK(&sldepot->lock);
	if (slab->navail == slab->nmax) {
		slab->state = EMPTY;
		slab->next = zinfo->empty_base;
		zinfo->empty_base = slab;
		++zinfo->empty_count;
	} else {
		slab->state = AVAIL;
		slab->next = zinfo->avail_base;
		zinfo->avail_base = slab;
		++zinfo->avail_count;
	}

	/*
	 * Clean extra slabs out of the empty list
	 */
	while (zinfo->empty_count > opt_cache) {
		slab = zinfo->empty_base;
		zinfo->empty_base = slab->next;
		--zinfo->empty_count;
		slab->state = UNKNOWN;
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
		slabfree(slab);
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
	}
	if (__isthreaded)
		_SPINUNLOCK(&sldepot->lock);
}

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(int ri, size_t slab_size)
{
	char *baddr = (void *)((uintptr_t)ri << NREGIONS_SHIFT);
	char *eaddr;
	char *addr;
	char *save;
	uintptr_t excess;

	if (slab_size < NREGIONS_SIZE)
		eaddr = baddr + NREGIONS_SIZE;
	else
		eaddr = baddr + slab_size;

	/*
	 * This usually just works but might not.
	 */
	addr = mmap(baddr, slab_size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON | MAP_SIZEALIGN, -1, 0);
	if (addr == MAP_FAILED) {
		return (NULL);
	}
	if (addr < baddr || addr + slab_size > eaddr) {
		munmap(addr, slab_size);
		return (NULL);
	}

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
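	 *
	 * Illustrative example: for a 64KB slab mapped 16KB past an
	 * alignment boundary, the 16KB of the mapping lying beyond the next
	 * boundary is unmapped first, the allocation is retried (ideally
	 * landing on that now-free boundary), and the leading 48KB of the
	 * original mapping is then released.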
	 *
	 * BBBBBBBBBBB	BBBBBBBBBBB	BBBBBBBBBBB	block alignment
	 *    aaaaaaaaa	aaaaaaaaaaa	aa		mis-aligned allocation
	 *    xxxxxxxxx				final excess calculation
	 *    ^ returned address
	 */
	excess = (uintptr_t)addr & (slab_size - 1);
	while (excess) {
		excess = slab_size - excess;
		save = addr;

		munmap(save + excess, slab_size - excess);
		addr = _vmem_alloc(ri, slab_size);
		munmap(save, excess);
		if (addr == NULL)
			return (NULL);
		if (addr < baddr || addr + slab_size > eaddr) {
			munmap(addr, slab_size);
			return (NULL);
		}
		excess = (uintptr_t)addr & (slab_size - 1);
	}
	return (addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}

__weak_reference(__aligned_alloc, aligned_alloc);
__weak_reference(__malloc, malloc);
__weak_reference(__calloc, calloc);
__weak_reference(__posix_memalign, posix_memalign);
__weak_reference(__realloc, realloc);
__weak_reference(__free, free);