/*
 * DMALLOC.C	- Dillon's malloc
 *
 * Copyright (c) 2011,2017 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a modified slab allocator as a drop-in replacement
 * for the libc malloc().  The slab algorithm has been adjusted to support
 * dynamic sizing of slabs which effectively allows slabs to be used for
 * allocations of any size.  Because of this we have neither a small-block
 * allocator nor a big-block allocator and the code paths are simplified.
 *
 * To support dynamic slab sizing, available user virtual memory is broken
 * down into ~1024 regions.  Each region has a fixed slab size whose value
 * is set when the region is opened up for use.  The free() path simply
 * applies a mask based on the region to the pointer to acquire the base of
 * the governing slab structure.
 *
 * Regions[NREGIONS]	(1024)
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *	32768-65535	4096		8
 *	... continues forever ...	4 zones
 *
 *	For a 2^63 memory space each doubling >= 64K is broken down into
 *	4 chunking zones, so we support 88 + (48 * 4) = 280 zones.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *	+ power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	  Above that power-of-2 sized allocations are page-aligned.  Non
 *	  power-of-2 sized allocations are aligned the same as the chunk
 *	  size for their zone.
 *	+ ability to allocate arbitrarily large chunks of memory
 *	+ realloc will reuse the passed pointer if possible, within the
 *	  limitations of the zone chunking.
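 *
 *	The region-mask lookup described above is, in effect (this mirrors
 *	the code in memfree() and memrealloc() below):
 *
 *	    region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) &
 *			      NREGIONS_MASK];
 *	    slab = region->slab;	(out-of-band slab header, if any)
 *	    if (slab == NULL)
 *		    slab = (void *)((uintptr_t)ptr & region->mask);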
 *
 * On top of the slab allocator we also implement a 16-entry-per-thread
 * magazine cache for allocations <= NOMSLABSIZE.
 *
 * FUTURE FEATURES
 *
 *	+ [better] garbage collection
 *	+ better initial sizing.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.  Upper case letters enable
 * or increase a feature, lower case letters disable or decrease it.
 *
 * U		Enable UTRACE for all operations, observable with ktrace.
 *		Disabled by default.
 *
 * Z		Zero out allocations, otherwise allocations (except for
 *		calloc) will contain garbage.
 *		Disabled by default.
 *
 * H		Pass a hint with madvise() about unused pages.
 *		Disabled by default.
 *		Not currently implemented.
 *
 * F		Disable local per-thread caching.
 *		Disabled by default.
 *
 * C		Increase (decrease) how much excess cache to retain.
 *		Set to 4 by default.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o dmalloc.so dmalloc.c */

#ifndef STANDALONE_DEBUG
#include "libc_private.h"
#endif

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <limits.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>

#ifdef STANDALONE_DEBUG
void _nmalloc_thr_init(void);
#else
#include "spinlock.h"
#include "un-namespace.h"
#endif

#ifndef MAP_SIZEALIGN
#define MAP_SIZEALIGN	0
#endif

#if SSIZE_MAX == 0x7FFFFFFF
#define ADDRBITS	32
#define UVM_BITS	32	/* worst case */
#else
#define ADDRBITS	64
#define UVM_BITS	48	/* worst case XXX */
#endif

#if LONG_MAX == 0x7FFFFFFF
#define LONG_BITS	32
#define LONG_BITS_SHIFT	5
#else
#define LONG_BITS	64
#define LONG_BITS_SHIFT	6
#endif

#define LOCKEDPTR	((void *)(intptr_t)-1)

/*
 * Regions[]
 */
#define NREGIONS_BITS	10
#define NREGIONS	(1 << NREGIONS_BITS)
#define NREGIONS_MASK	(NREGIONS - 1)
#define NREGIONS_SHIFT	(UVM_BITS - NREGIONS_BITS)
#define NREGIONS_SIZE	(1LU << NREGIONS_SHIFT)

typedef struct region *region_t;
typedef struct slglobaldata *slglobaldata_t;
typedef struct slab *slab_t;

struct region {
	uintptr_t	mask;
	slab_t		slab;	/* conditional out of band slab */
};

static struct region Regions[NREGIONS];

/*
 * Number of chunking zones available
 */
#define CHUNKFACTOR	8
#if ADDRBITS == 32
#define NZONES		(16 + 9 * CHUNKFACTOR + 16 * CHUNKFACTOR)
#else
#define NZONES		(16 + 9 * CHUNKFACTOR + 48 * CHUNKFACTOR)
#endif

static int MaxChunks[NZONES];

#define NDEPOTS		8		/* must be power of 2 */

/*
 * Maximum number of chunks per slab, governed by the allocation bitmap in
 * each slab.  The maximum is reduced for large chunk sizes.
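 *
 * With 64-bit longs the per-slab bitmap holds LONG_BITS * LONG_BITS =
 * 64 * 64 = 4096 bits, so MAXCHUNKS is 4096 (1024 with 32-bit longs).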
204 */ 205 #define MAXCHUNKS (LONG_BITS * LONG_BITS) 206 #define MAXCHUNKS_BITS (LONG_BITS_SHIFT * LONG_BITS_SHIFT) 207 #define LITSLABSIZE (32 * 1024) 208 #define NOMSLABSIZE (2 * 1024 * 1024) 209 #define BIGSLABSIZE (128 * 1024 * 1024) 210 211 #define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ 212 213 TAILQ_HEAD(slab_list, slab); 214 215 /* 216 * A slab structure 217 */ 218 struct slab { 219 struct slab *next; /* slabs with available space */ 220 TAILQ_ENTRY(slab) entry; 221 int32_t magic; /* magic number for sanity check */ 222 u_int navail; /* number of free elements available */ 223 u_int nmax; 224 u_int free_bit; /* free hint bitno */ 225 u_int free_index; /* free hint index */ 226 u_long bitmap[LONG_BITS]; /* free chunks */ 227 size_t slab_size; /* size of entire slab */ 228 size_t chunk_size; /* chunk size for validation */ 229 int zone_index; 230 enum { UNKNOWN, AVAIL, EMPTY, FULL } state; 231 int flags; 232 region_t region; /* related region */ 233 char *chunks; /* chunk base */ 234 slglobaldata_t slgd; /* localized to thread else NULL */ 235 }; 236 237 /* 238 * per-thread data + global depot 239 * 240 * NOTE: The magazine shortcut is only used for per-thread data. 241 */ 242 #define NMAGSHORTCUT 16 243 244 struct slglobaldata { 245 spinlock_t lock; /* only used by slglobaldepot */ 246 struct zoneinfo { 247 slab_t avail_base; 248 slab_t empty_base; 249 int best_region; 250 int mag_index; 251 int avail_count; 252 int empty_count; 253 void *mag_shortcut[NMAGSHORTCUT]; 254 } zone[NZONES]; 255 struct slab_list full_zones; /* via entry */ 256 int masked; 257 int biggest_index; 258 size_t nslabs; 259 }; 260 261 #define SLAB_ZEROD 0x0001 262 263 /* 264 * Misc constants. Note that allocations that are exact multiples of 265 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 266 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 267 */ 268 #define MIN_CHUNK_SIZE 8 /* in bytes */ 269 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 270 271 #define SAFLAG_ZERO 0x00000001 272 273 /* 274 * The WEIRD_ADDR is used as known text to copy into free objects to 275 * try to create deterministic failure cases if the data is accessed after 276 * free. 277 * 278 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 279 * not be larger then 64. 280 */ 281 #ifdef INVARIANTS 282 #define WEIRD_ADDR 0xdeadc0de 283 #endif 284 285 /* 286 * Thread control 287 */ 288 289 #define MASSERT(exp) do { if (__predict_false(!(exp))) \ 290 _mpanic("assertion: %s in %s", \ 291 #exp, __func__); \ 292 } while (0) 293 294 /* 295 * With this attribute set, do not require a function call for accessing 296 * this variable when the code is compiled -fPIC. 
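 * (The initial-exec TLS model lets the compiler address the variable
 * relative to the thread pointer without calling __tls_get_addr().)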
297 * 298 * Must be empty for libc_rtld (similar to __thread) 299 */ 300 #if defined(__LIBC_RTLD) 301 #define TLS_ATTRIBUTE 302 #else 303 #define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec"))); 304 #endif 305 306 static __thread struct slglobaldata slglobal TLS_ATTRIBUTE; 307 static pthread_key_t thread_malloc_key; 308 static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT; 309 static struct slglobaldata slglobaldepot; 310 311 static int opt_madvise = 0; 312 static int opt_free = 0; 313 static int opt_cache = 4; 314 static int opt_utrace = 0; 315 static int g_malloc_flags = 0; 316 static int malloc_panic; 317 318 #ifdef INVARIANTS 319 static const int32_t weirdary[16] = { 320 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, 321 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, 322 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, 323 WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR 324 }; 325 #endif 326 327 static void *memalloc(size_t size, int flags); 328 static void *memrealloc(void *ptr, size_t size); 329 static void memfree(void *ptr, int); 330 static int memalign(void **memptr, size_t alignment, size_t size); 331 static slab_t slaballoc(int zi, size_t chunking, size_t chunk_size); 332 static void slabfree(slab_t slab); 333 static void slabterm(slglobaldata_t slgd, slab_t slab); 334 static void *_vmem_alloc(int ri, size_t slab_size); 335 static void _vmem_free(void *ptr, size_t slab_size); 336 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 337 #ifndef STANDALONE_DEBUG 338 static void malloc_init(void) __constructor(101); 339 #else 340 static void malloc_init(void) __constructor(101); 341 #endif 342 343 344 struct nmalloc_utrace { 345 void *p; 346 size_t s; 347 void *r; 348 }; 349 350 #define UTRACE(a, b, c) \ 351 if (opt_utrace) { \ 352 struct nmalloc_utrace ut = { \ 353 .p = (a), \ 354 .s = (b), \ 355 .r = (c) \ 356 }; \ 357 utrace(&ut, sizeof(ut)); \ 358 } 359 360 #ifdef INVARIANTS 361 /* 362 * If enabled any memory allocated without M_ZERO is initialized to -1. 363 */ 364 static int use_malloc_pattern; 365 #endif 366 367 static void 368 malloc_init(void) 369 { 370 const char *p = NULL; 371 372 TAILQ_INIT(&slglobal.full_zones); 373 374 Regions[0].mask = -1; /* disallow activity in lowest region */ 375 376 if (issetugid() == 0) 377 p = getenv("MALLOC_OPTIONS"); 378 379 for (; p != NULL && *p != '\0'; p++) { 380 switch(*p) { 381 case 'u': 382 opt_utrace = 0; 383 break; 384 case 'U': 385 opt_utrace = 1; 386 break; 387 case 'h': 388 opt_madvise = 0; 389 break; 390 case 'H': 391 opt_madvise = 1; 392 break; 393 case 'c': 394 if (opt_cache > 0) 395 --opt_cache; 396 break; 397 case 'C': 398 ++opt_cache; 399 break; 400 case 'f': 401 opt_free = 0; 402 break; 403 case 'F': 404 opt_free = 1; 405 break; 406 case 'z': 407 g_malloc_flags = 0; 408 break; 409 case 'Z': 410 g_malloc_flags = SAFLAG_ZERO; 411 break; 412 default: 413 break; 414 } 415 } 416 417 UTRACE((void *) -1, 0, NULL); 418 } 419 420 /* 421 * We have to install a handler for nmalloc thread teardowns when 422 * the thread is created. We cannot delay this because destructors in 423 * sophisticated userland programs can call malloc() for the first time 424 * during their thread exit. 425 * 426 * This routine is called directly from pthreads. 
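 *
 * Teardown flow, roughly: _nmalloc_thr_init_once() registers
 * _nmalloc_thr_destructor() via pthread_key_create(); on thread exit the
 * destructor flushes the per-thread mag_shortcut[] entries through
 * memfree() and hands the thread's remaining slabs to the global depot
 * via slabterm().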
427 */ 428 static void _nmalloc_thr_init_once(void); 429 static void _nmalloc_thr_destructor(void *thrp); 430 431 void 432 _nmalloc_thr_init(void) 433 { 434 static int did_init; 435 436 TAILQ_INIT(&slglobal.full_zones); 437 438 if (slglobal.masked) 439 return; 440 441 slglobal.masked = 1; 442 if (did_init == 0) { 443 did_init = 1; 444 pthread_once(&thread_malloc_once, _nmalloc_thr_init_once); 445 } 446 pthread_setspecific(thread_malloc_key, &slglobal); 447 slglobal.masked = 0; 448 } 449 450 void 451 _nmalloc_thr_prepfork(void) 452 { 453 if (__isthreaded) 454 _SPINLOCK(&slglobaldepot.lock); 455 } 456 457 void 458 _nmalloc_thr_parentfork(void) 459 { 460 if (__isthreaded) 461 _SPINUNLOCK(&slglobaldepot.lock); 462 } 463 464 void 465 _nmalloc_thr_childfork(void) 466 { 467 if (__isthreaded) 468 _SPINUNLOCK(&slglobaldepot.lock); 469 } 470 471 /* 472 * Called just once 473 */ 474 static void 475 _nmalloc_thr_init_once(void) 476 { 477 int error; 478 479 error = pthread_key_create(&thread_malloc_key, _nmalloc_thr_destructor); 480 if (error) 481 abort(); 482 } 483 484 /* 485 * Called for each thread undergoing exit 486 * 487 * Move all of the thread's slabs into a depot. 488 */ 489 static void 490 _nmalloc_thr_destructor(void *thrp) 491 { 492 slglobaldata_t slgd = thrp; 493 struct zoneinfo *zinfo; 494 slab_t slab; 495 void *ptr; 496 int i; 497 int j; 498 499 slgd->masked = 1; 500 501 for (i = 0; i <= slgd->biggest_index; i++) { 502 zinfo = &slgd->zone[i]; 503 504 while ((j = zinfo->mag_index) > 0) { 505 --j; 506 ptr = zinfo->mag_shortcut[j]; 507 zinfo->mag_shortcut[j] = NULL; /* SAFETY */ 508 zinfo->mag_index = j; 509 memfree(ptr, 0); 510 } 511 512 while ((slab = zinfo->empty_base) != NULL) { 513 zinfo->empty_base = slab->next; 514 --zinfo->empty_count; 515 slabterm(slgd, slab); 516 } 517 518 while ((slab = zinfo->avail_base) != NULL) { 519 zinfo->avail_base = slab->next; 520 --zinfo->avail_count; 521 slabterm(slgd, slab); 522 } 523 524 while ((slab = TAILQ_FIRST(&slgd->full_zones)) != NULL) { 525 TAILQ_REMOVE(&slgd->full_zones, slab, entry); 526 slabterm(slgd, slab); 527 } 528 } 529 } 530 531 /* 532 * Calculate the zone index for the allocation request size and set the 533 * allocation request size to that particular zone's chunk size. 534 * 535 * Minimum alignment is 16 bytes for allocations >= 16 bytes to conform 536 * with malloc requirements for intel/amd. 
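 *
 * For example, a 100-byte request is rounded up to 112 bytes (16-byte
 * chunking, zone 112/16 + 2 = 9), while a 3000-byte request is rounded up
 * to 3072 bytes (256-byte chunking, zone 52).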
537 */ 538 static __inline int 539 zoneindex(size_t *bytes, size_t *chunking) 540 { 541 size_t n = (size_t)*bytes; 542 size_t x; 543 size_t c; 544 int i; 545 546 if (n < 128) { 547 if (n < 16) { 548 *bytes = n = (n + 7) & ~7; 549 *chunking = 8; 550 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 551 } else { 552 *bytes = n = (n + 15) & ~15; 553 *chunking = 16; 554 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 555 } 556 } 557 if (n < 4096) { 558 x = 256; 559 c = x / (CHUNKFACTOR * 2); 560 i = 16; 561 } else { 562 x = 8192; 563 c = x / (CHUNKFACTOR * 2); 564 i = 16 + CHUNKFACTOR * 5; /* 256->512,1024,2048,4096,8192 */ 565 } 566 while (n >= x) { 567 x <<= 1; 568 c <<= 1; 569 i += CHUNKFACTOR; 570 if (x == 0) 571 _mpanic("slaballoc: byte value too high"); 572 } 573 *bytes = n = roundup2(n, c); 574 *chunking = c; 575 return (i + n / c - CHUNKFACTOR); 576 #if 0 577 *bytes = n = (n + c - 1) & ~(c - 1); 578 *chunking = c; 579 return (n / c + i); 580 581 if (n < 256) { 582 *bytes = n = (n + 15) & ~15; 583 *chunking = 16; 584 return(n / (CHUNKINGLO*2) + CHUNKINGLO*1 - 1); 585 } 586 if (n < 8192) { 587 if (n < 512) { 588 *bytes = n = (n + 31) & ~31; 589 *chunking = 32; 590 return(n / (CHUNKINGLO*4) + CHUNKINGLO*2 - 1); 591 } 592 if (n < 1024) { 593 *bytes = n = (n + 63) & ~63; 594 *chunking = 64; 595 return(n / (CHUNKINGLO*8) + CHUNKINGLO*3 - 1); 596 } 597 if (n < 2048) { 598 *bytes = n = (n + 127) & ~127; 599 *chunking = 128; 600 return(n / (CHUNKINGLO*16) + CHUNKINGLO*4 - 1); 601 } 602 if (n < 4096) { 603 *bytes = n = (n + 255) & ~255; 604 *chunking = 256; 605 return(n / (CHUNKINGLO*32) + CHUNKINGLO*5 - 1); 606 } 607 *bytes = n = (n + 511) & ~511; 608 *chunking = 512; 609 return(n / (CHUNKINGLO*64) + CHUNKINGLO*6 - 1); 610 } 611 if (n < 16384) { 612 *bytes = n = (n + 1023) & ~1023; 613 *chunking = 1024; 614 return(n / (CHUNKINGLO*128) + CHUNKINGLO*7 - 1); 615 } 616 if (n < 32768) { /* 16384-32767 */ 617 *bytes = n = (n + 2047) & ~2047; 618 *chunking = 2048; 619 return(n / (CHUNKINGLO*256) + CHUNKINGLO*8 - 1); 620 } 621 if (n < 65536) { 622 *bytes = n = (n + 4095) & ~4095; /* 32768-65535 */ 623 *chunking = 4096; 624 return(n / (CHUNKINGLO*512) + CHUNKINGLO*9 - 1); 625 } 626 627 x = 131072; 628 c = 8192; 629 i = CHUNKINGLO*10 - 1; 630 631 while (n >= x) { 632 x <<= 1; 633 c <<= 1; 634 i += CHUNKINGHI; 635 if (x == 0) 636 _mpanic("slaballoc: byte value too high"); 637 } 638 *bytes = n = (n + c - 1) & ~(c - 1); 639 *chunking = c; 640 return (n / c + i); 641 #endif 642 } 643 644 /* 645 * malloc() - call internal slab allocator 646 */ 647 void * 648 __malloc(size_t size) 649 { 650 void *ptr; 651 652 ptr = memalloc(size, 0); 653 if (ptr == NULL) 654 errno = ENOMEM; 655 else 656 UTRACE(0, size, ptr); 657 return(ptr); 658 } 659 660 /* 661 * calloc() - call internal slab allocator 662 */ 663 void * 664 __calloc(size_t number, size_t size) 665 { 666 void *ptr; 667 668 ptr = memalloc(number * size, SAFLAG_ZERO); 669 if (ptr == NULL) 670 errno = ENOMEM; 671 else 672 UTRACE(0, number * size, ptr); 673 return(ptr); 674 } 675 676 /* 677 * realloc() (SLAB ALLOCATOR) 678 * 679 * We do not attempt to optimize this routine beyond reusing the same 680 * pointer if the new size fits within the chunking of the old pointer's 681 * zone. 
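 *
 * For example, a 500-byte allocation is carved from a 512-byte chunk, so a
 * later realloc() to any size from 256 to 512 bytes returns the same
 * pointer (see the osize / 2 test in memrealloc()).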
682 */ 683 void * 684 __realloc(void *ptr, size_t size) 685 { 686 void *ret; 687 688 if (ptr == NULL) 689 ret = memalloc(size, 0); 690 else 691 ret = memrealloc(ptr, size); 692 if (ret == NULL) 693 errno = ENOMEM; 694 else 695 UTRACE(ptr, size, ret); 696 return(ret); 697 } 698 699 /* 700 * aligned_alloc() 701 * 702 * Allocate (size) bytes with a alignment of (alignment). 703 */ 704 void * 705 __aligned_alloc(size_t alignment, size_t size) 706 { 707 void *ptr; 708 int rc; 709 710 ptr = NULL; 711 rc = memalign(&ptr, alignment, size); 712 if (rc) 713 errno = rc; 714 715 return (ptr); 716 } 717 718 /* 719 * posix_memalign() 720 * 721 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 722 * is a power of 2 >= sizeof(void *). 723 */ 724 int 725 __posix_memalign(void **memptr, size_t alignment, size_t size) 726 { 727 int rc; 728 729 /* 730 * OpenGroup spec issue 6 check 731 */ 732 if (alignment < sizeof(void *)) { 733 *memptr = NULL; 734 return(EINVAL); 735 } 736 737 rc = memalign(memptr, alignment, size); 738 739 return (rc); 740 } 741 742 /* 743 * The slab allocator will allocate on power-of-2 boundaries up to at least 744 * PAGE_SIZE. Otherwise we use the zoneindex mechanic to find a zone 745 * matching the requirements. 746 */ 747 static int 748 memalign(void **memptr, size_t alignment, size_t size) 749 { 750 751 if (alignment < 1) { 752 *memptr = NULL; 753 return(EINVAL); 754 } 755 756 /* 757 * OpenGroup spec issue 6 check 758 */ 759 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 760 *memptr = NULL; 761 return(EINVAL); 762 } 763 764 /* 765 * XXX for now just find the nearest power of 2 >= size and also 766 * >= alignment and allocate that. 767 */ 768 while (alignment < size) { 769 alignment <<= 1; 770 if (alignment == 0) 771 _mpanic("posix_memalign: byte value too high"); 772 } 773 *memptr = memalloc(alignment, 0); 774 return(*memptr ? 0 : ENOMEM); 775 } 776 777 /* 778 * free() (SLAB ALLOCATOR) - do the obvious 779 */ 780 void 781 __free(void *ptr) 782 { 783 if (ptr) { 784 UTRACE(ptr, 0, 0); 785 memfree(ptr, 0); 786 } 787 } 788 789 /* 790 * memalloc() (SLAB ALLOCATOR) 791 * 792 * Allocate memory via the slab allocator. 793 */ 794 static void * 795 memalloc(size_t size, int flags) 796 { 797 slglobaldata_t slgd; 798 struct zoneinfo *zinfo; 799 slab_t slab; 800 size_t chunking; 801 int bmi; 802 int bno; 803 u_long *bmp; 804 int zi; 805 #ifdef INVARIANTS 806 int i; 807 #endif 808 int j; 809 char *obj; 810 811 /* 812 * If 0 bytes is requested we have to return a unique pointer, allocate 813 * at least one byte. 814 */ 815 if (size == 0) 816 size = 1; 817 818 /* Capture global flags */ 819 flags |= g_malloc_flags; 820 821 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 822 zi = zoneindex(&size, &chunking); 823 MASSERT(zi < NZONES); 824 if (size == 0) 825 return(NULL); 826 827 /* 828 * Try magazine shortcut first 829 */ 830 slgd = &slglobal; 831 zinfo = &slgd->zone[zi]; 832 833 if ((j = zinfo->mag_index) != 0) { 834 zinfo->mag_index = --j; 835 obj = zinfo->mag_shortcut[j]; 836 zinfo->mag_shortcut[j] = NULL; /* SAFETY */ 837 if (flags & SAFLAG_ZERO) 838 bzero(obj, size); 839 return obj; 840 } 841 842 /* 843 * Locate a slab with available space. If no slabs are available 844 * back-off to the empty list and if we still come up dry allocate 845 * a new slab (which will try the depot first). 
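	 *
	 * The overall search order for an allocation is thus: the per-thread
	 * mag_shortcut[] (checked above), the thread's avail list, its empty
	 * list, and finally slaballoc(), which consults the global depot
	 * before mapping a new slab with _vmem_alloc().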
846 */ 847 retry: 848 if ((slab = zinfo->avail_base) == NULL) { 849 if ((slab = zinfo->empty_base) == NULL) { 850 /* 851 * Still dry 852 */ 853 slab = slaballoc(zi, chunking, size); 854 if (slab == NULL) 855 return(NULL); 856 slab->next = zinfo->avail_base; 857 zinfo->avail_base = slab; 858 ++zinfo->avail_count; 859 slab->state = AVAIL; 860 if (slgd->biggest_index < zi) 861 slgd->biggest_index = zi; 862 ++slgd->nslabs; 863 } else { 864 /* 865 * Pulled from empty list 866 */ 867 zinfo->empty_base = slab->next; 868 slab->next = zinfo->avail_base; 869 zinfo->avail_base = slab; 870 ++zinfo->avail_count; 871 slab->state = AVAIL; 872 --zinfo->empty_count; 873 } 874 } 875 876 /* 877 * Allocate a chunk out of the slab. HOT PATH 878 * 879 * Only the thread owning the slab can allocate out of it. 880 * 881 * NOTE: The last bit in the bitmap is always marked allocated so 882 * we cannot overflow here. 883 */ 884 bno = slab->free_bit; 885 bmi = slab->free_index; 886 bmp = &slab->bitmap[bmi]; 887 if (*bmp & (1LU << bno)) { 888 atomic_clear_long(bmp, 1LU << bno); 889 obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * size; 890 slab->free_bit = (bno + 1) & (LONG_BITS - 1); 891 atomic_add_int(&slab->navail, -1); 892 if (flags & SAFLAG_ZERO) 893 bzero(obj, size); 894 return (obj); 895 } 896 897 /* 898 * Allocate a chunk out of a slab. COLD PATH 899 */ 900 if (slab->navail == 0) { 901 zinfo->avail_base = slab->next; 902 --zinfo->avail_count; 903 slab->state = FULL; 904 TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry); 905 goto retry; 906 } 907 908 while (bmi < LONG_BITS) { 909 bmp = &slab->bitmap[bmi]; 910 if (*bmp) { 911 bno = bsflong(*bmp); 912 atomic_clear_long(bmp, 1LU << bno); 913 obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * 914 size; 915 slab->free_index = bmi; 916 slab->free_bit = (bno + 1) & (LONG_BITS - 1); 917 atomic_add_int(&slab->navail, -1); 918 if (flags & SAFLAG_ZERO) 919 bzero(obj, size); 920 return (obj); 921 } 922 ++bmi; 923 } 924 bmi = 0; 925 while (bmi < LONG_BITS) { 926 bmp = &slab->bitmap[bmi]; 927 if (*bmp) { 928 bno = bsflong(*bmp); 929 atomic_clear_long(bmp, 1LU << bno); 930 obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * 931 size; 932 slab->free_index = bmi; 933 slab->free_bit = (bno + 1) & (LONG_BITS - 1); 934 atomic_add_int(&slab->navail, -1); 935 if (flags & SAFLAG_ZERO) 936 bzero(obj, size); 937 return (obj); 938 } 939 ++bmi; 940 } 941 _mpanic("slaballoc: corrupted zone: navail %d", slab->navail); 942 /* not reached */ 943 return NULL; 944 } 945 946 /* 947 * Reallocate memory within the chunk 948 */ 949 static void * 950 memrealloc(void *ptr, size_t nsize) 951 { 952 region_t region; 953 slab_t slab; 954 size_t osize; 955 char *obj; 956 int flags = 0; 957 958 /* 959 * If 0 bytes is requested we have to return a unique pointer, allocate 960 * at least one byte. 961 */ 962 if (nsize == 0) 963 nsize = 1; 964 965 /* Capture global flags */ 966 flags |= g_malloc_flags; 967 968 /* 969 * Locate the zone by looking up the dynamic slab size mask based 970 * on the memory region the allocation resides in. 
971 */ 972 region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK]; 973 if ((slab = region->slab) == NULL) 974 slab = (void *)((uintptr_t)ptr & region->mask); 975 MASSERT(slab->magic == ZALLOC_SLAB_MAGIC); 976 osize = slab->chunk_size; 977 if (nsize <= osize) { 978 if (osize < 32 || nsize >= osize / 2) { 979 obj = ptr; 980 if ((flags & SAFLAG_ZERO) && nsize < osize) 981 bzero(obj + nsize, osize - nsize); 982 return(obj); 983 } 984 } 985 986 /* 987 * Otherwise resize the object 988 */ 989 obj = memalloc(nsize, 0); 990 if (obj) { 991 if (nsize > osize) 992 nsize = osize; 993 bcopy(ptr, obj, nsize); 994 memfree(ptr, 0); 995 } 996 return (obj); 997 } 998 999 /* 1000 * free (SLAB ALLOCATOR) 1001 * 1002 * Free a memory block previously allocated by malloc. 1003 * 1004 * MPSAFE 1005 */ 1006 static void 1007 memfree(void *ptr, int flags) 1008 { 1009 region_t region; 1010 slglobaldata_t slgd; 1011 slab_t slab; 1012 slab_t stmp; 1013 slab_t *slabp; 1014 int bmi; 1015 int bno; 1016 int j; 1017 u_long *bmp; 1018 1019 /* 1020 * Locate the zone by looking up the dynamic slab size mask based 1021 * on the memory region the allocation resides in. 1022 * 1023 * WARNING! The slab may be owned by another thread! 1024 */ 1025 region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK]; 1026 if ((slab = region->slab) == NULL) 1027 slab = (void *)((uintptr_t)ptr & region->mask); 1028 MASSERT(slab != NULL); 1029 MASSERT(slab->magic == ZALLOC_SLAB_MAGIC); 1030 1031 #ifdef INVARIANTS 1032 /* 1033 * Put weird data into the memory to detect modifications after 1034 * freeing, illegal pointer use after freeing (we should fault on 1035 * the odd address), and so forth. 1036 */ 1037 if (slab->chunk_size < sizeof(weirdary)) 1038 bcopy(weirdary, ptr, slab->chunk_size); 1039 else 1040 bcopy(weirdary, ptr, sizeof(weirdary)); 1041 #endif 1042 slgd = &slglobal; 1043 1044 /* 1045 * Use mag_shortcut[] when possible 1046 */ 1047 if (slgd->masked == 0 && slab->chunk_size <= NOMSLABSIZE) { 1048 struct zoneinfo *zinfo; 1049 1050 zinfo = &slgd->zone[slab->zone_index]; 1051 j = zinfo->mag_index; 1052 if (j < NMAGSHORTCUT) { 1053 zinfo->mag_shortcut[j] = ptr; 1054 zinfo->mag_index = j + 1; 1055 return; 1056 } 1057 } 1058 1059 /* 1060 * Free to slab and increment navail. We can delay incrementing 1061 * navail to prevent the slab from being destroyed out from under 1062 * us while we do other optimizations. 1063 */ 1064 bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size; 1065 bmi = bno >> LONG_BITS_SHIFT; 1066 bno &= (LONG_BITS - 1); 1067 bmp = &slab->bitmap[bmi]; 1068 1069 MASSERT(bmi >= 0 && bmi < slab->nmax); 1070 MASSERT((*bmp & (1LU << bno)) == 0); 1071 atomic_set_long(bmp, 1LU << bno); 1072 1073 if (slab->slgd == slgd) { 1074 /* 1075 * We can only do the following if we own the slab. Note 1076 * that navail can be incremented by any thread even if 1077 * we own the slab. 1078 */ 1079 struct zoneinfo *zinfo; 1080 1081 atomic_add_int(&slab->navail, 1); 1082 if (slab->free_index > bmi) { 1083 slab->free_index = bmi; 1084 slab->free_bit = bno; 1085 } else if (slab->free_index == bmi && 1086 slab->free_bit > bno) { 1087 slab->free_bit = bno; 1088 } 1089 zinfo = &slgd->zone[slab->zone_index]; 1090 1091 /* 1092 * Freeing an object from a full slab makes it less than 1093 * full. The slab must be moved to the available list. 1094 * 1095 * If the available list has too many slabs, release some 1096 * to the depot. 
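		 *
		 * "Too many" means more than opt_cache slabs (4 by default,
		 * tunable with the 'C'/'c' MALLOC_OPTIONS flags); the excess
		 * is handed to the depot via slabterm().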
1097 */ 1098 if (slab->state == FULL) { 1099 TAILQ_REMOVE(&slgd->full_zones, slab, entry); 1100 slab->state = AVAIL; 1101 stmp = zinfo->avail_base; 1102 slab->next = stmp; 1103 zinfo->avail_base = slab; 1104 ++zinfo->avail_count; 1105 while (zinfo->avail_count > opt_cache) { 1106 slab = zinfo->avail_base; 1107 zinfo->avail_base = slab->next; 1108 --zinfo->avail_count; 1109 slabterm(slgd, slab); 1110 } 1111 goto done; 1112 } 1113 1114 /* 1115 * If the slab becomes completely empty dispose of it in 1116 * some manner. By default each thread caches up to 4 1117 * empty slabs. Only small slabs are cached. 1118 */ 1119 if (slab->navail == slab->nmax && slab->state == AVAIL) { 1120 /* 1121 * Remove slab from available queue 1122 */ 1123 slabp = &zinfo->avail_base; 1124 while ((stmp = *slabp) != slab) 1125 slabp = &stmp->next; 1126 *slabp = slab->next; 1127 --zinfo->avail_count; 1128 1129 if (opt_free || opt_cache == 0) { 1130 /* 1131 * If local caching is disabled cache the 1132 * slab in the depot (or free it). 1133 */ 1134 slabterm(slgd, slab); 1135 } else if (slab->slab_size > BIGSLABSIZE) { 1136 /* 1137 * We do not try to retain large slabs 1138 * in per-thread caches. 1139 */ 1140 slabterm(slgd, slab); 1141 } else if (zinfo->empty_count > opt_cache) { 1142 /* 1143 * We have too many slabs cached, but 1144 * instead of freeing this one free 1145 * an empty slab that's been idle longer. 1146 * 1147 * (empty_count does not change) 1148 */ 1149 stmp = zinfo->empty_base; 1150 slab->state = EMPTY; 1151 slab->next = stmp->next; 1152 zinfo->empty_base = slab; 1153 slabterm(slgd, stmp); 1154 } else { 1155 /* 1156 * Cache the empty slab in our thread local 1157 * empty list. 1158 */ 1159 ++zinfo->empty_count; 1160 slab->state = EMPTY; 1161 slab->next = zinfo->empty_base; 1162 zinfo->empty_base = slab; 1163 } 1164 } 1165 } else if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) { 1166 slglobaldata_t sldepot; 1167 1168 /* 1169 * If freeing to a slab owned by the global depot, and 1170 * the slab becomes completely EMPTY, try to move it to 1171 * the correct list. 1172 */ 1173 sldepot = &slglobaldepot; 1174 if (__isthreaded) 1175 _SPINLOCK(&sldepot->lock); 1176 if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) { 1177 struct zoneinfo *zinfo; 1178 1179 /* 1180 * Move the slab to the empty list 1181 */ 1182 MASSERT(slab->state == AVAIL); 1183 atomic_add_int(&slab->navail, 1); 1184 zinfo = &sldepot->zone[slab->zone_index]; 1185 slabp = &zinfo->avail_base; 1186 while (slab != *slabp) 1187 slabp = &(*slabp)->next; 1188 *slabp = slab->next; 1189 --zinfo->avail_count; 1190 1191 /* 1192 * Clean out excessive empty entries from the 1193 * depot. 1194 */ 1195 slab->state = EMPTY; 1196 slab->next = zinfo->empty_base; 1197 zinfo->empty_base = slab; 1198 ++zinfo->empty_count; 1199 while (zinfo->empty_count > opt_cache) { 1200 slab = zinfo->empty_base; 1201 zinfo->empty_base = slab->next; 1202 --zinfo->empty_count; 1203 slab->state = UNKNOWN; 1204 if (__isthreaded) 1205 _SPINUNLOCK(&sldepot->lock); 1206 slabfree(slab); 1207 if (__isthreaded) 1208 _SPINLOCK(&sldepot->lock); 1209 } 1210 } else { 1211 atomic_add_int(&slab->navail, 1); 1212 } 1213 if (__isthreaded) 1214 _SPINUNLOCK(&sldepot->lock); 1215 } else { 1216 /* 1217 * We can't act on the slab other than by adjusting navail 1218 * (and the bitmap which we did in the common code at the 1219 * top). 1220 */ 1221 atomic_add_int(&slab->navail, 1); 1222 } 1223 done: 1224 ; 1225 } 1226 1227 /* 1228 * Allocate a new slab holding objects of size chunk_size. 
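 *
 * In outline: try to recycle a slab from the global depot; otherwise
 * compute the chunk offset and a reasonable chunk count for the zone,
 * round the slab up to a power-of-2 size, find or claim a Regions[] entry
 * whose mask matches that size, map the memory with _vmem_alloc(), and
 * initialize the slab header and free bitmap.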
1229 */ 1230 static slab_t 1231 slaballoc(int zi, size_t chunking, size_t chunk_size) 1232 { 1233 slglobaldata_t slgd; 1234 slglobaldata_t sldepot; 1235 struct zoneinfo *zinfo; 1236 region_t region; 1237 void *save; 1238 slab_t slab; 1239 size_t slab_desire; 1240 size_t slab_size; 1241 size_t region_mask; 1242 uintptr_t chunk_offset; 1243 ssize_t maxchunks; 1244 ssize_t tmpchunks; 1245 int ispower2; 1246 int power; 1247 int ri; 1248 int rx; 1249 int nswath; 1250 int j; 1251 1252 /* 1253 * First look in the depot. Any given zone in the depot may be 1254 * locked by being set to -1. We have to do this instead of simply 1255 * removing the entire chain because removing the entire chain can 1256 * cause racing threads to allocate local slabs for large objects, 1257 * resulting in a large VSZ. 1258 */ 1259 slgd = &slglobal; 1260 sldepot = &slglobaldepot; 1261 zinfo = &sldepot->zone[zi]; 1262 1263 if (zinfo->avail_base) { 1264 if (__isthreaded) 1265 _SPINLOCK(&sldepot->lock); 1266 slab = zinfo->avail_base; 1267 if (slab) { 1268 MASSERT(slab->slgd == NULL); 1269 slab->slgd = slgd; 1270 zinfo->avail_base = slab->next; 1271 --zinfo->avail_count; 1272 if (__isthreaded) 1273 _SPINUNLOCK(&sldepot->lock); 1274 return slab; 1275 } 1276 if (__isthreaded) 1277 _SPINUNLOCK(&sldepot->lock); 1278 } 1279 1280 /* 1281 * Nothing in the depot, allocate a new slab by locating or assigning 1282 * a region and then using the system virtual memory allocator. 1283 */ 1284 slab = NULL; 1285 1286 /* 1287 * Calculate the start of the data chunks relative to the start 1288 * of the slab. If chunk_size is a power of 2 we guarantee 1289 * power of 2 alignment. If it is not we guarantee alignment 1290 * to the chunk size. 1291 */ 1292 if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) { 1293 ispower2 = 1; 1294 chunk_offset = roundup2(sizeof(*slab), chunk_size); 1295 } else { 1296 ispower2 = 0; 1297 chunk_offset = sizeof(*slab) + chunking - 1; 1298 chunk_offset -= chunk_offset % chunking; 1299 } 1300 1301 /* 1302 * Calculate a reasonable number of chunks for the slab. 1303 * 1304 * Once initialized the MaxChunks[] array can only ever be 1305 * reinitialized to the same value. 1306 */ 1307 maxchunks = MaxChunks[zi]; 1308 if (maxchunks == 0) { 1309 /* 1310 * First calculate how many chunks would fit in 1/1024 1311 * available memory. This is around 2MB on a 32 bit 1312 * system and 128G on a 64-bit (48-bits available) system. 1313 */ 1314 maxchunks = (ssize_t)(NREGIONS_SIZE - chunk_offset) / 1315 (ssize_t)chunk_size; 1316 if (maxchunks <= 0) 1317 maxchunks = 1; 1318 1319 /* 1320 * A slab cannot handle more than MAXCHUNKS chunks, but 1321 * limit us to approximately MAXCHUNKS / 2 here because 1322 * we may have to expand maxchunks when we calculate the 1323 * actual power-of-2 slab. 1324 */ 1325 if (maxchunks > MAXCHUNKS / 2) 1326 maxchunks = MAXCHUNKS / 2; 1327 1328 /* 1329 * Try to limit the slabs to BIGSLABSIZE (~128MB). Larger 1330 * slabs will be created if the allocation does not fit. 1331 */ 1332 if (chunk_offset + chunk_size * maxchunks > BIGSLABSIZE) { 1333 tmpchunks = (ssize_t)(BIGSLABSIZE - chunk_offset) / 1334 (ssize_t)chunk_size; 1335 if (tmpchunks <= 0) 1336 tmpchunks = 1; 1337 if (maxchunks > tmpchunks) 1338 maxchunks = tmpchunks; 1339 } 1340 1341 /* 1342 * If the slab calculates to greater than 2MB see if we 1343 * can cut it down to ~2MB. This controls VSZ but has 1344 * no effect on run-time size or performance. 
1345 * 1346 * This is very important in case you core dump and also 1347 * important to reduce unnecessary region allocations. 1348 */ 1349 if (chunk_offset + chunk_size * maxchunks > NOMSLABSIZE) { 1350 tmpchunks = (ssize_t)(NOMSLABSIZE - chunk_offset) / 1351 (ssize_t)chunk_size; 1352 if (tmpchunks < 1) 1353 tmpchunks = 1; 1354 if (maxchunks > tmpchunks) 1355 maxchunks = tmpchunks; 1356 } 1357 1358 /* 1359 * If the slab calculates to greater than 128K see if we 1360 * can cut it down to ~128K while still maintaining a 1361 * reasonably large number of chunks in each slab. This 1362 * controls VSZ but has no effect on run-time size or 1363 * performance. 1364 * 1365 * This is very important in case you core dump and also 1366 * important to reduce unnecessary region allocations. 1367 */ 1368 if (chunk_offset + chunk_size * maxchunks > LITSLABSIZE) { 1369 tmpchunks = (ssize_t)(LITSLABSIZE - chunk_offset) / 1370 (ssize_t)chunk_size; 1371 if (tmpchunks < 32) 1372 tmpchunks = 32; 1373 if (maxchunks > tmpchunks) 1374 maxchunks = tmpchunks; 1375 } 1376 1377 MaxChunks[zi] = maxchunks; 1378 } 1379 MASSERT(maxchunks > 0 && maxchunks <= MAXCHUNKS); 1380 1381 /* 1382 * Calculate the actual slab size. maxchunks will be recalculated 1383 * a little later. 1384 */ 1385 slab_desire = chunk_offset + chunk_size * maxchunks; 1386 slab_size = 8 * MAXCHUNKS; 1387 power = 3 + MAXCHUNKS_BITS; 1388 while (slab_size < slab_desire) { 1389 slab_size <<= 1; 1390 ++power; 1391 } 1392 1393 /* 1394 * Do a quick recalculation based on the actual slab size but not 1395 * yet dealing with whether the slab header is in-band or out-of-band. 1396 * The purpose here is to see if we can reasonably reduce slab_size 1397 * to a power of 4 to allow more chunk sizes to use the same slab 1398 * size. 1399 */ 1400 if ((power & 1) && slab_size > 32768) { 1401 maxchunks = (slab_size - chunk_offset) / chunk_size; 1402 if (maxchunks >= MAXCHUNKS / 8) { 1403 slab_size >>= 1; 1404 --power; 1405 } 1406 } 1407 if ((power & 2) && slab_size > 32768 * 4) { 1408 maxchunks = (slab_size - chunk_offset) / chunk_size; 1409 if (maxchunks >= MAXCHUNKS / 4) { 1410 slab_size >>= 2; 1411 power -= 2; 1412 } 1413 } 1414 /* 1415 * This case occurs when the slab_size is larger than 1/1024 available 1416 * UVM. 1417 */ 1418 nswath = slab_size / NREGIONS_SIZE; 1419 if (nswath > NREGIONS) 1420 return (NULL); 1421 1422 1423 /* 1424 * Try to allocate from our current best region for this zi 1425 */ 1426 region_mask = ~(slab_size - 1); 1427 ri = slgd->zone[zi].best_region; 1428 if (Regions[ri].mask == region_mask) { 1429 if ((slab = _vmem_alloc(ri, slab_size)) != NULL) 1430 goto found; 1431 } 1432 1433 /* 1434 * Try to find an existing region to allocate from. The normal 1435 * case will be for allocations that are less than 1/1024 available 1436 * UVM, which fit into a single Regions[] entry. 
1437 */ 1438 while (slab_size <= NREGIONS_SIZE) { 1439 rx = -1; 1440 for (ri = 0; ri < NREGIONS; ++ri) { 1441 if (rx < 0 && Regions[ri].mask == 0) 1442 rx = ri; 1443 if (Regions[ri].mask == region_mask) { 1444 slab = _vmem_alloc(ri, slab_size); 1445 if (slab) { 1446 slgd->zone[zi].best_region = ri; 1447 goto found; 1448 } 1449 } 1450 } 1451 1452 if (rx < 0) 1453 return(NULL); 1454 1455 /* 1456 * This can fail, retry either way 1457 */ 1458 atomic_cmpset_ptr((void **)&Regions[rx].mask, 1459 NULL, 1460 (void *)region_mask); 1461 } 1462 1463 for (;;) { 1464 rx = -1; 1465 for (ri = 0; ri < NREGIONS; ri += nswath) { 1466 if (Regions[ri].mask == region_mask) { 1467 slab = _vmem_alloc(ri, slab_size); 1468 if (slab) { 1469 slgd->zone[zi].best_region = ri; 1470 goto found; 1471 } 1472 } 1473 if (rx < 0) { 1474 for (j = nswath - 1; j >= 0; --j) { 1475 if (Regions[ri+j].mask != 0) 1476 break; 1477 } 1478 if (j < 0) 1479 rx = ri; 1480 } 1481 } 1482 1483 /* 1484 * We found a candidate, try to allocate it backwards so 1485 * another thread racing a slaballoc() does not see the 1486 * mask in the base index position until we are done. 1487 * 1488 * We can safely zero-out any partial allocations because 1489 * the mask is only accessed from the base index. Any other 1490 * threads racing us will fail prior to us clearing the mask. 1491 */ 1492 if (rx < 0) 1493 return(NULL); 1494 for (j = nswath - 1; j >= 0; --j) { 1495 if (!atomic_cmpset_ptr((void **)&Regions[rx+j].mask, 1496 NULL, (void *)region_mask)) { 1497 while (++j < nswath) 1498 Regions[rx+j].mask = 0; 1499 break; 1500 } 1501 } 1502 /* retry */ 1503 } 1504 1505 /* 1506 * Fill in the new slab in region ri. If the slab_size completely 1507 * fills one or more region slots we move the slab structure out of 1508 * band which should optimize the chunking (particularly for a power 1509 * of 2). 1510 */ 1511 found: 1512 region = &Regions[ri]; 1513 MASSERT(region->slab == NULL); 1514 if (slab_size >= NREGIONS_SIZE) { 1515 save = slab; 1516 slab = memalloc(sizeof(*slab), 0); 1517 bzero(slab, sizeof(*slab)); 1518 slab->chunks = save; 1519 for (j = 0; j < nswath; ++j) 1520 region[j].slab = slab; 1521 chunk_offset = 0; 1522 } else { 1523 bzero(slab, sizeof(*slab)); 1524 slab->chunks = (char *)slab + chunk_offset; 1525 } 1526 1527 /* 1528 * Calculate the start of the chunks memory and recalculate the 1529 * actual number of chunks the slab can hold. 1530 */ 1531 maxchunks = (slab_size - chunk_offset) / chunk_size; 1532 if (maxchunks > MAXCHUNKS) 1533 maxchunks = MAXCHUNKS; 1534 1535 /* 1536 * And fill in the rest 1537 */ 1538 slab->magic = ZALLOC_SLAB_MAGIC; 1539 slab->navail = maxchunks; 1540 slab->nmax = maxchunks; 1541 slab->slab_size = slab_size; 1542 slab->chunk_size = chunk_size; 1543 slab->zone_index = zi; 1544 slab->slgd = slgd; 1545 slab->state = UNKNOWN; 1546 slab->region = region; 1547 1548 for (ri = 0; ri < maxchunks; ri += LONG_BITS) { 1549 if (ri + LONG_BITS <= maxchunks) 1550 slab->bitmap[ri >> LONG_BITS_SHIFT] = ULONG_MAX; 1551 else 1552 slab->bitmap[ri >> LONG_BITS_SHIFT] = 1553 (1LU << (maxchunks - ri)) - 1; 1554 } 1555 return (slab); 1556 } 1557 1558 /* 1559 * Free a slab. 1560 */ 1561 static void 1562 slabfree(slab_t slab) 1563 { 1564 int nswath; 1565 int j; 1566 1567 if (slab->region->slab == slab) { 1568 /* 1569 * Out-of-band slab. 
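		 *
		 * The header was allocated separately with memalloc() and
		 * each spanned region slot points at it, so clear those
		 * pointers, unmap the chunk memory, then free the header.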
1570 */ 1571 nswath = slab->slab_size / NREGIONS_SIZE; 1572 for (j = 0; j < nswath; ++j) 1573 slab->region[j].slab = NULL; 1574 slab->magic = 0; 1575 _vmem_free(slab->chunks, slab->slab_size); 1576 memfree(slab, 0); 1577 } else { 1578 /* 1579 * In-band slab. 1580 */ 1581 slab->magic = 0; 1582 _vmem_free(slab, slab->slab_size); 1583 } 1584 } 1585 1586 /* 1587 * Terminate a slab's use in the current thread. The slab may still have 1588 * outstanding allocations and thus not be deallocatable. 1589 */ 1590 static void 1591 slabterm(slglobaldata_t slgd, slab_t slab) 1592 { 1593 slglobaldata_t sldepot; 1594 struct zoneinfo *zinfo; 1595 int zi = slab->zone_index; 1596 1597 slab->slgd = NULL; 1598 --slgd->nslabs; 1599 sldepot = &slglobaldepot; 1600 zinfo = &sldepot->zone[zi]; 1601 1602 /* 1603 * Move the slab to the avail list or the empty list. 1604 */ 1605 if (__isthreaded) 1606 _SPINLOCK(&sldepot->lock); 1607 if (slab->navail == slab->nmax) { 1608 slab->state = EMPTY; 1609 slab->next = zinfo->empty_base; 1610 zinfo->empty_base = slab; 1611 ++zinfo->empty_count; 1612 } else { 1613 slab->state = AVAIL; 1614 slab->next = zinfo->avail_base; 1615 zinfo->avail_base = slab; 1616 ++zinfo->avail_count; 1617 } 1618 1619 /* 1620 * Clean extra slabs out of the empty list 1621 */ 1622 while (zinfo->empty_count > opt_cache) { 1623 slab = zinfo->empty_base; 1624 zinfo->empty_base = slab->next; 1625 --zinfo->empty_count; 1626 slab->state = UNKNOWN; 1627 if (__isthreaded) 1628 _SPINUNLOCK(&sldepot->lock); 1629 slabfree(slab); 1630 if (__isthreaded) 1631 _SPINLOCK(&sldepot->lock); 1632 } 1633 if (__isthreaded) 1634 _SPINUNLOCK(&sldepot->lock); 1635 } 1636 1637 /* 1638 * _vmem_alloc() 1639 * 1640 * Directly map memory in PAGE_SIZE'd chunks with the specified 1641 * alignment. 1642 * 1643 * Alignment must be a multiple of PAGE_SIZE. 1644 * 1645 * Size must be >= alignment. 1646 */ 1647 static void * 1648 _vmem_alloc(int ri, size_t slab_size) 1649 { 1650 char *baddr = (void *)((uintptr_t)ri << NREGIONS_SHIFT); 1651 char *eaddr; 1652 char *addr; 1653 char *save; 1654 uintptr_t excess; 1655 1656 if (slab_size < NREGIONS_SIZE) 1657 eaddr = baddr + NREGIONS_SIZE; 1658 else 1659 eaddr = baddr + slab_size; 1660 1661 /* 1662 * This usually just works but might not. 1663 */ 1664 addr = mmap(baddr, slab_size, PROT_READ|PROT_WRITE, 1665 MAP_PRIVATE | MAP_ANON | MAP_SIZEALIGN, -1, 0); 1666 if (addr == MAP_FAILED) { 1667 return (NULL); 1668 } 1669 if (addr < baddr || addr + slab_size > eaddr) { 1670 munmap(addr, slab_size); 1671 return (NULL); 1672 } 1673 1674 /* 1675 * Check alignment. The misaligned offset is also the excess 1676 * amount. If misaligned unmap the excess so we have a chance of 1677 * mapping at the next alignment point and recursively try again. 
1678 * 1679 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB block alignment 1680 * aaaaaaaaa aaaaaaaaaaa aa mis-aligned allocation 1681 * xxxxxxxxx final excess calculation 1682 * ^ returned address 1683 */ 1684 excess = (uintptr_t)addr & (slab_size - 1); 1685 while (excess) { 1686 excess = slab_size - excess; 1687 save = addr; 1688 1689 munmap(save + excess, slab_size - excess); 1690 addr = _vmem_alloc(ri, slab_size); 1691 munmap(save, excess); 1692 if (addr == NULL) 1693 return (NULL); 1694 if (addr < baddr || addr + slab_size > eaddr) { 1695 munmap(addr, slab_size); 1696 return (NULL); 1697 } 1698 excess = (uintptr_t)addr & (slab_size - 1); 1699 } 1700 return (addr); 1701 } 1702 1703 /* 1704 * _vmem_free() 1705 * 1706 * Free a chunk of memory allocated with _vmem_alloc() 1707 */ 1708 static void 1709 _vmem_free(void *ptr, size_t size) 1710 { 1711 munmap(ptr, size); 1712 } 1713 1714 /* 1715 * Panic on fatal conditions 1716 */ 1717 static void 1718 _mpanic(const char *ctl, ...) 1719 { 1720 va_list va; 1721 1722 if (malloc_panic == 0) { 1723 malloc_panic = 1; 1724 va_start(va, ctl); 1725 vfprintf(stderr, ctl, va); 1726 fprintf(stderr, "\n"); 1727 fflush(stderr); 1728 va_end(va); 1729 } 1730 abort(); 1731 } 1732 1733 __weak_reference(__aligned_alloc, aligned_alloc); 1734 __weak_reference(__malloc, malloc); 1735 __weak_reference(__calloc, calloc); 1736 __weak_reference(__posix_memalign, posix_memalign); 1737 __weak_reference(__realloc, realloc); 1738 __weak_reference(__free, free); 1739
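
/*
 * Illustrative usage sketch (not part of this file's build): a tiny test
 * program exercising the entry points above, assuming the shared object is
 * built with the cc line near the top of this file and preloaded, e.g.
 * LD_PRELOAD=./dmalloc.so ./a.out.  Kept under "#if 0" so it is never
 * compiled here.
 */
#if 0
#include <stdlib.h>
#include <string.h>
#include <assert.h>

int
main(void)
{
	void *p;
	void *q = NULL;

	p = malloc(100);		/* small zone-chunked allocation */
	assert(p != NULL);
	memset(p, 0x5a, 100);

	p = realloc(p, 300);		/* may move to a larger chunk */
	assert(p != NULL);

	assert(posix_memalign(&q, 64, 4096) == 0); /* power-of-2 aligned */

	free(q);
	free(p);
	return (0);
}
#endif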