/*
 * DMALLOC.C	- Dillon's malloc
 *
 * Copyright (c) 2011,2017 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a modified slab allocator as a drop-in replacement
 * for the libc malloc().  The slab algorithm has been adjusted to support
 * dynamic sizing of slabs which effectively allows slabs to be used for
 * allocations of any size.  Because of this we have neither a small-block
 * allocator nor a big-block allocator and the code paths are simplified.
 *
 * To support dynamic slab sizing available user virtual memory is broken
 * down into ~1024 regions.  Each region has a fixed slab size whose value
 * is set when the region is opened up for use.  The free() path simply
 * applies a mask based on the region to the pointer to acquire the base
 * of the governing slab structure.
 *
 * Regions[NREGIONS]	(1024)
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *	32768-65535	4096		8
 *	... continues forever ...	8 zones
 *
 * For a 2^63 memory space each doubling >= 64K is broken down into
 * 8 chunking zones (CHUNKFACTOR), so we support 88 + (48 * 8) = 472 zones.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2
 *	aligned.  Above that, power-of-2 sized allocations are page-aligned.
 *	Non power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
 *
 * On top of the slab allocator we also implement a 16-entry-per-thread
 * magazine cache for allocations <= NOMSLABSIZE.
 *
 * FUTURE FEATURES
 *
 *    + [better] garbage collection
 *    + better initial sizing.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.  Upper case letters enable or
 * increase a feature, lower case letters disable or decrease a feature.
 *
 * U		Enable UTRACE for all operations, observable with ktrace.
 *		Disabled by default.
 *
 * Z		Zero out allocations, otherwise allocations (except for
 *		calloc) will contain garbage.
 *		Disabled by default.
 *
 * H		Pass a hint with madvise() about unused pages.
 *		Disabled by default.
 *		Not currently implemented.
 *
 * F		Disable local per-thread caching.
 *		Disabled by default.
 *
 * C		Increase (decrease) how much excess cache to retain.
 *		Set to 4 by default.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o dmalloc.so dmalloc.c */

#ifndef STANDALONE_DEBUG
#include "libc_private.h"
#endif

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <limits.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>

#ifdef STANDALONE_DEBUG
void _nmalloc_thr_init(void);
#else
#include "spinlock.h"
#include "un-namespace.h"
#endif

#ifndef MAP_SIZEALIGN
#define MAP_SIZEALIGN	0
#endif

#if SSIZE_MAX == 0x7FFFFFFF
#define ADDRBITS	32
#define UVM_BITS	32	/* worst case */
#else
#define ADDRBITS	64
#define UVM_BITS	48	/* worst case XXX */
#endif

#if LONG_MAX == 0x7FFFFFFF
#define LONG_BITS	32
#define LONG_BITS_SHIFT	5
#else
#define LONG_BITS	64
#define LONG_BITS_SHIFT	6
#endif

#define LOCKEDPTR	((void *)(intptr_t)-1)

/*
 * Regions[]
 */
#define NREGIONS_BITS	10
#define NREGIONS	(1 << NREGIONS_BITS)
#define NREGIONS_MASK	(NREGIONS - 1)
#define NREGIONS_SHIFT	(UVM_BITS - NREGIONS_BITS)
#define NREGIONS_SIZE	(1LU << NREGIONS_SHIFT)

typedef struct region *region_t;
typedef struct slglobaldata *slglobaldata_t;
typedef struct slab *slab_t;

struct region {
	uintptr_t	mask;
	slab_t		slab;	/* conditional out of band slab */
};

static struct region Regions[NREGIONS];
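/*
 * Illustrative sketch (not compiled in): how the free()/realloc() paths
 * recover the governing slab from a raw pointer using the Regions[] table
 * declared above.  Each region spans NREGIONS_SIZE bytes (1/1024 of the
 * worst-case address space).  ptr2slab() is a hypothetical helper name;
 * the real code open-codes this logic in memfree() and memrealloc().
 */
#if 0
static slab_t
ptr2slab(void *ptr)
{
	region_t region;
	slab_t slab;

	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	return (slab);
}
#endif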
/*
 * Number of chunking zones available
 */
#define CHUNKFACTOR	8
#if ADDRBITS == 32
#define NZONES		(16 + 9 * CHUNKFACTOR + 16 * CHUNKFACTOR)
#else
#define NZONES		(16 + 9 * CHUNKFACTOR + 48 * CHUNKFACTOR)
#endif

static int MaxChunks[NZONES];

#define NDEPOTS		8		/* must be power of 2 */

/*
 * Maximum number of chunks per slab, governed by the allocation bitmap in
 * each slab.  The maximum is reduced for large chunk sizes.
 */
#define MAXCHUNKS	(LONG_BITS * LONG_BITS)
#define MAXCHUNKS_BITS	(LONG_BITS_SHIFT * LONG_BITS_SHIFT)
#define LITSLABSIZE	(32 * 1024)
#define NOMSLABSIZE	(2 * 1024 * 1024)
#define BIGSLABSIZE	(128 * 1024 * 1024)

#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */

TAILQ_HEAD(slab_list, slab);

/*
 * A slab structure
 */
struct slab {
	struct slab	*next;		/* slabs with available space */
	TAILQ_ENTRY(slab) entry;
	int32_t		magic;		/* magic number for sanity check */
	u_int		navail;		/* number of free elements available */
	u_int		nmax;
	u_int		free_bit;	/* free hint bitno */
	u_int		free_index;	/* free hint index */
	u_long		bitmap[LONG_BITS]; /* free chunks */
	size_t		slab_size;	/* size of entire slab */
	size_t		chunk_size;	/* chunk size for validation */
	int		zone_index;
	enum { UNKNOWN, AVAIL, EMPTY, FULL } state;
	int		flags;
	region_t	region;		/* related region */
	char		*chunks;	/* chunk base */
	slglobaldata_t	slgd;		/* localized to thread else NULL */
};

/*
 * per-thread data + global depot
 *
 * NOTE: The magazine shortcut is only used for per-thread data.
 */
#define NMAGSHORTCUT	16

struct slglobaldata {
	spinlock_t	lock;		/* only used by slglobaldepot */
	struct zoneinfo {
		slab_t	avail_base;
		slab_t	empty_base;
		int	best_region;
		int	mag_index;
		int	avail_count;
		int	empty_count;
		void	*mag_shortcut[NMAGSHORTCUT];
	} zone[NZONES];
	struct slab_list full_zones;	/* via entry */
	int		masked;
	int		biggest_index;
	size_t		nslabs;
};

#define SLAB_ZEROD	0x0001
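/*
 * Illustrative sketch (not compiled in) of the 16-entry per-thread magazine
 * implemented by mag_shortcut[]/mag_index above.  mag_pop()/mag_push() are
 * hypothetical names; memalloc() and memfree() open-code this logic.
 */
#if 0
static void *
mag_pop(struct zoneinfo *zinfo)
{
	int j = zinfo->mag_index;

	if (j == 0)
		return (NULL);		/* empty, fall back to the slab */
	zinfo->mag_index = --j;
	return (zinfo->mag_shortcut[j]);
}

static int
mag_push(struct zoneinfo *zinfo, void *ptr)
{
	int j = zinfo->mag_index;

	if (j >= NMAGSHORTCUT)
		return (0);		/* full, free to the slab instead */
	zinfo->mag_shortcut[j] = ptr;
	zinfo->mag_index = j + 1;
	return (1);
}
#endif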
/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE	8		/* in bytes */
#define MIN_CHUNK_MASK	(MIN_CHUNK_SIZE - 1)

#define SAFLAG_ZERO	0x00000001

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

/*
 * Thread control
 */

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			} while (0)

/*
 * With this attribute set, do not require a function call for accessing
 * this variable when the code is compiled -fPIC.
 *
 * Must be empty for libc_rtld (similar to __thread)
 */
#if defined(__LIBC_RTLD)
#define TLS_ATTRIBUTE
#else
#define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")));
#endif

static __thread struct slglobaldata slglobal TLS_ATTRIBUTE;
static pthread_key_t thread_malloc_key;
static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT;
static struct slglobaldata slglobaldepot;

static int opt_madvise = 0;
static int opt_free = 0;
static int opt_cache = 4;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static int malloc_panic;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static void *memalloc(size_t size, int flags);
static void *memrealloc(void *ptr, size_t size);
static void memfree(void *ptr, int);
static slab_t slaballoc(int zi, size_t chunking, size_t chunk_size);
static void slabfree(slab_t slab);
static void slabterm(slglobaldata_t slgd, slab_t slab);
static void *_vmem_alloc(int ri, size_t slab_size);
static void _vmem_free(void *ptr, size_t slab_size);
static void _mpanic(const char *ctl, ...) __printflike(1, 2);
#ifndef STANDALONE_DEBUG
static void malloc_init(void) __constructor(101);
#else
static void malloc_init(void) __constructor(101);
#endif

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

static void
malloc_init(void)
{
	const char *p = NULL;

	TAILQ_INIT(&slglobal.full_zones);

	Regions[0].mask = -1;	/* disallow activity in lowest region */

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':
			opt_utrace = 0;
			break;
		case 'U':
			opt_utrace = 1;
			break;
		case 'h':
			opt_madvise = 0;
			break;
		case 'H':
			opt_madvise = 1;
			break;
		case 'c':
			if (opt_cache > 0)
				--opt_cache;
			break;
		case 'C':
			++opt_cache;
			break;
		case 'f':
			opt_free = 0;
			break;
		case 'F':
			opt_free = 1;
			break;
		case 'z':
			g_malloc_flags = 0;
			break;
		case 'Z':
			g_malloc_flags = SAFLAG_ZERO;
			break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
}
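/*
 * Usage example (illustrative only, not part of the code): the options are
 * parsed once by the constructor above, so MALLOC_OPTIONS must be in the
 * environment before the program starts, e.g.
 *
 *	MALLOC_OPTIONS=Z	zero all allocations
 *	MALLOC_OPTIONS=UCC	enable utrace and raise the excess-cache
 *				retention from the default 4 to 6
 *	MALLOC_OPTIONS=F	hand empty slabs back instead of caching
 *				them per-thread
 */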
/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
static void _nmalloc_thr_init_once(void);
static void _nmalloc_thr_destructor(void *thrp);

void
_nmalloc_thr_init(void)
{
	static int did_init;

	TAILQ_INIT(&slglobal.full_zones);

	if (slglobal.masked)
		return;

	slglobal.masked = 1;
	if (did_init == 0) {
		did_init = 1;
		pthread_once(&thread_malloc_once, _nmalloc_thr_init_once);
	}
	pthread_setspecific(thread_malloc_key, &slglobal);
	slglobal.masked = 0;
}

void
_nmalloc_thr_prepfork(void)
{
	if (__isthreaded)
		_SPINLOCK(&slglobaldepot.lock);
}

void
_nmalloc_thr_parentfork(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&slglobaldepot.lock);
}

void
_nmalloc_thr_childfork(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&slglobaldepot.lock);
}

/*
 * Called just once
 */
static void
_nmalloc_thr_init_once(void)
{
	int error;

	error = pthread_key_create(&thread_malloc_key,
				   _nmalloc_thr_destructor);
	if (error)
		abort();
}

/*
 * Called for each thread undergoing exit
 *
 * Move all of the thread's slabs into a depot.
 */
static void
_nmalloc_thr_destructor(void *thrp)
{
	slglobaldata_t slgd = thrp;
	struct zoneinfo *zinfo;
	slab_t slab;
	void *ptr;
	int i;
	int j;

	slgd->masked = 1;

	for (i = 0; i <= slgd->biggest_index; i++) {
		zinfo = &slgd->zone[i];

		while ((j = zinfo->mag_index) > 0) {
			--j;
			ptr = zinfo->mag_shortcut[j];
			zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
			zinfo->mag_index = j;
			memfree(ptr, 0);
		}

		while ((slab = zinfo->empty_base) != NULL) {
			zinfo->empty_base = slab->next;
			--zinfo->empty_count;
			slabterm(slgd, slab);
		}

		while ((slab = zinfo->avail_base) != NULL) {
			zinfo->avail_base = slab->next;
			--zinfo->avail_count;
			slabterm(slgd, slab);
		}

		while ((slab = TAILQ_FIRST(&slgd->full_zones)) != NULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			slabterm(slgd, slab);
		}
	}
}
/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 *
 * Minimum alignment is 16 bytes for allocations >= 16 bytes to conform
 * with malloc requirements for intel/amd.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (size_t)*bytes;
	size_t x;
	size_t c;
	int i;

	if (n < 128) {
		if (n < 16) {
			*bytes = n = (n + 7) & ~7;
			*chunking = 8;
			return(n / 8 - 1);	/* 8 byte chunks, 2 zones */
		} else {
			*bytes = n = (n + 15) & ~15;
			*chunking = 16;
			return(n / 16 + 2);	/* 16 byte chunks, 8 zones */
		}
	}
	if (n < 4096) {
		x = 256;
		c = x / (CHUNKFACTOR * 2);
		i = 16;
	} else {
		x = 8192;
		c = x / (CHUNKFACTOR * 2);
		i = 16 + CHUNKFACTOR * 5;  /* 256->512,1024,2048,4096,8192 */
	}
	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKFACTOR;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = roundup2(n, c);
	*chunking = c;
	return (i + n / c - CHUNKFACTOR);

#if 0
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);

	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / (CHUNKINGLO*2) + CHUNKINGLO*1 - 1);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / (CHUNKINGLO*4) + CHUNKINGLO*2 - 1);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / (CHUNKINGLO*8) + CHUNKINGLO*3 - 1);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / (CHUNKINGLO*16) + CHUNKINGLO*4 - 1);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / (CHUNKINGLO*32) + CHUNKINGLO*5 - 1);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / (CHUNKINGLO*64) + CHUNKINGLO*6 - 1);
	}
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / (CHUNKINGLO*128) + CHUNKINGLO*7 - 1);
	}
	if (n < 32768) {				/* 16384-32767 */
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / (CHUNKINGLO*256) + CHUNKINGLO*8 - 1);
	}
	if (n < 65536) {
		*bytes = n = (n + 4095) & ~4095;	/* 32768-65535 */
		*chunking = 4096;
		return(n / (CHUNKINGLO*512) + CHUNKINGLO*9 - 1);
	}

	x = 131072;
	c = 8192;
	i = CHUNKINGLO*10 - 1;

	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKINGHI;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);
#endif
}
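/*
 * Worked examples for zoneindex(), hand-derived from the code above
 * (illustrative only, never compiled into the library).
 */
#if 0
static void
zoneindex_examples(void)
{
	size_t bytes, chunking;

	bytes = 100;
	MASSERT(zoneindex(&bytes, &chunking) == 9);	/* 100 -> 112 bytes */
	MASSERT(bytes == 112 && chunking == 16);

	bytes = 3000;
	MASSERT(zoneindex(&bytes, &chunking) == 52);	/* 3000 -> 3072 bytes */
	MASSERT(bytes == 3072 && chunking == 256);

	bytes = 5000;
	MASSERT(zoneindex(&bytes, &chunking) == 58);	/* 5000 -> 5120 bytes */
	MASSERT(bytes == 5120 && chunking == 512);
}
#endif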
/*
 * malloc() - call internal slab allocator
 */
void *
__malloc(size_t size)
{
	void *ptr;

	ptr = memalloc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
__calloc(size_t number, size_t size)
{
	void *ptr;

	/* overflow check for number * size */
	if (number != 0 && SIZE_MAX / number < size) {
		errno = ENOMEM;
		return(NULL);
	}

	ptr = memalloc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
__realloc(void *ptr, size_t size)
{
	void *ret;

	if (ptr == NULL)
		ret = memalloc(size, 0);
	else
		ret = memrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to at least
 * PAGE_SIZE.  Otherwise we use the zoneindex mechanic to find a zone
 * matching the requirements.
 */
int
__posix_memalign(void **memptr, size_t alignment, size_t size)
{
	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * XXX for now just find the nearest power of 2 >= size and also
	 * >= alignment and allocate that.
	 */
	while (alignment < size) {
		alignment <<= 1;
		if (alignment == 0)
			_mpanic("posix_memalign: byte value too high");
	}
	*memptr = memalloc(alignment, 0);
	return(*memptr ? 0 : ENOMEM);
}
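/*
 * Illustrative only: because __posix_memalign() rounds the request up to
 * the next power of 2 that satisfies both (size) and (alignment), a call
 * such as
 *
 *	posix_memalign(&p, 64, 100);
 *
 * allocates a 128-byte chunk, which the slab allocator aligns to 128 bytes
 * and therefore also to the requested 64 bytes.
 */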
/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
__free(void *ptr)
{
	if (ptr) {
		UTRACE(ptr, 0, 0);
		memfree(ptr, 0);
	}
}

/*
 * memalloc()	(SLAB ALLOCATOR)
 *
 *	Allocate memory via the slab allocator.
 */
static void *
memalloc(size_t size, int flags)
{
	slglobaldata_t slgd;
	struct zoneinfo *zinfo;
	slab_t slab;
	size_t chunking;
	int bmi;
	int bno;
	u_long *bmp;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int j;
	char *obj;

	/*
	 * If 0 bytes is requested we have to return a unique pointer;
	 * allocate at least one byte.
	 */
	if (size == 0)
		size = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);
	if (size == 0)
		return(NULL);

	/*
	 * Try magazine shortcut first
	 */
	slgd = &slglobal;
	zinfo = &slgd->zone[zi];

	if ((j = zinfo->mag_index) != 0) {
		zinfo->mag_index = --j;
		obj = zinfo->mag_shortcut[j];
		zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return obj;
	}

	/*
	 * Locate a slab with available space.  If no slabs are available
	 * back-off to the empty list and if we still come up dry allocate
	 * a new slab (which will try the depot first).
	 */
retry:
	if ((slab = zinfo->avail_base) == NULL) {
		if ((slab = zinfo->empty_base) == NULL) {
			/*
			 * Still dry
			 */
			slab = slaballoc(zi, chunking, size);
			if (slab == NULL)
				return(NULL);
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			slab->state = AVAIL;
			if (slgd->biggest_index < zi)
				slgd->biggest_index = zi;
			++slgd->nslabs;
		} else {
			/*
			 * Pulled from empty list
			 */
			zinfo->empty_base = slab->next;
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			slab->state = AVAIL;
			--zinfo->empty_count;
		}
	}

	/*
	 * Allocate a chunk out of the slab.  HOT PATH
	 *
	 * Only the thread owning the slab can allocate out of it.
	 *
	 * NOTE: The last bit in the bitmap is always marked allocated so
	 *	 we cannot overflow here.
	 */
	bno = slab->free_bit;
	bmi = slab->free_index;
	bmp = &slab->bitmap[bmi];
	if (*bmp & (1LU << bno)) {
		atomic_clear_long(bmp, 1LU << bno);
		obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * size;
		slab->free_bit = (bno + 1) & (LONG_BITS - 1);
		atomic_add_int(&slab->navail, -1);
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	/*
	 * Allocate a chunk out of a slab.  COLD PATH
	 */
	if (slab->navail == 0) {
		zinfo->avail_base = slab->next;
		--zinfo->avail_count;
		slab->state = FULL;
		TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry);
		goto retry;
	}

	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
					     size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	bmi = 0;
	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
					     size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	_mpanic("slaballoc: corrupted zone: navail %d", slab->navail);
	/* not reached */
	return NULL;
}
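/*
 * Illustrative sketch (not compiled in) of the bitmap index math used in
 * the hot path above: bitmap word bmi and bit bno select chunk number
 * (bmi * LONG_BITS + bno).  chunk_address() is a hypothetical helper name;
 * memfree() performs the inverse calculation.
 */
#if 0
static void *
chunk_address(slab_t slab, int bmi, int bno)
{
	size_t chunkno = ((size_t)bmi << LONG_BITS_SHIFT) + bno;

	return (slab->chunks + chunkno * slab->chunk_size);
}
#endif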
/*
 * Reallocate memory within the chunk
 */
static void *
memrealloc(void *ptr, size_t nsize)
{
	region_t region;
	slab_t slab;
	size_t osize;
	char *obj;
	int flags = 0;

	/*
	 * If 0 bytes is requested we have to return a unique pointer;
	 * allocate at least one byte.
	 */
	if (nsize == 0)
		nsize = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);
	osize = slab->chunk_size;
	if (nsize <= osize) {
		if (osize < 32 || nsize >= osize / 2) {
			obj = ptr;
			if ((flags & SAFLAG_ZERO) && nsize < osize)
				bzero(obj + nsize, osize - nsize);
			return(obj);
		}
	}

	/*
	 * Otherwise resize the object
	 */
	obj = memalloc(nsize, 0);
	if (obj) {
		if (nsize > osize)
			nsize = osize;
		bcopy(ptr, obj, nsize);
		memfree(ptr, 0);
	}
	return (obj);
}
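/*
 * Illustrative only: memrealloc() keeps the original pointer whenever the
 * new size still fits the old chunk and is at least half of it, e.g.
 *
 *	p = malloc(3000);	-> 3072-byte chunk (256-byte chunking)
 *	p = realloc(p, 2000);	-> same pointer returned, no copy
 *	p = realloc(p, 1000);	-> 1000 < 3072 / 2, new chunk plus bcopy
 */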
/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.
 *
 * MPSAFE
 */
static void
memfree(void *ptr, int flags)
{
	region_t region;
	slglobaldata_t slgd;
	slab_t slab;
	slab_t stmp;
	slab_t *slabp;
	int bmi;
	int bno;
	int j;
	u_long *bmp;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 *
	 * WARNING! The slab may be owned by another thread!
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab != NULL);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);

#ifdef INVARIANTS
	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
	if (slab->chunk_size < sizeof(weirdary))
		bcopy(weirdary, ptr, slab->chunk_size);
	else
		bcopy(weirdary, ptr, sizeof(weirdary));
#endif
	slgd = &slglobal;

	/*
	 * Use mag_shortcut[] when possible
	 */
	if (slgd->masked == 0 && slab->chunk_size <= NOMSLABSIZE) {
		struct zoneinfo *zinfo;

		zinfo = &slgd->zone[slab->zone_index];
		j = zinfo->mag_index;
		if (j < NMAGSHORTCUT) {
			zinfo->mag_shortcut[j] = ptr;
			zinfo->mag_index = j + 1;
			return;
		}
	}

	/*
	 * Free to slab and increment navail.  We can delay incrementing
	 * navail to prevent the slab from being destroyed out from under
	 * us while we do other optimizations.
	 */
	bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size;
	bmi = bno >> LONG_BITS_SHIFT;
	bno &= (LONG_BITS - 1);
	bmp = &slab->bitmap[bmi];

	MASSERT(bmi >= 0 && bmi < slab->nmax);
	MASSERT((*bmp & (1LU << bno)) == 0);
	atomic_set_long(bmp, 1LU << bno);

	if (slab->slgd == slgd) {
		/*
		 * We can only do the following if we own the slab.  Note
		 * that navail can be incremented by any thread even if
		 * we own the slab.
		 */
		struct zoneinfo *zinfo;

		atomic_add_int(&slab->navail, 1);
		if (slab->free_index > bmi) {
			slab->free_index = bmi;
			slab->free_bit = bno;
		} else if (slab->free_index == bmi &&
			   slab->free_bit > bno) {
			slab->free_bit = bno;
		}
		zinfo = &slgd->zone[slab->zone_index];

		/*
		 * Freeing an object from a full slab makes it less than
		 * full.  The slab must be moved to the available list.
		 *
		 * If the available list has too many slabs, release some
		 * to the depot.
		 */
		if (slab->state == FULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			slab->state = AVAIL;
			stmp = zinfo->avail_base;
			slab->next = stmp;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			while (zinfo->avail_count > opt_cache) {
				slab = zinfo->avail_base;
				zinfo->avail_base = slab->next;
				--zinfo->avail_count;
				slabterm(slgd, slab);
			}
			goto done;
		}

		/*
		 * If the slab becomes completely empty dispose of it in
		 * some manner.  By default each thread caches up to 4
		 * empty slabs.  Only small slabs are cached.
		 */
		if (slab->navail == slab->nmax && slab->state == AVAIL) {
			/*
			 * Remove slab from available queue
			 */
			slabp = &zinfo->avail_base;
			while ((stmp = *slabp) != slab)
				slabp = &stmp->next;
			*slabp = slab->next;
			--zinfo->avail_count;

			if (opt_free || opt_cache == 0) {
				/*
				 * If local caching is disabled cache the
				 * slab in the depot (or free it).
				 */
				slabterm(slgd, slab);
			} else if (slab->slab_size > BIGSLABSIZE) {
				/*
				 * We do not try to retain large slabs
				 * in per-thread caches.
				 */
				slabterm(slgd, slab);
			} else if (zinfo->empty_count > opt_cache) {
				/*
				 * We have too many slabs cached, but
				 * instead of freeing this one free
				 * an empty slab that's been idle longer.
				 *
				 * (empty_count does not change)
				 */
				stmp = zinfo->empty_base;
				slab->state = EMPTY;
				slab->next = stmp->next;
				zinfo->empty_base = slab;
				slabterm(slgd, stmp);
			} else {
				/*
				 * Cache the empty slab in our thread local
				 * empty list.
				 */
				++zinfo->empty_count;
				slab->state = EMPTY;
				slab->next = zinfo->empty_base;
				zinfo->empty_base = slab;
			}
		}
	} else if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
		slglobaldata_t sldepot;

		/*
		 * If freeing to a slab owned by the global depot, and
		 * the slab becomes completely EMPTY, try to move it to
		 * the correct list.
		 */
		sldepot = &slglobaldepot;
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
		if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
			struct zoneinfo *zinfo;

			/*
			 * Move the slab to the empty list
			 */
			MASSERT(slab->state == AVAIL);
			atomic_add_int(&slab->navail, 1);
			zinfo = &sldepot->zone[slab->zone_index];
			slabp = &zinfo->avail_base;
			while (slab != *slabp)
				slabp = &(*slabp)->next;
			*slabp = slab->next;
			--zinfo->avail_count;

			/*
			 * Clean out excessive empty entries from the
			 * depot.
			 */
			slab->state = EMPTY;
			slab->next = zinfo->empty_base;
			zinfo->empty_base = slab;
			++zinfo->empty_count;
			while (zinfo->empty_count > opt_cache) {
				slab = zinfo->empty_base;
				zinfo->empty_base = slab->next;
				--zinfo->empty_count;
				slab->state = UNKNOWN;
				if (__isthreaded)
					_SPINUNLOCK(&sldepot->lock);
				slabfree(slab);
				if (__isthreaded)
					_SPINLOCK(&sldepot->lock);
			}
		} else {
			atomic_add_int(&slab->navail, 1);
		}
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
	} else {
		/*
		 * We can't act on the slab other than by adjusting navail
		 * (and the bitmap which we did in the common code at the
		 * top).
		 */
		atomic_add_int(&slab->navail, 1);
	}
done:
	;
}
/*
 * Allocate a new slab holding objects of size chunk_size.
 */
static slab_t
slaballoc(int zi, size_t chunking, size_t chunk_size)
{
	slglobaldata_t slgd;
	slglobaldata_t sldepot;
	struct zoneinfo *zinfo;
	region_t region;
	void *save;
	slab_t slab;
	size_t slab_desire;
	size_t slab_size;
	size_t region_mask;
	uintptr_t chunk_offset;
	ssize_t maxchunks;
	ssize_t tmpchunks;
	int ispower2;
	int power;
	int ri;
	int rx;
	int nswath;
	int j;

	/*
	 * First look in the depot.  Any given zone in the depot may be
	 * locked by being set to -1.  We have to do this instead of simply
	 * removing the entire chain because removing the entire chain can
	 * cause racing threads to allocate local slabs for large objects,
	 * resulting in a large VSZ.
	 */
	slgd = &slglobal;
	sldepot = &slglobaldepot;
	zinfo = &sldepot->zone[zi];

	if (zinfo->avail_base) {
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
		slab = zinfo->avail_base;
		if (slab) {
			MASSERT(slab->slgd == NULL);
			slab->slgd = slgd;
			zinfo->avail_base = slab->next;
			--zinfo->avail_count;
			if (__isthreaded)
				_SPINUNLOCK(&sldepot->lock);
			return slab;
		}
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
	}

	/*
	 * Nothing in the depot, allocate a new slab by locating or assigning
	 * a region and then using the system virtual memory allocator.
	 */
	slab = NULL;

	/*
	 * Calculate the start of the data chunks relative to the start
	 * of the slab.  If chunk_size is a power of 2 we guarantee
	 * power of 2 alignment.  If it is not we guarantee alignment
	 * to the chunk size.
	 */
	if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) {
		ispower2 = 1;
		chunk_offset = roundup2(sizeof(*slab), chunk_size);
	} else {
		ispower2 = 0;
		chunk_offset = sizeof(*slab) + chunking - 1;
		chunk_offset -= chunk_offset % chunking;
	}
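	/*
	 * Note: the expression above is a pure power-of-2 test.  For a
	 * power of 2 such as 256, 256 ^ 255 == 511 == (256 << 1) - 1,
	 * while for a non-power-of-2 such as 192, 192 ^ 191 == 127 and
	 * the test fails.
	 */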
	/*
	 * Calculate a reasonable number of chunks for the slab.
	 *
	 * Once initialized the MaxChunks[] array can only ever be
	 * reinitialized to the same value.
	 */
	maxchunks = MaxChunks[zi];
	if (maxchunks == 0) {
		/*
		 * First calculate how many chunks would fit in 1/1024
		 * available memory.  This is around 2MB on a 32 bit
		 * system and 128G on a 64-bit (48-bits available) system.
		 */
		maxchunks = (ssize_t)(NREGIONS_SIZE - chunk_offset) /
			    (ssize_t)chunk_size;
		if (maxchunks <= 0)
			maxchunks = 1;

		/*
		 * A slab cannot handle more than MAXCHUNKS chunks, but
		 * limit us to approximately MAXCHUNKS / 2 here because
		 * we may have to expand maxchunks when we calculate the
		 * actual power-of-2 slab.
		 */
		if (maxchunks > MAXCHUNKS / 2)
			maxchunks = MAXCHUNKS / 2;

		/*
		 * Try to limit the slabs to BIGSLABSIZE (~128MB).  Larger
		 * slabs will be created if the allocation does not fit.
		 */
		if (chunk_offset + chunk_size * maxchunks > BIGSLABSIZE) {
			tmpchunks = (ssize_t)(BIGSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks <= 0)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than 2MB see if we
		 * can cut it down to ~2MB.  This controls VSZ but has
		 * no effect on run-time size or performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > NOMSLABSIZE) {
			tmpchunks = (ssize_t)(NOMSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 1)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than LITSLABSIZE (~32K)
		 * see if we can cut it down to ~32K while still maintaining
		 * a reasonably large number of chunks in each slab.  This
		 * controls VSZ but has no effect on run-time size or
		 * performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > LITSLABSIZE) {
			tmpchunks = (ssize_t)(LITSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 32)
				tmpchunks = 32;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		MaxChunks[zi] = maxchunks;
	}
	MASSERT(maxchunks > 0 && maxchunks <= MAXCHUNKS);

	/*
	 * Calculate the actual slab size.  maxchunks will be recalculated
	 * a little later.
	 */
	slab_desire = chunk_offset + chunk_size * maxchunks;
	slab_size = 8 * MAXCHUNKS;
	power = 3 + MAXCHUNKS_BITS;
	while (slab_size < slab_desire) {
		slab_size <<= 1;
		++power;
	}

	/*
	 * Do a quick recalculation based on the actual slab size but not
	 * yet dealing with whether the slab header is in-band or out-of-band.
	 * The purpose here is to see if we can reasonably reduce slab_size
	 * to a power of 4 to allow more chunk sizes to use the same slab
	 * size.
	 */
	if ((power & 1) && slab_size > 32768) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 8) {
			slab_size >>= 1;
			--power;
		}
	}
	if ((power & 2) && slab_size > 32768 * 4) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 4) {
			slab_size >>= 2;
			power -= 2;
		}
	}

	/*
	 * This case occurs when the slab_size is larger than 1/1024 available
	 * UVM.
	 */
	nswath = slab_size / NREGIONS_SIZE;
	if (nswath > NREGIONS)
		return (NULL);

	/*
	 * Try to allocate from our current best region for this zi
	 */
	region_mask = ~(slab_size - 1);
	ri = slgd->zone[zi].best_region;
	if (Regions[ri].mask == region_mask) {
		if ((slab = _vmem_alloc(ri, slab_size)) != NULL)
			goto found;
	}

	/*
	 * Try to find an existing region to allocate from.  The normal
	 * case will be for allocations that are less than 1/1024 available
	 * UVM, which fit into a single Regions[] entry.
	 */
	while (slab_size <= NREGIONS_SIZE) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ++ri) {
			if (rx < 0 && Regions[ri].mask == 0)
				rx = ri;
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
		}

		if (rx < 0)
			return(NULL);

		/*
		 * This can fail, retry either way
		 */
		atomic_cmpset_ptr((void **)&Regions[rx].mask,
				  NULL,
				  (void *)region_mask);
	}
	for (;;) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ri += nswath) {
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
			if (rx < 0) {
				for (j = nswath - 1; j >= 0; --j) {
					if (Regions[ri+j].mask != 0)
						break;
				}
				if (j < 0)
					rx = ri;
			}
		}

		/*
		 * We found a candidate, try to allocate it backwards so
		 * another thread racing a slaballoc() does not see the
		 * mask in the base index position until we are done.
		 *
		 * We can safely zero-out any partial allocations because
		 * the mask is only accessed from the base index.  Any other
		 * threads racing us will fail prior to us clearing the mask.
		 */
		if (rx < 0)
			return(NULL);
		for (j = nswath - 1; j >= 0; --j) {
			if (!atomic_cmpset_ptr((void **)&Regions[rx+j].mask,
					       NULL, (void *)region_mask)) {
				while (++j < nswath)
					Regions[rx+j].mask = 0;
				break;
			}
		}
		/* retry */
	}

	/*
	 * Fill in the new slab in region ri.  If the slab_size completely
	 * fills one or more region slots we move the slab structure out of
	 * band which should optimize the chunking (particularly for a power
	 * of 2).
	 */
found:
	region = &Regions[ri];
	MASSERT(region->slab == NULL);
	if (slab_size >= NREGIONS_SIZE) {
		save = slab;
		slab = memalloc(sizeof(*slab), 0);
		bzero(slab, sizeof(*slab));
		slab->chunks = save;
		for (j = 0; j < nswath; ++j)
			region[j].slab = slab;
		chunk_offset = 0;
	} else {
		bzero(slab, sizeof(*slab));
		slab->chunks = (char *)slab + chunk_offset;
	}

	/*
	 * Calculate the start of the chunks memory and recalculate the
	 * actual number of chunks the slab can hold.
	 */
	maxchunks = (slab_size - chunk_offset) / chunk_size;
	if (maxchunks > MAXCHUNKS)
		maxchunks = MAXCHUNKS;

	/*
	 * And fill in the rest
	 */
	slab->magic = ZALLOC_SLAB_MAGIC;
	slab->navail = maxchunks;
	slab->nmax = maxchunks;
	slab->slab_size = slab_size;
	slab->chunk_size = chunk_size;
	slab->zone_index = zi;
	slab->slgd = slgd;
	slab->state = UNKNOWN;
	slab->region = region;

	for (ri = 0; ri < maxchunks; ri += LONG_BITS) {
		if (ri + LONG_BITS <= maxchunks)
			slab->bitmap[ri >> LONG_BITS_SHIFT] = ULONG_MAX;
		else
			slab->bitmap[ri >> LONG_BITS_SHIFT] =
				(1LU << (maxchunks - ri)) - 1;
	}
	return (slab);
}
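/*
 * Illustrative only: with maxchunks = 100 on LP64 the bitmap loop at the
 * end of slaballoc() sets bitmap[0] = ULONG_MAX (64 free chunks) and
 * bitmap[1] = (1LU << 36) - 1 (the remaining 36 chunks); all higher words
 * stay zeroed so those positions can never be handed out.
 */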
/*
 * Free a slab.
 */
static void
slabfree(slab_t slab)
{
	int nswath;
	int j;

	if (slab->region->slab == slab) {
		/*
		 * Out-of-band slab.
		 */
		nswath = slab->slab_size / NREGIONS_SIZE;
		for (j = 0; j < nswath; ++j)
			slab->region[j].slab = NULL;
		slab->magic = 0;
		_vmem_free(slab->chunks, slab->slab_size);
		memfree(slab, 0);
	} else {
		/*
		 * In-band slab.
		 */
		slab->magic = 0;
		_vmem_free(slab, slab->slab_size);
	}
}

/*
 * Terminate a slab's use in the current thread.  The slab may still have
 * outstanding allocations and thus not be deallocatable.
 */
static void
slabterm(slglobaldata_t slgd, slab_t slab)
{
	slglobaldata_t sldepot;
	struct zoneinfo *zinfo;
	int zi = slab->zone_index;

	slab->slgd = NULL;
	--slgd->nslabs;
	sldepot = &slglobaldepot;
	zinfo = &sldepot->zone[zi];

	/*
	 * Move the slab to the avail list or the empty list.
	 */
	if (__isthreaded)
		_SPINLOCK(&sldepot->lock);
	if (slab->navail == slab->nmax) {
		slab->state = EMPTY;
		slab->next = zinfo->empty_base;
		zinfo->empty_base = slab;
		++zinfo->empty_count;
	} else {
		slab->state = AVAIL;
		slab->next = zinfo->avail_base;
		zinfo->avail_base = slab;
		++zinfo->avail_count;
	}

	/*
	 * Clean extra slabs out of the empty list
	 */
	while (zinfo->empty_count > opt_cache) {
		slab = zinfo->empty_base;
		zinfo->empty_base = slab->next;
		--zinfo->empty_count;
		slab->state = UNKNOWN;
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
		slabfree(slab);
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
	}
	if (__isthreaded)
		_SPINUNLOCK(&sldepot->lock);
}

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(int ri, size_t slab_size)
{
	char *baddr = (void *)((uintptr_t)ri << NREGIONS_SHIFT);
	char *eaddr;
	char *addr;
	char *save;
	uintptr_t excess;

	if (slab_size < NREGIONS_SIZE)
		eaddr = baddr + NREGIONS_SIZE;
	else
		eaddr = baddr + slab_size;

	/*
	 * This usually just works but might not.
	 */
	addr = mmap(baddr, slab_size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON | MAP_SIZEALIGN, -1, 0);
	if (addr == MAP_FAILED) {
		return (NULL);
	}
	if (addr < baddr || addr + slab_size > eaddr) {
		munmap(addr, slab_size);
		return (NULL);
	}

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *    aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *    xxxxxxxxx				final excess calculation
	 *    ^ returned address
	 */
	excess = (uintptr_t)addr & (slab_size - 1);
	while (excess) {
		excess = slab_size - excess;
		save = addr;

		munmap(save + excess, slab_size - excess);
		addr = _vmem_alloc(ri, slab_size);
		munmap(save, excess);
		if (addr == NULL)
			return (NULL);
		if (addr < baddr || addr + slab_size > eaddr) {
			munmap(addr, slab_size);
			return (NULL);
		}
		excess = (uintptr_t)addr & (slab_size - 1);
	}
	return (addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}

__weak_reference(__malloc, malloc);
__weak_reference(__calloc, calloc);
__weak_reference(__posix_memalign, posix_memalign);
__weak_reference(__realloc, realloc);
__weak_reference(__free, free);
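/*
 * Usage note (an assumption, not something this file documents): a shared
 * object built with the cc line near the top of this file can be interposed
 * ahead of the system allocator by the dynamic linker, e.g.
 *
 *	LD_PRELOAD=./dmalloc.so MALLOC_OPTIONS=Z ./prog
 */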