/*
 * DMALLOC.C	- Dillon's malloc
 *
 * Copyright (c) 2011,2017 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a modified slab allocator as a drop-in replacement
 * for the libc malloc().  The slab algorithm has been adjusted to support
 * dynamic sizing of slabs which effectively allows slabs to be used for
 * allocations of any size.  Because of this we have neither a small-block
 * allocator nor a big-block allocator and the code paths are simplified.
 *
 * To support dynamic slab sizing available user virtual memory is broken
 * down into ~1024 regions.  Each region has a fixed slab size whose value
 * is set when the region is opened up for use.  The free() path simply
 * applies a mask based on the region to the pointer to acquire the base
 * of the governing slab structure.
 *
 * Regions[NREGIONS]	(1024)
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *	32768-65535	4096		8
 *	... continues forever ...	4 zones
 *
 *	For a 2^63 memory space each doubling >= 64K is broken down into
 *	4 chunking zones, so we support 88 + (48 * 4) = 280 zones.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *	+ power-of-2 sized allocations up to a page will be power-of-2
 *	  aligned.  Above that, power-of-2 sized allocations are page-aligned.
 *	  Non power-of-2 sized allocations are aligned the same as the chunk
 *	  size for their zone.
 *	+ ability to allocate arbitrarily large chunks of memory
 *	+ realloc will reuse the passed pointer if possible, within the
 *	  limitations of the zone chunking.
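 *
 *	For illustration: a 300 byte request falls in the 256-511 band of
 *	the table above, so it is rounded up to that band's 32 byte chunking
 *	(320 bytes) and carved out of that zone's slabs, while a 256 byte
 *	request should come back 256-byte aligned per the alignment rules
 *	above.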
 *
 * On top of the slab allocator we also implement a 16-entry-per-thread
 * magazine cache for allocations <= NOMSLABSIZE.
 *
 * FUTURE FEATURES
 *
 *	+ [better] garbage collection
 *	+ better initial sizing.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.  Upper case letters enable or
 * increase a feature, lower case letters disable or decrease a feature.
 *
 * U		Enable UTRACE for all operations, observable with ktrace.
 *		Disabled by default.
 *
 * Z		Zero out allocations, otherwise allocations (except for
 *		calloc) will contain garbage.
 *		Disabled by default.
 *
 * H		Pass a hint with madvise() about unused pages.
 *		Disabled by default.
 *		Not currently implemented.
 *
 * F		Disable local per-thread caching.
 *		Disabled by default.
 *
 * C		Increase (decrease) how much excess cache to retain.
 *		Set to 4 by default.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o dmalloc.so dmalloc.c */

#ifndef STANDALONE_DEBUG
#include "libc_private.h"
#endif

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <limits.h>

#include <machine/atomic.h>
#include <machine/cpufunc.h>

#ifdef STANDALONE_DEBUG
void _nmalloc_thr_init(void);
#else
#include "spinlock.h"
#include "un-namespace.h"
#endif

#ifndef MAP_SIZEALIGN
#define MAP_SIZEALIGN	0
#endif

#if SSIZE_MAX == 0x7FFFFFFF
#define ADDRBITS	32
#define UVM_BITS	32	/* worst case */
#else
#define ADDRBITS	64
#define UVM_BITS	48	/* worst case XXX */
#endif

#if LONG_MAX == 0x7FFFFFFF
#define LONG_BITS	32
#define LONG_BITS_SHIFT	5
#else
#define LONG_BITS	64
#define LONG_BITS_SHIFT	6
#endif

#define LOCKEDPTR	((void *)(intptr_t)-1)

/*
 * Regions[]
 */
#define NREGIONS_BITS	10
#define NREGIONS	(1 << NREGIONS_BITS)
#define NREGIONS_MASK	(NREGIONS - 1)
#define NREGIONS_SHIFT	(UVM_BITS - NREGIONS_BITS)
#define NREGIONS_SIZE	(1LU << NREGIONS_SHIFT)

typedef struct region *region_t;
typedef struct slglobaldata *slglobaldata_t;
typedef struct slab *slab_t;

struct region {
	uintptr_t	mask;
	slab_t		slab;		/* conditional out of band slab */
};

static struct region Regions[NREGIONS];

/*
 * Number of chunking zones available
 */
#define CHUNKFACTOR	8
#if ADDRBITS == 32
#define NZONES		(16 + 9 * CHUNKFACTOR + 16 * CHUNKFACTOR)
#else
#define NZONES		(16 + 9 * CHUNKFACTOR + 48 * CHUNKFACTOR)
#endif

static int MaxChunks[NZONES];

#define NDEPOTS		8		/* must be power of 2 */

/*
 * Maximum number of chunks per slab, governed by the allocation bitmap in
 * each slab.  The maximum is reduced for large chunk sizes.
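 *
 * (For illustration: with 64-bit longs the per-slab bitmap holds
 * LONG_BITS * LONG_BITS = 64 * 64 = 4096 bits, so MAXCHUNKS works out
 * to 4096; with 32-bit longs it is 32 * 32 = 1024.)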
 */
#define MAXCHUNKS	(LONG_BITS * LONG_BITS)
#define MAXCHUNKS_BITS	(LONG_BITS_SHIFT * LONG_BITS_SHIFT)
#define LITSLABSIZE	(32 * 1024)
#define NOMSLABSIZE	(2 * 1024 * 1024)
#define BIGSLABSIZE	(128 * 1024 * 1024)

#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */

TAILQ_HEAD(slab_list, slab);

/*
 * A slab structure
 */
struct slab {
	struct slab	*next;		/* slabs with available space */
	TAILQ_ENTRY(slab) entry;
	int32_t		magic;		/* magic number for sanity check */
	u_int		navail;		/* number of free elements available */
	u_int		nmax;
	u_int		free_bit;	/* free hint bitno */
	u_int		free_index;	/* free hint index */
	u_long		bitmap[LONG_BITS]; /* free chunks */
	size_t		slab_size;	/* size of entire slab */
	size_t		chunk_size;	/* chunk size for validation */
	int		zone_index;
	enum { UNKNOWN, AVAIL, EMPTY, FULL } state;
	int		flags;
	region_t	region;		/* related region */
	char		*chunks;	/* chunk base */
	slglobaldata_t	slgd;		/* localized to thread else NULL */
};

/*
 * per-thread data + global depot
 *
 * NOTE: The magazine shortcut is only used for per-thread data.
 */
#define NMAGSHORTCUT	16

struct slglobaldata {
	spinlock_t	lock;		/* only used by slglobaldepot */
	struct zoneinfo {
		slab_t	avail_base;
		slab_t	empty_base;
		int	best_region;
		int	mag_index;
		int	avail_count;
		int	empty_count;
		void	*mag_shortcut[NMAGSHORTCUT];
	} zone[NZONES];
	struct slab_list full_zones;	/* via entry */
	int		masked;
	int		biggest_index;
	size_t		nslabs;
};

#define SLAB_ZEROD	0x0001

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE	8		/* in bytes */
#define MIN_CHUNK_MASK	(MIN_CHUNK_SIZE - 1)

#define SAFLAG_ZERO	0x00000001

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

/*
 * Thread control
 */

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			} while (0)

/*
 * With this attribute set, do not require a function call for accessing
 * this variable when the code is compiled -fPIC.
 *
 * Must be empty for libc_rtld (similar to __thread)
 */
#if defined(__LIBC_RTLD)
#define TLS_ATTRIBUTE
#else
#define TLS_ATTRIBUTE	__attribute__ ((tls_model ("initial-exec")));
#endif

static __thread struct slglobaldata slglobal TLS_ATTRIBUTE;
static pthread_key_t thread_malloc_key;
static pthread_once_t thread_malloc_once = PTHREAD_ONCE_INIT;
static struct slglobaldata slglobaldepot;

static int opt_madvise = 0;
static int opt_free = 0;
static int opt_cache = 4;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static int malloc_panic;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static void *memalloc(size_t size, int flags);
static void *memrealloc(void *ptr, size_t size);
static void memfree(void *ptr, int);
static slab_t slaballoc(int zi, size_t chunking, size_t chunk_size);
static void slabfree(slab_t slab);
static void slabterm(slglobaldata_t slgd, slab_t slab);
static void *_vmem_alloc(int ri, size_t slab_size);
static void _vmem_free(void *ptr, size_t slab_size);
static void _mpanic(const char *ctl, ...) __printflike(1, 2);
#ifndef STANDALONE_DEBUG
static void malloc_init(void) __constructor(101);
#else
static void malloc_init(void) __constructor(101);
#endif

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

static void
malloc_init(void)
{
	const char *p = NULL;

	TAILQ_INIT(&slglobal.full_zones);

	Regions[0].mask = -1;	/* disallow activity in lowest region */

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':
			opt_utrace = 0;
			break;
		case 'U':
			opt_utrace = 1;
			break;
		case 'h':
			opt_madvise = 0;
			break;
		case 'H':
			opt_madvise = 1;
			break;
		case 'c':
			if (opt_cache > 0)
				--opt_cache;
			break;
		case 'C':
			++opt_cache;
			break;
		case 'f':
			opt_free = 0;
			break;
		case 'F':
			opt_free = 1;
			break;
		case 'z':
			g_malloc_flags = 0;
			break;
		case 'Z':
			g_malloc_flags = SAFLAG_ZERO;
			break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
static void _nmalloc_thr_init_once(void);
static void _nmalloc_thr_destructor(void *thrp);

void
_nmalloc_thr_init(void)
{
	static int did_init;

	TAILQ_INIT(&slglobal.full_zones);

	if (slglobal.masked)
		return;

	slglobal.masked = 1;
	if (did_init == 0) {
		did_init = 1;
		pthread_once(&thread_malloc_once, _nmalloc_thr_init_once);
	}
	pthread_setspecific(thread_malloc_key, &slglobal);
	slglobal.masked = 0;
}

void
_nmalloc_thr_prepfork(void)
{
	if (__isthreaded)
		_SPINLOCK(&slglobaldepot.lock);
}

void
_nmalloc_thr_parentfork(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&slglobaldepot.lock);
}

void
_nmalloc_thr_childfork(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&slglobaldepot.lock);
}

/*
 * Called just once
 */
static void
_nmalloc_thr_init_once(void)
{
	int error;

	error = pthread_key_create(&thread_malloc_key,
				   _nmalloc_thr_destructor);
	if (error)
		abort();
}

/*
 * Called for each thread undergoing exit
 *
 * Move all of the thread's slabs into a depot.
 */
static void
_nmalloc_thr_destructor(void *thrp)
{
	slglobaldata_t slgd = thrp;
	struct zoneinfo *zinfo;
	slab_t slab;
	void *ptr;
	int i;
	int j;

	slgd->masked = 1;

	for (i = 0; i <= slgd->biggest_index; i++) {
		zinfo = &slgd->zone[i];

		while ((j = zinfo->mag_index) > 0) {
			--j;
			ptr = zinfo->mag_shortcut[j];
			zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
			zinfo->mag_index = j;
			memfree(ptr, 0);
		}

		while ((slab = zinfo->empty_base) != NULL) {
			zinfo->empty_base = slab->next;
			--zinfo->empty_count;
			slabterm(slgd, slab);
		}

		while ((slab = zinfo->avail_base) != NULL) {
			zinfo->avail_base = slab->next;
			--zinfo->avail_count;
			slabterm(slgd, slab);
		}

		while ((slab = TAILQ_FIRST(&slgd->full_zones)) != NULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			slabterm(slgd, slab);
		}
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 *
 * Minimum alignment is 16 bytes for allocations >= 16 bytes to conform
 * with malloc requirements for intel/amd.
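 *
 * For illustration: a request for 5000 bytes lands in the 4096-8191 band,
 * so *bytes should come back rounded up to 5120 with *chunking set to 512,
 * and the caller then allocates a 5120 byte chunk from that zone.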
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (size_t)*bytes;
	size_t x;
	size_t c;
	int i;

	if (n < 128) {
		if (n < 16) {
			*bytes = n = (n + 7) & ~7;
			*chunking = 8;
			return(n / 8 - 1);	/* 8 byte chunks, 2 zones */
		} else {
			*bytes = n = (n + 15) & ~15;
			*chunking = 16;
			return(n / 16 + 2);	/* 16 byte chunks, 8 zones */
		}
	}
	if (n < 4096) {
		x = 256;
		c = x / (CHUNKFACTOR * 2);
		i = 16;
	} else {
		x = 8192;
		c = x / (CHUNKFACTOR * 2);
		i = 16 + CHUNKFACTOR * 5; /* 256->512,1024,2048,4096,8192 */
	}
	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKFACTOR;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = roundup2(n, c);
	*chunking = c;
	return (i + n / c - CHUNKFACTOR);

#if 0
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);

	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / (CHUNKINGLO*2) + CHUNKINGLO*1 - 1);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / (CHUNKINGLO*4) + CHUNKINGLO*2 - 1);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / (CHUNKINGLO*8) + CHUNKINGLO*3 - 1);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / (CHUNKINGLO*16) + CHUNKINGLO*4 - 1);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / (CHUNKINGLO*32) + CHUNKINGLO*5 - 1);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / (CHUNKINGLO*64) + CHUNKINGLO*6 - 1);
	}
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / (CHUNKINGLO*128) + CHUNKINGLO*7 - 1);
	}
	if (n < 32768) {				/* 16384-32767 */
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / (CHUNKINGLO*256) + CHUNKINGLO*8 - 1);
	}
	if (n < 65536) {
		*bytes = n = (n + 4095) & ~4095;	/* 32768-65535 */
		*chunking = 4096;
		return(n / (CHUNKINGLO*512) + CHUNKINGLO*9 - 1);
	}

	x = 131072;
	c = 8192;
	i = CHUNKINGLO*10 - 1;

	while (n >= x) {
		x <<= 1;
		c <<= 1;
		i += CHUNKINGHI;
		if (x == 0)
			_mpanic("slaballoc: byte value too high");
	}
	*bytes = n = (n + c - 1) & ~(c - 1);
	*chunking = c;
	return (n / c + i);
#endif
}

/*
 * malloc() - call internal slab allocator
 */
void *
__malloc(size_t size)
{
	void *ptr;

	ptr = memalloc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
__calloc(size_t number, size_t size)
{
	void *ptr;

	ptr = memalloc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
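 *
 * For illustration: an allocation whose zone chunk_size is 320 bytes can
 * shrink to 200 bytes in place (200 >= 320 / 2), but growing it to 400
 * bytes should force a fresh allocation followed by a copy of the old
 * contents.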
 */
void *
__realloc(void *ptr, size_t size)
{
	void *ret;

	if (ptr == NULL)
		ret = memalloc(size, 0);
	else
		ret = memrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to at least
 * PAGE_SIZE.  Otherwise we use the zoneindex mechanic to find a zone
 * matching the requirements.
 */
int
__posix_memalign(void **memptr, size_t alignment, size_t size)
{
	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * XXX for now just find the nearest power of 2 >= size and also
	 * >= alignment and allocate that.
	 */
	while (alignment < size) {
		alignment <<= 1;
		if (alignment == 0)
			_mpanic("posix_memalign: byte value too high");
	}
	*memptr = memalloc(alignment, 0);
	return(*memptr ? 0 : ENOMEM);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
__free(void *ptr)
{
	if (ptr) {
		UTRACE(ptr, 0, 0);
		memfree(ptr, 0);
	}
}

/*
 * memalloc()	(SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.
 */
static void *
memalloc(size_t size, int flags)
{
	slglobaldata_t slgd;
	struct zoneinfo *zinfo;
	slab_t slab;
	size_t chunking;
	int bmi;
	int bno;
	u_long *bmp;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int j;
	char *obj;

	/*
	 * If 0 bytes is requested we have to return a unique pointer, allocate
	 * at least one byte.
	 */
	if (size == 0)
		size = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);
	if (size == 0)
		return(NULL);

	/*
	 * Try magazine shortcut first
	 */
	slgd = &slglobal;
	zinfo = &slgd->zone[zi];

	if ((j = zinfo->mag_index) != 0) {
		zinfo->mag_index = --j;
		obj = zinfo->mag_shortcut[j];
		zinfo->mag_shortcut[j] = NULL;	/* SAFETY */
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return obj;
	}

	/*
	 * Locate a slab with available space.  If no slabs are available
	 * back-off to the empty list and if we still come up dry allocate
	 * a new slab (which will try the depot first).
	 */
retry:
	if ((slab = zinfo->avail_base) == NULL) {
		if ((slab = zinfo->empty_base) == NULL) {
			/*
			 * Still dry
			 */
			slab = slaballoc(zi, chunking, size);
			if (slab == NULL)
				return(NULL);
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			slab->state = AVAIL;
			if (slgd->biggest_index < zi)
				slgd->biggest_index = zi;
			++slgd->nslabs;
		} else {
			/*
			 * Pulled from empty list
			 */
			zinfo->empty_base = slab->next;
			slab->next = zinfo->avail_base;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			slab->state = AVAIL;
			--zinfo->empty_count;
		}
	}

	/*
	 * Allocate a chunk out of the slab.  HOT PATH
	 *
	 * Only the thread owning the slab can allocate out of it.
	 *
	 * NOTE: The last bit in the bitmap is always marked allocated so
	 *	 we cannot overflow here.
	 */
	bno = slab->free_bit;
	bmi = slab->free_index;
	bmp = &slab->bitmap[bmi];
	if (*bmp & (1LU << bno)) {
		atomic_clear_long(bmp, 1LU << bno);
		obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) * size;
		slab->free_bit = (bno + 1) & (LONG_BITS - 1);
		atomic_add_int(&slab->navail, -1);
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	/*
	 * Allocate a chunk out of a slab.  COLD PATH
	 */
	if (slab->navail == 0) {
		zinfo->avail_base = slab->next;
		--zinfo->avail_count;
		slab->state = FULL;
		TAILQ_INSERT_TAIL(&slgd->full_zones, slab, entry);
		goto retry;
	}

	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
			      size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	bmi = 0;
	while (bmi < LONG_BITS) {
		bmp = &slab->bitmap[bmi];
		if (*bmp) {
			bno = bsflong(*bmp);
			atomic_clear_long(bmp, 1LU << bno);
			obj = slab->chunks + ((bmi << LONG_BITS_SHIFT) + bno) *
			      size;
			slab->free_index = bmi;
			slab->free_bit = (bno + 1) & (LONG_BITS - 1);
			atomic_add_int(&slab->navail, -1);
			if (flags & SAFLAG_ZERO)
				bzero(obj, size);
			return (obj);
		}
		++bmi;
	}
	_mpanic("slaballoc: corrupted zone: navail %d", slab->navail);
	/* not reached */
	return NULL;
}

/*
 * Reallocate memory within the chunk
 */
static void *
memrealloc(void *ptr, size_t nsize)
{
	region_t region;
	slab_t slab;
	size_t osize;
	char *obj;
	int flags = 0;

	/*
	 * If 0 bytes is requested we have to return a unique pointer, allocate
	 * at least one byte.
	 */
	if (nsize == 0)
		nsize = 1;

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);
	osize = slab->chunk_size;
	if (nsize <= osize) {
		if (osize < 32 || nsize >= osize / 2) {
			obj = ptr;
			if ((flags & SAFLAG_ZERO) && nsize < osize)
				bzero(obj + nsize, osize - nsize);
			return(obj);
		}
	}

	/*
	 * Otherwise resize the object
	 */
	obj = memalloc(nsize, 0);
	if (obj) {
		if (nsize > osize)
			nsize = osize;
		bcopy(ptr, obj, nsize);
		memfree(ptr, 0);
	}
	return (obj);
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.
 *
 * MPSAFE
 */
static void
memfree(void *ptr, int flags)
{
	region_t region;
	slglobaldata_t slgd;
	slab_t slab;
	slab_t stmp;
	slab_t *slabp;
	int bmi;
	int bno;
	int j;
	u_long *bmp;

	/*
	 * Locate the zone by looking up the dynamic slab size mask based
	 * on the memory region the allocation resides in.
	 *
	 * WARNING! The slab may be owned by another thread!
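	 *
	 * (For in-band slabs the masking works because _vmem_alloc()
	 * returns slab_size-aligned memory and the region mask is
	 * ~(slab_size - 1), so ptr & mask recovers the slab header at
	 * the base of the slab.)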
	 */
	region = &Regions[((uintptr_t)ptr >> NREGIONS_SHIFT) & NREGIONS_MASK];
	if ((slab = region->slab) == NULL)
		slab = (void *)((uintptr_t)ptr & region->mask);
	MASSERT(slab != NULL);
	MASSERT(slab->magic == ZALLOC_SLAB_MAGIC);

#ifdef INVARIANTS
	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
	if (slab->chunk_size < sizeof(weirdary))
		bcopy(weirdary, ptr, slab->chunk_size);
	else
		bcopy(weirdary, ptr, sizeof(weirdary));
#endif
	slgd = &slglobal;

	/*
	 * Use mag_shortcut[] when possible
	 */
	if (slgd->masked == 0 && slab->chunk_size <= NOMSLABSIZE) {
		struct zoneinfo *zinfo;

		zinfo = &slgd->zone[slab->zone_index];
		j = zinfo->mag_index;
		if (j < NMAGSHORTCUT) {
			zinfo->mag_shortcut[j] = ptr;
			zinfo->mag_index = j + 1;
			return;
		}
	}

	/*
	 * Free to slab and increment navail.  We can delay incrementing
	 * navail to prevent the slab from being destroyed out from under
	 * us while we do other optimizations.
	 */
	bno = ((uintptr_t)ptr - (uintptr_t)slab->chunks) / slab->chunk_size;
	bmi = bno >> LONG_BITS_SHIFT;
	bno &= (LONG_BITS - 1);
	bmp = &slab->bitmap[bmi];

	MASSERT(bmi >= 0 && bmi < slab->nmax);
	MASSERT((*bmp & (1LU << bno)) == 0);
	atomic_set_long(bmp, 1LU << bno);

	if (slab->slgd == slgd) {
		/*
		 * We can only do the following if we own the slab.  Note
		 * that navail can be incremented by any thread even if
		 * we own the slab.
		 */
		struct zoneinfo *zinfo;

		atomic_add_int(&slab->navail, 1);
		if (slab->free_index > bmi) {
			slab->free_index = bmi;
			slab->free_bit = bno;
		} else if (slab->free_index == bmi &&
			   slab->free_bit > bno) {
			slab->free_bit = bno;
		}
		zinfo = &slgd->zone[slab->zone_index];

		/*
		 * Freeing an object from a full slab makes it less than
		 * full.  The slab must be moved to the available list.
		 *
		 * If the available list has too many slabs, release some
		 * to the depot.
		 */
		if (slab->state == FULL) {
			TAILQ_REMOVE(&slgd->full_zones, slab, entry);
			slab->state = AVAIL;
			stmp = zinfo->avail_base;
			slab->next = stmp;
			zinfo->avail_base = slab;
			++zinfo->avail_count;
			while (zinfo->avail_count > opt_cache) {
				slab = zinfo->avail_base;
				zinfo->avail_base = slab->next;
				--zinfo->avail_count;
				slabterm(slgd, slab);
			}
			goto done;
		}

		/*
		 * If the slab becomes completely empty dispose of it in
		 * some manner.  By default each thread caches up to 4
		 * empty slabs.  Only small slabs are cached.
		 */
		if (slab->navail == slab->nmax && slab->state == AVAIL) {
			/*
			 * Remove slab from available queue
			 */
			slabp = &zinfo->avail_base;
			while ((stmp = *slabp) != slab)
				slabp = &stmp->next;
			*slabp = slab->next;
			--zinfo->avail_count;

			if (opt_free || opt_cache == 0) {
				/*
				 * If local caching is disabled cache the
				 * slab in the depot (or free it).
				 */
				slabterm(slgd, slab);
			} else if (slab->slab_size > BIGSLABSIZE) {
				/*
				 * We do not try to retain large slabs
				 * in per-thread caches.
				 */
				slabterm(slgd, slab);
			} else if (zinfo->empty_count > opt_cache) {
				/*
				 * We have too many slabs cached, but
				 * instead of freeing this one free
				 * an empty slab that's been idle longer.
				 *
				 * (empty_count does not change)
				 */
				stmp = zinfo->empty_base;
				slab->state = EMPTY;
				slab->next = stmp->next;
				zinfo->empty_base = slab;
				slabterm(slgd, stmp);
			} else {
				/*
				 * Cache the empty slab in our thread local
				 * empty list.
				 */
				++zinfo->empty_count;
				slab->state = EMPTY;
				slab->next = zinfo->empty_base;
				zinfo->empty_base = slab;
			}
		}
	} else if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
		slglobaldata_t sldepot;

		/*
		 * If freeing to a slab owned by the global depot, and
		 * the slab becomes completely EMPTY, try to move it to
		 * the correct list.
		 */
		sldepot = &slglobaldepot;
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
		if (slab->slgd == NULL && slab->navail + 1 == slab->nmax) {
			struct zoneinfo *zinfo;

			/*
			 * Move the slab to the empty list
			 */
			MASSERT(slab->state == AVAIL);
			atomic_add_int(&slab->navail, 1);
			zinfo = &sldepot->zone[slab->zone_index];
			slabp = &zinfo->avail_base;
			while (slab != *slabp)
				slabp = &(*slabp)->next;
			*slabp = slab->next;
			--zinfo->avail_count;

			/*
			 * Clean out excessive empty entries from the
			 * depot.
			 */
			slab->state = EMPTY;
			slab->next = zinfo->empty_base;
			zinfo->empty_base = slab;
			++zinfo->empty_count;
			while (zinfo->empty_count > opt_cache) {
				slab = zinfo->empty_base;
				zinfo->empty_base = slab->next;
				--zinfo->empty_count;
				slab->state = UNKNOWN;
				if (__isthreaded)
					_SPINUNLOCK(&sldepot->lock);
				slabfree(slab);
				if (__isthreaded)
					_SPINLOCK(&sldepot->lock);
			}
		} else {
			atomic_add_int(&slab->navail, 1);
		}
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
	} else {
		/*
		 * We can't act on the slab other than by adjusting navail
		 * (and the bitmap which we did in the common code at the
		 * top).
		 */
		atomic_add_int(&slab->navail, 1);
	}
done:
	;
}

/*
 * Allocate a new slab holding objects of size chunk_size.
 */
static slab_t
slaballoc(int zi, size_t chunking, size_t chunk_size)
{
	slglobaldata_t slgd;
	slglobaldata_t sldepot;
	struct zoneinfo *zinfo;
	region_t region;
	void *save;
	slab_t slab;
	size_t slab_desire;
	size_t slab_size;
	size_t region_mask;
	uintptr_t chunk_offset;
	ssize_t maxchunks;
	ssize_t tmpchunks;
	int ispower2;
	int power;
	int ri;
	int rx;
	int nswath;
	int j;

	/*
	 * First look in the depot.  Any given zone in the depot may be
	 * locked by being set to -1.  We have to do this instead of simply
	 * removing the entire chain because removing the entire chain can
	 * cause racing threads to allocate local slabs for large objects,
	 * resulting in a large VSZ.
	 */
	slgd = &slglobal;
	sldepot = &slglobaldepot;
	zinfo = &sldepot->zone[zi];

	if (zinfo->avail_base) {
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
		slab = zinfo->avail_base;
		if (slab) {
			MASSERT(slab->slgd == NULL);
			slab->slgd = slgd;
			zinfo->avail_base = slab->next;
			--zinfo->avail_count;
			if (__isthreaded)
				_SPINUNLOCK(&sldepot->lock);
			return slab;
		}
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
	}

	/*
	 * Nothing in the depot, allocate a new slab by locating or assigning
	 * a region and then using the system virtual memory allocator.
	 */
	slab = NULL;

	/*
	 * Calculate the start of the data chunks relative to the start
	 * of the slab.  If chunk_size is a power of 2 we guarantee
	 * power of 2 alignment.  If it is not we guarantee alignment
	 * to the chunk size.
	 */
	if ((chunk_size ^ (chunk_size - 1)) == (chunk_size << 1) - 1) {
		ispower2 = 1;
		chunk_offset = roundup2(sizeof(*slab), chunk_size);
	} else {
		ispower2 = 0;
		chunk_offset = sizeof(*slab) + chunking - 1;
		chunk_offset -= chunk_offset % chunking;
	}

	/*
	 * Calculate a reasonable number of chunks for the slab.
	 *
	 * Once initialized the MaxChunks[] array can only ever be
	 * reinitialized to the same value.
	 */
	maxchunks = MaxChunks[zi];
	if (maxchunks == 0) {
		/*
		 * First calculate how many chunks would fit in 1/1024
		 * available memory.  This is around 2MB on a 32 bit
		 * system and 128G on a 64-bit (48-bits available) system.
		 */
		maxchunks = (ssize_t)(NREGIONS_SIZE - chunk_offset) /
			    (ssize_t)chunk_size;
		if (maxchunks <= 0)
			maxchunks = 1;

		/*
		 * A slab cannot handle more than MAXCHUNKS chunks, but
		 * limit us to approximately MAXCHUNKS / 2 here because
		 * we may have to expand maxchunks when we calculate the
		 * actual power-of-2 slab.
		 */
		if (maxchunks > MAXCHUNKS / 2)
			maxchunks = MAXCHUNKS / 2;

		/*
		 * Try to limit the slabs to BIGSLABSIZE (~128MB).  Larger
		 * slabs will be created if the allocation does not fit.
		 */
		if (chunk_offset + chunk_size * maxchunks > BIGSLABSIZE) {
			tmpchunks = (ssize_t)(BIGSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks <= 0)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than 2MB see if we
		 * can cut it down to ~2MB.  This controls VSZ but has
		 * no effect on run-time size or performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > NOMSLABSIZE) {
			tmpchunks = (ssize_t)(NOMSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 1)
				tmpchunks = 1;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		/*
		 * If the slab calculates to greater than 128K see if we
		 * can cut it down to ~128K while still maintaining a
		 * reasonably large number of chunks in each slab.  This
		 * controls VSZ but has no effect on run-time size or
		 * performance.
		 *
		 * This is very important in case you core dump and also
		 * important to reduce unnecessary region allocations.
		 */
		if (chunk_offset + chunk_size * maxchunks > LITSLABSIZE) {
			tmpchunks = (ssize_t)(LITSLABSIZE - chunk_offset) /
				    (ssize_t)chunk_size;
			if (tmpchunks < 32)
				tmpchunks = 32;
			if (maxchunks > tmpchunks)
				maxchunks = tmpchunks;
		}

		MaxChunks[zi] = maxchunks;
	}
	MASSERT(maxchunks > 0 && maxchunks <= MAXCHUNKS);

	/*
	 * Calculate the actual slab size.  maxchunks will be recalculated
	 * a little later.
	 */
	slab_desire = chunk_offset + chunk_size * maxchunks;
	slab_size = 8 * MAXCHUNKS;
	power = 3 + MAXCHUNKS_BITS;
	while (slab_size < slab_desire) {
		slab_size <<= 1;
		++power;
	}

	/*
	 * Do a quick recalculation based on the actual slab size but not
	 * yet dealing with whether the slab header is in-band or out-of-band.
	 * The purpose here is to see if we can reasonably reduce slab_size
	 * to a power of 4 to allow more chunk sizes to use the same slab
	 * size.
	 */
	if ((power & 1) && slab_size > 32768) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 8) {
			slab_size >>= 1;
			--power;
		}
	}
	if ((power & 2) && slab_size > 32768 * 4) {
		maxchunks = (slab_size - chunk_offset) / chunk_size;
		if (maxchunks >= MAXCHUNKS / 4) {
			slab_size >>= 2;
			power -= 2;
		}
	}

	/*
	 * This case occurs when the slab_size is larger than 1/1024 available
	 * UVM.
	 */
	nswath = slab_size / NREGIONS_SIZE;
	if (nswath > NREGIONS)
		return (NULL);

	/*
	 * Try to allocate from our current best region for this zi
	 */
	region_mask = ~(slab_size - 1);
	ri = slgd->zone[zi].best_region;
	if (Regions[ri].mask == region_mask) {
		if ((slab = _vmem_alloc(ri, slab_size)) != NULL)
			goto found;
	}

	/*
	 * Try to find an existing region to allocate from.  The normal
	 * case will be for allocations that are less than 1/1024 available
	 * UVM, which fit into a single Regions[] entry.
	 */
	while (slab_size <= NREGIONS_SIZE) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ++ri) {
			if (rx < 0 && Regions[ri].mask == 0)
				rx = ri;
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
		}

		if (rx < 0)
			return(NULL);

		/*
		 * This can fail, retry either way
		 */
		atomic_cmpset_ptr((void **)&Regions[rx].mask,
				  NULL,
				  (void *)region_mask);
	}

	for (;;) {
		rx = -1;
		for (ri = 0; ri < NREGIONS; ri += nswath) {
			if (Regions[ri].mask == region_mask) {
				slab = _vmem_alloc(ri, slab_size);
				if (slab) {
					slgd->zone[zi].best_region = ri;
					goto found;
				}
			}
			if (rx < 0) {
				for (j = nswath - 1; j >= 0; --j) {
					if (Regions[ri+j].mask != 0)
						break;
				}
				if (j < 0)
					rx = ri;
			}
		}

		/*
		 * We found a candidate, try to allocate it backwards so
		 * another thread racing a slaballoc() does not see the
		 * mask in the base index position until we are done.
		 *
		 * We can safely zero-out any partial allocations because
		 * the mask is only accessed from the base index.  Any other
		 * threads racing us will fail prior to us clearing the mask.
		 */
		if (rx < 0)
			return(NULL);
		for (j = nswath - 1; j >= 0; --j) {
			if (!atomic_cmpset_ptr((void **)&Regions[rx+j].mask,
					       NULL, (void *)region_mask)) {
				while (++j < nswath)
					Regions[rx+j].mask = 0;
				break;
			}
		}
		/* retry */
	}

	/*
	 * Fill in the new slab in region ri.  If the slab_size completely
	 * fills one or more region slots we move the slab structure out of
	 * band which should optimize the chunking (particularly for a power
	 * of 2).
	 */
found:
	region = &Regions[ri];
	MASSERT(region->slab == NULL);
	if (slab_size >= NREGIONS_SIZE) {
		save = slab;
		slab = memalloc(sizeof(*slab), 0);
		bzero(slab, sizeof(*slab));
		slab->chunks = save;
		for (j = 0; j < nswath; ++j)
			region[j].slab = slab;
		chunk_offset = 0;
	} else {
		bzero(slab, sizeof(*slab));
		slab->chunks = (char *)slab + chunk_offset;
	}

	/*
	 * Calculate the start of the chunks memory and recalculate the
	 * actual number of chunks the slab can hold.
	 */
	maxchunks = (slab_size - chunk_offset) / chunk_size;
	if (maxchunks > MAXCHUNKS)
		maxchunks = MAXCHUNKS;

	/*
	 * And fill in the rest
	 */
	slab->magic = ZALLOC_SLAB_MAGIC;
	slab->navail = maxchunks;
	slab->nmax = maxchunks;
	slab->slab_size = slab_size;
	slab->chunk_size = chunk_size;
	slab->zone_index = zi;
	slab->slgd = slgd;
	slab->state = UNKNOWN;
	slab->region = region;

	for (ri = 0; ri < maxchunks; ri += LONG_BITS) {
		if (ri + LONG_BITS <= maxchunks)
			slab->bitmap[ri >> LONG_BITS_SHIFT] = ULONG_MAX;
		else
			slab->bitmap[ri >> LONG_BITS_SHIFT] =
				(1LU << (maxchunks - ri)) - 1;
	}
	return (slab);
}

/*
 * Free a slab.
 */
static void
slabfree(slab_t slab)
{
	int nswath;
	int j;

	if (slab->region->slab == slab) {
		/*
		 * Out-of-band slab.
		 */
		nswath = slab->slab_size / NREGIONS_SIZE;
		for (j = 0; j < nswath; ++j)
			slab->region[j].slab = NULL;
		slab->magic = 0;
		_vmem_free(slab->chunks, slab->slab_size);
		memfree(slab, 0);
	} else {
		/*
		 * In-band slab.
		 */
		slab->magic = 0;
		_vmem_free(slab, slab->slab_size);
	}
}

/*
 * Terminate a slab's use in the current thread.  The slab may still have
 * outstanding allocations and thus not be deallocatable.
 */
static void
slabterm(slglobaldata_t slgd, slab_t slab)
{
	slglobaldata_t sldepot;
	struct zoneinfo *zinfo;
	int zi = slab->zone_index;

	slab->slgd = NULL;
	--slgd->nslabs;
	sldepot = &slglobaldepot;
	zinfo = &sldepot->zone[zi];

	/*
	 * Move the slab to the avail list or the empty list.
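	 *
	 * (navail == nmax means no chunks are outstanding, so the slab can
	 * go on the depot's empty list and may be freed below; otherwise
	 * live allocations remain and it must stay on the avail list.)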
	 */
	if (__isthreaded)
		_SPINLOCK(&sldepot->lock);
	if (slab->navail == slab->nmax) {
		slab->state = EMPTY;
		slab->next = zinfo->empty_base;
		zinfo->empty_base = slab;
		++zinfo->empty_count;
	} else {
		slab->state = AVAIL;
		slab->next = zinfo->avail_base;
		zinfo->avail_base = slab;
		++zinfo->avail_count;
	}

	/*
	 * Clean extra slabs out of the empty list
	 */
	while (zinfo->empty_count > opt_cache) {
		slab = zinfo->empty_base;
		zinfo->empty_base = slab->next;
		--zinfo->empty_count;
		slab->state = UNKNOWN;
		if (__isthreaded)
			_SPINUNLOCK(&sldepot->lock);
		slabfree(slab);
		if (__isthreaded)
			_SPINLOCK(&sldepot->lock);
	}
	if (__isthreaded)
		_SPINUNLOCK(&sldepot->lock);
}

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(int ri, size_t slab_size)
{
	char *baddr = (void *)((uintptr_t)ri << NREGIONS_SHIFT);
	char *eaddr;
	char *addr;
	char *save;
	uintptr_t excess;

	if (slab_size < NREGIONS_SIZE)
		eaddr = baddr + NREGIONS_SIZE;
	else
		eaddr = baddr + slab_size;

	/*
	 * This usually just works but might not.
	 */
	addr = mmap(baddr, slab_size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE | MAP_ANON | MAP_SIZEALIGN, -1, 0);
	if (addr == MAP_FAILED) {
		return (NULL);
	}
	if (addr < baddr || addr + slab_size > eaddr) {
		munmap(addr, slab_size);
		return (NULL);
	}

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (slab_size - 1);
	while (excess) {
		excess = slab_size - excess;
		save = addr;

		munmap(save + excess, slab_size - excess);
		addr = _vmem_alloc(ri, slab_size);
		munmap(save, excess);
		if (addr == NULL)
			return (NULL);
		if (addr < baddr || addr + slab_size > eaddr) {
			munmap(addr, slab_size);
			return (NULL);
		}
		excess = (uintptr_t)addr & (slab_size - 1);
	}
	return (addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}

__weak_reference(__malloc, malloc);
__weak_reference(__calloc, calloc);
__weak_reference(__posix_memalign, posix_memalign);
__weak_reference(__realloc, realloc);
__weak_reference(__free, free);
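
/*
 * One possible way to exercise the standalone build (illustrative only;
 * the output name and test program below are placeholders).  The cc
 * command near the top of this file produces dmalloc.so, which can then
 * be preloaded over the system allocator and tuned via MALLOC_OPTIONS as
 * documented above, e.g.:
 *
 *	cc -shared -fPIC -g -O -I/usr/src/lib/libc/include \
 *		-o dmalloc.so dmalloc.c
 *	MALLOC_OPTIONS=ZU LD_PRELOAD=./dmalloc.so ktrace ./a.out
 *
 * Z zeroes all allocations and U emits UTRACE records observable with
 * ktrace; whether preloading fully replaces libc's allocator depends on
 * how libc and libpthread were built.
 */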