1 /* 2 * NMALLOC.C - New Malloc (ported from kernel slab allocator) 3 * 4 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved. 5 * 6 * This code is derived from software contributed to The DragonFly Project 7 * by Matthew Dillon <dillon@backplane.com> and by 8 * Venkatesh Srinivas <me@endeavour.zapto.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 3. Neither the name of The DragonFly Project nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific, prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $ 38 */ 39 /* 40 * This module implements a slab allocator drop-in replacement for the 41 * libc malloc(). 42 * 43 * A slab allocator reserves a ZONE for each chunk size, then lays the 44 * chunks out in an array within the zone. Allocation and deallocation 45 * is nearly instantaneous, and overhead losses are limited to a fixed 46 * worst-case amount. 47 * 48 * The slab allocator does not have to pre-initialize the list of 49 * free chunks for each zone, and the underlying VM will not be 50 * touched at all beyond the zone header until an actual allocation 51 * needs it. 52 * 53 * Slab management and locking is done on a per-zone basis. 54 * 55 * Alloc Size Chunking Number of zones 56 * 0-127 8 16 57 * 128-255 16 8 58 * 256-511 32 8 59 * 512-1023 64 8 60 * 1024-2047 128 8 61 * 2048-4095 256 8 62 * 4096-8191 512 8 63 * 8192-16383 1024 8 64 * 16384-32767 2048 8 65 * 66 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table 67 * is used to locate for free. One and Two-page allocations use the 68 * zone mechanic to avoid excessive mmap()/munmap() calls. 69 * 70 * API FEATURES AND SIDE EFFECTS 71 * 72 * + power-of-2 sized allocations up to a page will be power-of-2 aligned. 73 * Above that power-of-2 sized allocations are page-aligned. Non 74 * power-of-2 sized allocations are aligned the same as the chunk 75 * size for their zone. 76 * + malloc(0) returns a special non-NULL value 77 * + ability to allocate arbitrarily large chunks of memory 78 * + realloc will reuse the passed pointer if possible, within the 79 * limitations of the zone chunking. 80 * 81 * Multithreaded enhancements for small allocations introduced August 2010. 82 * These are in the spirit of 'libumem'. See: 83 * Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the 84 * slab allocator to many CPUs and arbitrary resources". In Proc. 2001 85 * USENIX Technical Conference. USENIX Association. 86 * 87 * Oversized allocations employ the BIGCACHE mechanic whereby large 88 * allocations may be handed significantly larger buffers, allowing them 89 * to avoid mmap/munmap operations even through significant realloc()s. 90 * The excess space is only trimmed if too many large allocations have been 91 * given this treatment. 92 * 93 * TUNING 94 * 95 * The value of the environment variable MALLOC_OPTIONS is a character string 96 * containing various flags to tune nmalloc. 97 * 98 * 'U' / ['u'] Generate / do not generate utrace entries for ktrace(1) 99 * This will generate utrace events for all malloc, 100 * realloc, and free calls. There are tools (mtrplay) to 101 * replay and allocation pattern or to graph heap structure 102 * (mtrgraph) which can interpret these logs. 103 * 'Z' / ['z'] Zero out / do not zero all allocations. 104 * Each new byte of memory allocated by malloc, realloc, or 105 * reallocf will be initialized to 0. This is intended for 106 * debugging and will affect performance negatively. 107 * 'H' / ['h'] Pass a hint to the kernel about pages unused by the 108 * allocation functions. 109 */ 110 111 /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */ 112 113 #include "libc_private.h" 114 115 #include <sys/param.h> 116 #include <sys/types.h> 117 #include <sys/mman.h> 118 #include <sys/queue.h> 119 #include <sys/uio.h> 120 #include <sys/ktrace.h> 121 #include <stdio.h> 122 #include <stdint.h> 123 #include <stdlib.h> 124 #include <stdarg.h> 125 #include <stddef.h> 126 #include <unistd.h> 127 #include <string.h> 128 #include <fcntl.h> 129 #include <errno.h> 130 #include <pthread.h> 131 #include <machine/atomic.h> 132 133 #include "spinlock.h" 134 #include "un-namespace.h" 135 136 137 /* 138 * Linked list of large allocations 139 */ 140 typedef struct bigalloc { 141 struct bigalloc *next; /* hash link */ 142 void *base; /* base pointer */ 143 u_long active; /* bytes active */ 144 u_long bytes; /* bytes allocated */ 145 } *bigalloc_t; 146 147 /* 148 * Note that any allocations which are exact multiples of PAGE_SIZE, or 149 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem. 150 */ 151 #define ZALLOC_ZONE_LIMIT (16 * 1024) /* max slab-managed alloc */ 152 #define ZALLOC_MIN_ZONE_SIZE (32 * 1024) /* minimum zone size */ 153 #define ZALLOC_MAX_ZONE_SIZE (128 * 1024) /* maximum zone size */ 154 #define ZALLOC_ZONE_SIZE (64 * 1024) 155 #define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ 156 #define ZALLOC_SLAB_SLIDE 20 /* L1-cache skip */ 157 158 #if ZALLOC_ZONE_LIMIT == 16384 159 #define NZONES 72 160 #elif ZALLOC_ZONE_LIMIT == 32768 161 #define NZONES 80 162 #else 163 #error "I couldn't figure out NZONES" 164 #endif 165 166 /* 167 * Chunk structure for free elements 168 */ 169 typedef struct slchunk { 170 struct slchunk *c_Next; 171 } *slchunk_t; 172 173 /* 174 * The IN-BAND zone header is placed at the beginning of each zone. 175 */ 176 struct slglobaldata; 177 178 typedef struct slzone { 179 int32_t z_Magic; /* magic number for sanity check */ 180 int z_NFree; /* total free chunks / ualloc space */ 181 struct slzone *z_Next; /* ZoneAry[] link if z_NFree non-zero */ 182 int z_NMax; /* maximum free chunks */ 183 char *z_BasePtr; /* pointer to start of chunk array */ 184 int z_UIndex; /* current initial allocation index */ 185 int z_UEndIndex; /* last (first) allocation index */ 186 int z_ChunkSize; /* chunk size for validation */ 187 int z_FirstFreePg; /* chunk list on a page-by-page basis */ 188 int z_ZoneIndex; 189 int z_Flags; 190 struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE]; 191 } *slzone_t; 192 193 typedef struct slglobaldata { 194 spinlock_t Spinlock; 195 slzone_t ZoneAry[NZONES];/* linked list of zones NFree > 0 */ 196 int JunkIndex; 197 } *slglobaldata_t; 198 199 #define SLZF_UNOTZEROD 0x0001 200 201 #define FASTSLABREALLOC 0x02 202 203 /* 204 * Misc constants. Note that allocations that are exact multiples of 205 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 206 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 207 */ 208 #define MIN_CHUNK_SIZE 8 /* in bytes */ 209 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 210 #define IN_SAME_PAGE_MASK (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK) 211 212 /* 213 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 214 * not be larger then 64. 215 */ 216 #define BIGHSHIFT 10 /* bigalloc hash table */ 217 #define BIGHSIZE (1 << BIGHSHIFT) 218 #define BIGHMASK (BIGHSIZE - 1) 219 #define BIGXSIZE (BIGHSIZE / 16) /* bigalloc lock table */ 220 #define BIGXMASK (BIGXSIZE - 1) 221 222 /* 223 * BIGCACHE caches oversized allocations. Note that a linear search is 224 * performed, so do not make the cache too large. 225 * 226 * BIGCACHE will garbage-collect excess space when the excess exceeds the 227 * specified value. A relatively large number should be used here because 228 * garbage collection is expensive. 229 */ 230 #define BIGCACHE 16 231 #define BIGCACHE_MASK (BIGCACHE - 1) 232 #define BIGCACHE_LIMIT (1024 * 1024) /* size limit */ 233 #define BIGCACHE_EXCESS (16 * 1024 * 1024) /* garbage collect */ 234 235 #define SAFLAG_ZERO 0x0001 236 #define SAFLAG_PASSIVE 0x0002 237 238 /* 239 * Thread control 240 */ 241 242 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 243 244 #define MASSERT(exp) do { if (__predict_false(!(exp))) \ 245 _mpanic("assertion: %s in %s", \ 246 #exp, __func__); \ 247 } while (0) 248 249 /* 250 * Magazines 251 */ 252 253 #define M_MAX_ROUNDS 64 254 #define M_ZONE_ROUNDS 64 255 #define M_LOW_ROUNDS 32 256 #define M_INIT_ROUNDS 8 257 #define M_BURST_FACTOR 8 258 #define M_BURST_NSCALE 2 259 260 #define M_BURST 0x0001 261 #define M_BURST_EARLY 0x0002 262 263 struct magazine { 264 SLIST_ENTRY(magazine) nextmagazine; 265 266 int flags; 267 int capacity; /* Max rounds in this magazine */ 268 int rounds; /* Current number of free rounds */ 269 int burst_factor; /* Number of blocks to prefill with */ 270 int low_factor; /* Free till low_factor from full mag */ 271 void *objects[M_MAX_ROUNDS]; 272 }; 273 274 SLIST_HEAD(magazinelist, magazine); 275 276 static spinlock_t zone_mag_lock; 277 static spinlock_t depot_spinlock; 278 static struct magazine zone_magazine = { 279 .flags = M_BURST | M_BURST_EARLY, 280 .capacity = M_ZONE_ROUNDS, 281 .rounds = 0, 282 .burst_factor = M_BURST_FACTOR, 283 .low_factor = M_LOW_ROUNDS 284 }; 285 286 #define MAGAZINE_FULL(mp) (mp->rounds == mp->capacity) 287 #define MAGAZINE_NOTFULL(mp) (mp->rounds < mp->capacity) 288 #define MAGAZINE_EMPTY(mp) (mp->rounds == 0) 289 #define MAGAZINE_NOTEMPTY(mp) (mp->rounds != 0) 290 291 /* 292 * Each thread will have a pair of magazines per size-class (NZONES) 293 * The loaded magazine will support immediate allocations, the previous 294 * magazine will either be full or empty and can be swapped at need 295 */ 296 typedef struct magazine_pair { 297 struct magazine *loaded; 298 struct magazine *prev; 299 } magazine_pair; 300 301 /* A depot is a collection of magazines for a single zone. */ 302 typedef struct magazine_depot { 303 struct magazinelist full; 304 struct magazinelist empty; 305 spinlock_t lock; 306 } magazine_depot; 307 308 typedef struct thr_mags { 309 magazine_pair mags[NZONES]; 310 struct magazine *newmag; 311 int init; 312 } thr_mags; 313 314 /* 315 * With this attribute set, do not require a function call for accessing 316 * this variable when the code is compiled -fPIC. 317 * 318 * Must be empty for libc_rtld (similar to __thread). 319 */ 320 #ifdef __LIBC_RTLD 321 #define TLS_ATTRIBUTE 322 #else 323 #define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec"))) 324 #endif 325 326 static __thread thr_mags thread_mags TLS_ATTRIBUTE; 327 static pthread_key_t thread_mags_key; 328 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT; 329 static magazine_depot depots[NZONES]; 330 331 /* 332 * Fixed globals (not per-cpu) 333 */ 334 static const int ZoneSize = ZALLOC_ZONE_SIZE; 335 static const int ZoneLimit = ZALLOC_ZONE_LIMIT; 336 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE; 337 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1; 338 339 static int opt_madvise = 0; 340 static int opt_utrace = 0; 341 static int g_malloc_flags = 0; 342 static struct slglobaldata SLGlobalData; 343 static bigalloc_t bigalloc_array[BIGHSIZE]; 344 static spinlock_t bigspin_array[BIGXSIZE]; 345 static volatile void *bigcache_array[BIGCACHE]; /* atomic swap */ 346 static volatile size_t bigcache_size_array[BIGCACHE]; /* SMP races ok */ 347 static volatile int bigcache_index; /* SMP races ok */ 348 static int malloc_panic; 349 static size_t excess_alloc; /* excess big allocs */ 350 351 static void *_slaballoc(size_t size, int flags); 352 static void *_slabrealloc(void *ptr, size_t size); 353 static void _slabfree(void *ptr, int, bigalloc_t *); 354 static void *_vmem_alloc(size_t bytes, size_t align, int flags); 355 static void _vmem_free(void *ptr, size_t bytes); 356 static void *magazine_alloc(struct magazine *, int *); 357 static int magazine_free(struct magazine *, void *); 358 static void *mtmagazine_alloc(int zi); 359 static int mtmagazine_free(int zi, void *); 360 static void mtmagazine_init(void); 361 static void mtmagazine_destructor(void *); 362 static slzone_t zone_alloc(int flags); 363 static void zone_free(void *z); 364 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 365 static void malloc_init(void) __constructor(101); 366 367 struct nmalloc_utrace { 368 void *p; 369 size_t s; 370 void *r; 371 }; 372 373 #define UTRACE(a, b, c) \ 374 if (opt_utrace) { \ 375 struct nmalloc_utrace ut = { \ 376 .p = (a), \ 377 .s = (b), \ 378 .r = (c) \ 379 }; \ 380 utrace(&ut, sizeof(ut)); \ 381 } 382 383 static void 384 malloc_init(void) 385 { 386 const char *p = NULL; 387 388 if (issetugid() == 0) 389 p = getenv("MALLOC_OPTIONS"); 390 391 for (; p != NULL && *p != '\0'; p++) { 392 switch(*p) { 393 case 'u': opt_utrace = 0; break; 394 case 'U': opt_utrace = 1; break; 395 case 'h': opt_madvise = 0; break; 396 case 'H': opt_madvise = 1; break; 397 case 'z': g_malloc_flags = 0; break; 398 case 'Z': g_malloc_flags = SAFLAG_ZERO; break; 399 default: 400 break; 401 } 402 } 403 404 UTRACE((void *) -1, 0, NULL); 405 } 406 407 /* 408 * We have to install a handler for nmalloc thread teardowns when 409 * the thread is created. We cannot delay this because destructors in 410 * sophisticated userland programs can call malloc() for the first time 411 * during their thread exit. 412 * 413 * This routine is called directly from pthreads. 414 */ 415 void 416 _nmalloc_thr_init(void) 417 { 418 static int init_once; 419 thr_mags *tp; 420 421 /* 422 * Disallow mtmagazine operations until the mtmagazine is 423 * initialized. 424 */ 425 tp = &thread_mags; 426 tp->init = -1; 427 428 if (init_once == 0) { 429 init_once = 1; 430 pthread_once(&thread_mags_once, mtmagazine_init); 431 } 432 pthread_setspecific(thread_mags_key, tp); 433 tp->init = 1; 434 } 435 436 void 437 _nmalloc_thr_prepfork(void) 438 { 439 if (__isthreaded) { 440 _SPINLOCK(&zone_mag_lock); 441 _SPINLOCK(&depot_spinlock); 442 } 443 } 444 445 void 446 _nmalloc_thr_parentfork(void) 447 { 448 if (__isthreaded) { 449 _SPINUNLOCK(&depot_spinlock); 450 _SPINUNLOCK(&zone_mag_lock); 451 } 452 } 453 454 void 455 _nmalloc_thr_childfork(void) 456 { 457 if (__isthreaded) { 458 _SPINUNLOCK(&depot_spinlock); 459 _SPINUNLOCK(&zone_mag_lock); 460 } 461 } 462 463 /* 464 * Thread locks. 465 */ 466 static __inline void 467 slgd_lock(slglobaldata_t slgd) 468 { 469 if (__isthreaded) 470 _SPINLOCK(&slgd->Spinlock); 471 } 472 473 static __inline void 474 slgd_unlock(slglobaldata_t slgd) 475 { 476 if (__isthreaded) 477 _SPINUNLOCK(&slgd->Spinlock); 478 } 479 480 static __inline void 481 depot_lock(magazine_depot *dp) 482 { 483 if (__isthreaded) 484 _SPINLOCK(&depot_spinlock); 485 #if 0 486 if (__isthreaded) 487 _SPINLOCK(&dp->lock); 488 #endif 489 } 490 491 static __inline void 492 depot_unlock(magazine_depot *dp) 493 { 494 if (__isthreaded) 495 _SPINUNLOCK(&depot_spinlock); 496 #if 0 497 if (__isthreaded) 498 _SPINUNLOCK(&dp->lock); 499 #endif 500 } 501 502 static __inline void 503 zone_magazine_lock(void) 504 { 505 if (__isthreaded) 506 _SPINLOCK(&zone_mag_lock); 507 } 508 509 static __inline void 510 zone_magazine_unlock(void) 511 { 512 if (__isthreaded) 513 _SPINUNLOCK(&zone_mag_lock); 514 } 515 516 static __inline void 517 swap_mags(magazine_pair *mp) 518 { 519 struct magazine *tmp; 520 tmp = mp->loaded; 521 mp->loaded = mp->prev; 522 mp->prev = tmp; 523 } 524 525 /* 526 * bigalloc hashing and locking support. 527 * 528 * Return an unmasked hash code for the passed pointer. 529 */ 530 static __inline int 531 _bigalloc_hash(void *ptr) 532 { 533 int hv; 534 535 hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^ 536 ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT)); 537 538 return(hv); 539 } 540 541 /* 542 * Lock the hash chain and return a pointer to its base for the specified 543 * address. 544 */ 545 static __inline bigalloc_t * 546 bigalloc_lock(void *ptr) 547 { 548 int hv = _bigalloc_hash(ptr); 549 bigalloc_t *bigp; 550 551 bigp = &bigalloc_array[hv & BIGHMASK]; 552 if (__isthreaded) 553 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 554 return(bigp); 555 } 556 557 /* 558 * Lock the hash chain and return a pointer to its base for the specified 559 * address. 560 * 561 * BUT, if the hash chain is empty, just return NULL and do not bother 562 * to lock anything. 563 */ 564 static __inline bigalloc_t * 565 bigalloc_check_and_lock(void *ptr) 566 { 567 int hv = _bigalloc_hash(ptr); 568 bigalloc_t *bigp; 569 570 bigp = &bigalloc_array[hv & BIGHMASK]; 571 if (*bigp == NULL) 572 return(NULL); 573 if (__isthreaded) { 574 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 575 } 576 return(bigp); 577 } 578 579 static __inline void 580 bigalloc_unlock(void *ptr) 581 { 582 int hv; 583 584 if (__isthreaded) { 585 hv = _bigalloc_hash(ptr); 586 _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]); 587 } 588 } 589 590 /* 591 * Find a bigcache entry that might work for the allocation. SMP races are 592 * ok here except for the swap (that is, it is ok if bigcache_size_array[i] 593 * is wrong or if a NULL or too-small big is returned). 594 * 595 * Generally speaking it is ok to find a large entry even if the bytes 596 * requested are relatively small (but still oversized), because we really 597 * don't know *what* the application is going to do with the buffer. 598 */ 599 static __inline 600 bigalloc_t 601 bigcache_find_alloc(size_t bytes) 602 { 603 bigalloc_t big = NULL; 604 size_t test; 605 int i; 606 607 for (i = 0; i < BIGCACHE; ++i) { 608 test = bigcache_size_array[i]; 609 if (bytes <= test) { 610 bigcache_size_array[i] = 0; 611 big = atomic_swap_ptr(&bigcache_array[i], NULL); 612 break; 613 } 614 } 615 return big; 616 } 617 618 /* 619 * Free a bigcache entry, possibly returning one that the caller really must 620 * free. This is used to cache recent oversized memory blocks. Only 621 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try 622 * to collect the biggest ones we can that are under the limit. 623 */ 624 static __inline 625 bigalloc_t 626 bigcache_find_free(bigalloc_t big) 627 { 628 int i; 629 int j; 630 int b; 631 632 b = ++bigcache_index; 633 for (i = 0; i < BIGCACHE; ++i) { 634 j = (b + i) & BIGCACHE_MASK; 635 if (bigcache_size_array[j] < big->bytes) { 636 bigcache_size_array[j] = big->bytes; 637 big = atomic_swap_ptr(&bigcache_array[j], big); 638 break; 639 } 640 } 641 return big; 642 } 643 644 static __inline 645 void 646 handle_excess_big(void) 647 { 648 int i; 649 bigalloc_t big; 650 bigalloc_t *bigp; 651 652 if (excess_alloc <= BIGCACHE_EXCESS) 653 return; 654 655 for (i = 0; i < BIGHSIZE; ++i) { 656 bigp = &bigalloc_array[i]; 657 if (*bigp == NULL) 658 continue; 659 if (__isthreaded) 660 _SPINLOCK(&bigspin_array[i & BIGXMASK]); 661 for (big = *bigp; big; big = big->next) { 662 if (big->active < big->bytes) { 663 MASSERT((big->active & PAGE_MASK) == 0); 664 MASSERT((big->bytes & PAGE_MASK) == 0); 665 munmap((char *)big->base + big->active, 666 big->bytes - big->active); 667 atomic_add_long(&excess_alloc, 668 big->active - big->bytes); 669 big->bytes = big->active; 670 } 671 } 672 if (__isthreaded) 673 _SPINUNLOCK(&bigspin_array[i & BIGXMASK]); 674 } 675 } 676 677 /* 678 * Calculate the zone index for the allocation request size and set the 679 * allocation request size to that particular zone's chunk size. 680 */ 681 static __inline int 682 zoneindex(size_t *bytes, size_t *chunking) 683 { 684 size_t n = (unsigned int)*bytes; /* unsigned for shift opt */ 685 686 /* 687 * This used to be 8-byte chunks and 16 zones for n < 128. 688 * However some instructions may require 16-byte alignment 689 * (aka SIMD) and programs might not request an aligned size 690 * (aka GCC-7), so change this as follows: 691 * 692 * 0-15 bytes 8-byte alignment in two zones (0-1) 693 * 16-127 bytes 16-byte alignment in four zones (3-10) 694 * zone index 2 and 11-15 are currently unused. 695 */ 696 if (n < 16) { 697 *bytes = n = (n + 7) & ~7; 698 *chunking = 8; 699 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 700 /* zones 0,1, zone 2 is unused */ 701 } 702 if (n < 128) { 703 *bytes = n = (n + 15) & ~15; 704 *chunking = 16; 705 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 706 /* zones 3-10, zones 11-15 unused */ 707 } 708 if (n < 256) { 709 *bytes = n = (n + 15) & ~15; 710 *chunking = 16; 711 return(n / 16 + 7); 712 } 713 if (n < 8192) { 714 if (n < 512) { 715 *bytes = n = (n + 31) & ~31; 716 *chunking = 32; 717 return(n / 32 + 15); 718 } 719 if (n < 1024) { 720 *bytes = n = (n + 63) & ~63; 721 *chunking = 64; 722 return(n / 64 + 23); 723 } 724 if (n < 2048) { 725 *bytes = n = (n + 127) & ~127; 726 *chunking = 128; 727 return(n / 128 + 31); 728 } 729 if (n < 4096) { 730 *bytes = n = (n + 255) & ~255; 731 *chunking = 256; 732 return(n / 256 + 39); 733 } 734 *bytes = n = (n + 511) & ~511; 735 *chunking = 512; 736 return(n / 512 + 47); 737 } 738 #if ZALLOC_ZONE_LIMIT > 8192 739 if (n < 16384) { 740 *bytes = n = (n + 1023) & ~1023; 741 *chunking = 1024; 742 return(n / 1024 + 55); 743 } 744 #endif 745 #if ZALLOC_ZONE_LIMIT > 16384 746 if (n < 32768) { 747 *bytes = n = (n + 2047) & ~2047; 748 *chunking = 2048; 749 return(n / 2048 + 63); 750 } 751 #endif 752 _mpanic("Unexpected byte count %zu", n); 753 return(0); 754 } 755 756 /* 757 * malloc() - call internal slab allocator 758 */ 759 void * 760 __malloc(size_t size) 761 { 762 void *ptr; 763 764 ptr = _slaballoc(size, 0); 765 if (ptr == NULL) 766 errno = ENOMEM; 767 else 768 UTRACE(0, size, ptr); 769 return(ptr); 770 } 771 772 #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) 773 774 /* 775 * calloc() - call internal slab allocator 776 */ 777 void * 778 __calloc(size_t number, size_t size) 779 { 780 void *ptr; 781 782 if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && 783 number > 0 && SIZE_MAX / number < size) { 784 errno = ENOMEM; 785 return(NULL); 786 } 787 788 ptr = _slaballoc(number * size, SAFLAG_ZERO); 789 if (ptr == NULL) 790 errno = ENOMEM; 791 else 792 UTRACE(0, number * size, ptr); 793 return(ptr); 794 } 795 796 /* 797 * realloc() (SLAB ALLOCATOR) 798 * 799 * We do not attempt to optimize this routine beyond reusing the same 800 * pointer if the new size fits within the chunking of the old pointer's 801 * zone. 802 */ 803 void * 804 __realloc(void *ptr, size_t size) 805 { 806 void *ret; 807 ret = _slabrealloc(ptr, size); 808 if (ret == NULL) 809 errno = ENOMEM; 810 else 811 UTRACE(ptr, size, ret); 812 return(ret); 813 } 814 815 /* 816 * posix_memalign() 817 * 818 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 819 * is a power of 2 >= sizeof(void *). 820 * 821 * The slab allocator will allocate on power-of-2 boundaries up to 822 * at least PAGE_SIZE. We use the zoneindex mechanic to find a 823 * zone matching the requirements, and _vmem_alloc() otherwise. 824 */ 825 int 826 __posix_memalign(void **memptr, size_t alignment, size_t size) 827 { 828 bigalloc_t *bigp; 829 bigalloc_t big; 830 size_t chunking; 831 int zi __unused; 832 833 /* 834 * OpenGroup spec issue 6 checks 835 */ 836 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 837 *memptr = NULL; 838 return(EINVAL); 839 } 840 if (alignment < sizeof(void *)) { 841 *memptr = NULL; 842 return(EINVAL); 843 } 844 845 /* 846 * Our zone mechanism guarantees same-sized alignment for any 847 * power-of-2 allocation. If size is a power-of-2 and reasonable 848 * we can just call _slaballoc() and be done. We round size up 849 * to the nearest alignment boundary to improve our odds of 850 * it becoming a power-of-2 if it wasn't before. 851 */ 852 if (size <= alignment) 853 size = alignment; 854 else 855 size = (size + alignment - 1) & ~(size_t)(alignment - 1); 856 857 /* 858 * If we have overflowed above when rounding to the nearest alignment 859 * boundary, just return ENOMEM, size should be == N * sizeof(void *). 860 * 861 * Power-of-2 allocations up to 8KB will be aligned to the allocation 862 * size and _slaballoc() can simply be used. Please see line 1082 863 * for this special case: 'Align the storage in the zone based on 864 * the chunking' has a special case for powers of 2. 865 */ 866 if (size == 0) 867 return(ENOMEM); 868 869 if (size <= PAGE_SIZE*2 && (size | (size - 1)) + 1 == (size << 1)) { 870 *memptr = _slaballoc(size, 0); 871 return(*memptr ? 0 : ENOMEM); 872 } 873 874 /* 875 * Otherwise locate a zone with a chunking that matches 876 * the requested alignment, within reason. Consider two cases: 877 * 878 * (1) A 1K allocation on a 32-byte alignment. The first zoneindex 879 * we find will be the best fit because the chunking will be 880 * greater or equal to the alignment. 881 * 882 * (2) A 513 allocation on a 256-byte alignment. In this case 883 * the first zoneindex we find will be for 576 byte allocations 884 * with a chunking of 64, which is not sufficient. To fix this 885 * we simply find the nearest power-of-2 >= size and use the 886 * same side-effect of _slaballoc() which guarantees 887 * same-alignment on a power-of-2 allocation. 888 */ 889 if (size < PAGE_SIZE) { 890 zi = zoneindex(&size, &chunking); 891 if (chunking >= alignment) { 892 *memptr = _slaballoc(size, 0); 893 return(*memptr ? 0 : ENOMEM); 894 } 895 if (size >= 1024) 896 alignment = 1024; 897 if (size >= 16384) 898 alignment = 16384; 899 while (alignment < size) 900 alignment <<= 1; 901 *memptr = _slaballoc(alignment, 0); 902 return(*memptr ? 0 : ENOMEM); 903 } 904 905 /* 906 * If the slab allocator cannot handle it use vmem_alloc(). 907 * 908 * Alignment must be adjusted up to at least PAGE_SIZE in this case. 909 */ 910 if (alignment < PAGE_SIZE) 911 alignment = PAGE_SIZE; 912 if (size < alignment) 913 size = alignment; 914 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 915 if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) { 916 big = bigcache_find_alloc(size); 917 if (big && big->bytes < size) { 918 _slabfree(big->base, FASTSLABREALLOC, &big); 919 big = NULL; 920 } 921 if (big) { 922 *memptr = big->base; 923 big->active = size; 924 if (big->active < big->bytes) { 925 atomic_add_long(&excess_alloc, 926 big->bytes - big->active); 927 } 928 bigp = bigalloc_lock(*memptr); 929 big->next = *bigp; 930 *bigp = big; 931 bigalloc_unlock(*memptr); 932 handle_excess_big(); 933 return(0); 934 } 935 } 936 *memptr = _vmem_alloc(size, alignment, 0); 937 if (*memptr == NULL) 938 return(ENOMEM); 939 940 big = _slaballoc(sizeof(struct bigalloc), 0); 941 if (big == NULL) { 942 _vmem_free(*memptr, size); 943 *memptr = NULL; 944 return(ENOMEM); 945 } 946 bigp = bigalloc_lock(*memptr); 947 big->base = *memptr; 948 big->active = size; 949 big->bytes = size; /* no excess */ 950 big->next = *bigp; 951 *bigp = big; 952 bigalloc_unlock(*memptr); 953 954 return(0); 955 } 956 957 /* 958 * free() (SLAB ALLOCATOR) - do the obvious 959 */ 960 void 961 __free(void *ptr) 962 { 963 UTRACE(ptr, 0, 0); 964 _slabfree(ptr, 0, NULL); 965 } 966 967 /* 968 * _slaballoc() (SLAB ALLOCATOR) 969 * 970 * Allocate memory via the slab allocator. If the request is too large, 971 * or if it page-aligned beyond a certain size, we fall back to the 972 * KMEM subsystem 973 */ 974 static void * 975 _slaballoc(size_t size, int flags) 976 { 977 slzone_t z; 978 slchunk_t chunk; 979 slglobaldata_t slgd; 980 size_t chunking; 981 int zi; 982 int off; 983 void *obj; 984 985 /* 986 * Handle the degenerate size == 0 case. Yes, this does happen. 987 * Return a special pointer. This is to maintain compatibility with 988 * the original malloc implementation. Certain devices, such as the 989 * adaptec driver, not only allocate 0 bytes, they check for NULL and 990 * also realloc() later on. Joy. 991 */ 992 if (size == 0) 993 size = 1; 994 995 /* Capture global flags */ 996 flags |= g_malloc_flags; 997 998 /* 999 * Handle large allocations directly. There should not be very many 1000 * of these so performance is not a big issue. 1001 * 1002 * The backend allocator is pretty nasty on a SMP system. Use the 1003 * slab allocator for one and two page-sized chunks even though we 1004 * lose some efficiency. 1005 * 1006 * NOTE: Please see posix_memalign around line 864, which assumes 1007 * that power-of-2 allocations of PAGE_SIZE and PAGE_SIZE*2 1008 * can use _slaballoc() and be aligned to the same. The 1009 * zone cache can be used for this case, bigalloc does not 1010 * have to be used. 1011 */ 1012 if (size >= ZoneLimit || 1013 ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) { 1014 bigalloc_t big; 1015 bigalloc_t *bigp; 1016 1017 /* 1018 * Page-align and cache-color in case of virtually indexed 1019 * physically tagged L1 caches (aka SandyBridge). No sweat 1020 * otherwise, so just do it. 1021 * 1022 * (don't count as excess). 1023 */ 1024 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1025 1026 /* 1027 * If we have overflowed above when rounding to the page 1028 * boundary, something has passed us (size_t)[-PAGE_MASK..-1] 1029 * so just return NULL, size at this point should be >= 0. 1030 */ 1031 if (size == 0) 1032 return (NULL); 1033 1034 if ((size & (PAGE_SIZE * 2 - 1)) == 0) 1035 size += PAGE_SIZE; 1036 1037 /* 1038 * Try to reuse a cached big block to avoid mmap'ing. If it 1039 * turns out not to fit our requirements we throw it away 1040 * and allocate normally. 1041 */ 1042 big = NULL; 1043 if (size <= BIGCACHE_LIMIT) { 1044 big = bigcache_find_alloc(size); 1045 if (big && big->bytes < size) { 1046 _slabfree(big->base, FASTSLABREALLOC, &big); 1047 big = NULL; 1048 } 1049 } 1050 if (big) { 1051 chunk = big->base; 1052 if (flags & SAFLAG_ZERO) 1053 bzero(chunk, size); 1054 } else { 1055 chunk = _vmem_alloc(size, PAGE_SIZE, flags); 1056 if (chunk == NULL) 1057 return(NULL); 1058 1059 big = _slaballoc(sizeof(struct bigalloc), 0); 1060 if (big == NULL) { 1061 _vmem_free(chunk, size); 1062 return(NULL); 1063 } 1064 big->base = chunk; 1065 big->bytes = size; 1066 } 1067 big->active = size; 1068 1069 bigp = bigalloc_lock(chunk); 1070 if (big->active < big->bytes) { 1071 atomic_add_long(&excess_alloc, 1072 big->bytes - big->active); 1073 } 1074 big->next = *bigp; 1075 *bigp = big; 1076 bigalloc_unlock(chunk); 1077 handle_excess_big(); 1078 1079 return(chunk); 1080 } 1081 1082 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 1083 zi = zoneindex(&size, &chunking); 1084 MASSERT(zi < NZONES); 1085 1086 obj = mtmagazine_alloc(zi); 1087 if (obj != NULL) { 1088 if (flags & SAFLAG_ZERO) 1089 bzero(obj, size); 1090 return (obj); 1091 } 1092 1093 slgd = &SLGlobalData; 1094 slgd_lock(slgd); 1095 1096 /* 1097 * Attempt to allocate out of an existing zone. If all zones are 1098 * exhausted pull one off the free list or allocate a new one. 1099 */ 1100 if ((z = slgd->ZoneAry[zi]) == NULL) { 1101 z = zone_alloc(flags); 1102 if (z == NULL) 1103 goto fail; 1104 1105 /* 1106 * How big is the base structure? 1107 */ 1108 off = sizeof(struct slzone); 1109 1110 /* 1111 * Align the storage in the zone based on the chunking. 1112 * 1113 * Guarantee power-of-2 alignment for power-of-2-sized 1114 * chunks. Otherwise align based on the chunking size 1115 * (typically 8 or 16 bytes for small allocations). 1116 * 1117 * NOTE: Allocations >= ZoneLimit are governed by the 1118 * bigalloc code and typically only guarantee page-alignment. 1119 * 1120 * Set initial conditions for UIndex near the zone header 1121 * to reduce unecessary page faults, vs semi-randomization 1122 * to improve L1 cache saturation. 1123 * 1124 * NOTE: Please see posix_memalign around line 864-ish, which 1125 * assumes that power-of-2 allocations of PAGE_SIZE 1126 * and PAGE_SIZE*2 can use _slaballoc() and be aligned 1127 * to the same. The zone cache can be used for this 1128 * case, bigalloc does not have to be used. 1129 * 1130 * ALL power-of-2 requests that fall through to this 1131 * code use this rule (conditionals above limit this 1132 * to <= PAGE_SIZE*2. 1133 */ 1134 if ((size | (size - 1)) + 1 == (size << 1)) 1135 off = roundup2(off, size); 1136 else 1137 off = roundup2(off, chunking); 1138 z->z_Magic = ZALLOC_SLAB_MAGIC; 1139 z->z_ZoneIndex = zi; 1140 z->z_NMax = (ZoneSize - off) / size; 1141 z->z_NFree = z->z_NMax; 1142 z->z_BasePtr = (char *)z + off; 1143 z->z_UIndex = z->z_UEndIndex = 0; 1144 z->z_ChunkSize = size; 1145 z->z_FirstFreePg = ZonePageCount; 1146 z->z_Next = slgd->ZoneAry[zi]; 1147 slgd->ZoneAry[zi] = z; 1148 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1149 flags &= ~SAFLAG_ZERO; /* already zero'd */ 1150 flags |= SAFLAG_PASSIVE; 1151 } 1152 1153 /* 1154 * Slide the base index for initial allocations out of the 1155 * next zone we create so we do not over-weight the lower 1156 * part of the cpu memory caches. 1157 */ 1158 slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE) 1159 & (ZALLOC_MAX_ZONE_SIZE - 1); 1160 } 1161 1162 /* 1163 * Ok, we have a zone from which at least one chunk is available. 1164 * 1165 * Remove us from the ZoneAry[] when we become empty 1166 */ 1167 MASSERT(z->z_NFree > 0); 1168 1169 if (--z->z_NFree == 0) { 1170 slgd->ZoneAry[zi] = z->z_Next; 1171 z->z_Next = NULL; 1172 } 1173 1174 /* 1175 * Locate a chunk in a free page. This attempts to localize 1176 * reallocations into earlier pages without us having to sort 1177 * the chunk list. A chunk may still overlap a page boundary. 1178 */ 1179 while (z->z_FirstFreePg < ZonePageCount) { 1180 if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { 1181 MASSERT((uintptr_t)chunk & ZoneMask); 1182 z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; 1183 goto done; 1184 } 1185 ++z->z_FirstFreePg; 1186 } 1187 1188 /* 1189 * No chunks are available but NFree said we had some memory, 1190 * so it must be available in the never-before-used-memory 1191 * area governed by UIndex. The consequences are very 1192 * serious if our zone got corrupted so we use an explicit 1193 * panic rather then a KASSERT. 1194 */ 1195 chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size); 1196 1197 if (++z->z_UIndex == z->z_NMax) 1198 z->z_UIndex = 0; 1199 if (z->z_UIndex == z->z_UEndIndex) { 1200 if (z->z_NFree != 0) 1201 _mpanic("slaballoc: corrupted zone"); 1202 } 1203 1204 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1205 flags &= ~SAFLAG_ZERO; 1206 flags |= SAFLAG_PASSIVE; 1207 } 1208 1209 done: 1210 slgd_unlock(slgd); 1211 if (flags & SAFLAG_ZERO) 1212 bzero(chunk, size); 1213 return(chunk); 1214 fail: 1215 slgd_unlock(slgd); 1216 return(NULL); 1217 } 1218 1219 /* 1220 * Reallocate memory within the chunk 1221 */ 1222 static void * 1223 _slabrealloc(void *ptr, size_t size) 1224 { 1225 bigalloc_t *bigp; 1226 void *nptr; 1227 slzone_t z; 1228 size_t chunking; 1229 1230 if (ptr == NULL) { 1231 return(_slaballoc(size, 0)); 1232 } 1233 1234 if (size == 0) 1235 size = 1; 1236 1237 /* 1238 * Handle oversized allocations. 1239 */ 1240 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1241 bigalloc_t big; 1242 size_t bigbytes; 1243 1244 while ((big = *bigp) != NULL) { 1245 if (big->base == ptr) { 1246 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1247 bigbytes = big->bytes; 1248 1249 /* 1250 * If it already fits determine if it makes 1251 * sense to shrink/reallocate. Try to optimize 1252 * programs which stupidly make incremental 1253 * reallocations larger or smaller by scaling 1254 * the allocation. Also deal with potential 1255 * coloring. 1256 */ 1257 if (size >= (bigbytes >> 1) && 1258 size <= bigbytes) { 1259 if (big->active != size) { 1260 atomic_add_long(&excess_alloc, 1261 big->active - 1262 size); 1263 } 1264 big->active = size; 1265 bigalloc_unlock(ptr); 1266 return(ptr); 1267 } 1268 1269 /* 1270 * For large reallocations, allocate more space 1271 * than we need to try to avoid excessive 1272 * reallocations later on. 1273 */ 1274 chunking = size + (size >> 3); 1275 chunking = (chunking + PAGE_MASK) & 1276 ~(size_t)PAGE_MASK; 1277 1278 /* 1279 * Try to allocate adjacently in case the 1280 * program is idiotically realloc()ing a 1281 * huge memory block just slightly bigger. 1282 * (llvm's llc tends to do this a lot). 1283 * 1284 * (MAP_TRYFIXED forces mmap to fail if there 1285 * is already something at the address). 1286 */ 1287 if (chunking > bigbytes) { 1288 char *addr; 1289 int errno_save = errno; 1290 1291 addr = mmap((char *)ptr + bigbytes, 1292 chunking - bigbytes, 1293 PROT_READ|PROT_WRITE, 1294 MAP_PRIVATE|MAP_ANON| 1295 MAP_TRYFIXED, 1296 -1, 0); 1297 errno = errno_save; 1298 if (addr == (char *)ptr + bigbytes) { 1299 atomic_add_long(&excess_alloc, 1300 big->active - 1301 big->bytes + 1302 chunking - 1303 size); 1304 big->bytes = chunking; 1305 big->active = size; 1306 bigalloc_unlock(ptr); 1307 1308 return(ptr); 1309 } 1310 MASSERT((void *)addr == MAP_FAILED); 1311 } 1312 1313 /* 1314 * Failed, unlink big and allocate fresh. 1315 * (note that we have to leave (big) intact 1316 * in case the slaballoc fails). 1317 */ 1318 *bigp = big->next; 1319 bigalloc_unlock(ptr); 1320 if ((nptr = _slaballoc(size, 0)) == NULL) { 1321 /* Relink block */ 1322 bigp = bigalloc_lock(ptr); 1323 big->next = *bigp; 1324 *bigp = big; 1325 bigalloc_unlock(ptr); 1326 return(NULL); 1327 } 1328 if (size > bigbytes) 1329 size = bigbytes; 1330 bcopy(ptr, nptr, size); 1331 atomic_add_long(&excess_alloc, big->active - 1332 big->bytes); 1333 _slabfree(ptr, FASTSLABREALLOC, &big); 1334 1335 return(nptr); 1336 } 1337 bigp = &big->next; 1338 } 1339 bigalloc_unlock(ptr); 1340 handle_excess_big(); 1341 } 1342 1343 /* 1344 * Get the original allocation's zone. If the new request winds 1345 * up using the same chunk size we do not have to do anything. 1346 * 1347 * NOTE: We don't have to lock the globaldata here, the fields we 1348 * access here will not change at least as long as we have control 1349 * over the allocation. 1350 */ 1351 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1352 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1353 1354 /* 1355 * Use zoneindex() to chunk-align the new size, as long as the 1356 * new size is not too large. 1357 */ 1358 if (size < ZoneLimit) { 1359 zoneindex(&size, &chunking); 1360 if (z->z_ChunkSize == size) { 1361 return(ptr); 1362 } 1363 } 1364 1365 /* 1366 * Allocate memory for the new request size and copy as appropriate. 1367 */ 1368 if ((nptr = _slaballoc(size, 0)) != NULL) { 1369 if (size > z->z_ChunkSize) 1370 size = z->z_ChunkSize; 1371 bcopy(ptr, nptr, size); 1372 _slabfree(ptr, 0, NULL); 1373 } 1374 1375 return(nptr); 1376 } 1377 1378 /* 1379 * free (SLAB ALLOCATOR) 1380 * 1381 * Free a memory block previously allocated by malloc. Note that we do not 1382 * attempt to uplodate ks_loosememuse as MP races could prevent us from 1383 * checking memory limits in malloc. 1384 * 1385 * flags: 1386 * FASTSLABREALLOC Fast call from realloc, *rbigp already 1387 * unlinked. 1388 * 1389 * MPSAFE 1390 */ 1391 static void 1392 _slabfree(void *ptr, int flags, bigalloc_t *rbigp) 1393 { 1394 slzone_t z; 1395 slchunk_t chunk; 1396 bigalloc_t big; 1397 bigalloc_t *bigp; 1398 slglobaldata_t slgd; 1399 size_t size; 1400 int zi; 1401 int pgno; 1402 1403 /* Fast realloc path for big allocations */ 1404 if (flags & FASTSLABREALLOC) { 1405 big = *rbigp; 1406 goto fastslabrealloc; 1407 } 1408 1409 /* 1410 * Handle NULL frees and special 0-byte allocations 1411 */ 1412 if (ptr == NULL) 1413 return; 1414 1415 /* 1416 * Handle oversized allocations. 1417 */ 1418 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1419 while ((big = *bigp) != NULL) { 1420 if (big->base == ptr) { 1421 *bigp = big->next; 1422 atomic_add_long(&excess_alloc, big->active - 1423 big->bytes); 1424 bigalloc_unlock(ptr); 1425 1426 /* 1427 * Try to stash the block we are freeing, 1428 * potentially receiving another block in 1429 * return which must be freed. 1430 */ 1431 fastslabrealloc: 1432 if (big->bytes <= BIGCACHE_LIMIT) { 1433 big = bigcache_find_free(big); 1434 if (big == NULL) 1435 return; 1436 } 1437 ptr = big->base; /* reload */ 1438 size = big->bytes; 1439 _slabfree(big, 0, NULL); 1440 _vmem_free(ptr, size); 1441 return; 1442 } 1443 bigp = &big->next; 1444 } 1445 bigalloc_unlock(ptr); 1446 handle_excess_big(); 1447 } 1448 1449 /* 1450 * Zone case. Figure out the zone based on the fact that it is 1451 * ZoneSize aligned. 1452 */ 1453 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1454 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1455 1456 size = z->z_ChunkSize; 1457 zi = z->z_ZoneIndex; 1458 1459 if (g_malloc_flags & SAFLAG_ZERO) 1460 bzero(ptr, size); 1461 1462 if (mtmagazine_free(zi, ptr) == 0) 1463 return; 1464 1465 pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; 1466 chunk = ptr; 1467 slgd = &SLGlobalData; 1468 slgd_lock(slgd); 1469 1470 /* 1471 * Add this free non-zero'd chunk to a linked list for reuse, adjust 1472 * z_FirstFreePg. 1473 */ 1474 chunk->c_Next = z->z_PageAry[pgno]; 1475 z->z_PageAry[pgno] = chunk; 1476 if (z->z_FirstFreePg > pgno) 1477 z->z_FirstFreePg = pgno; 1478 1479 /* 1480 * Bump the number of free chunks. If it becomes non-zero the zone 1481 * must be added back onto the appropriate list. 1482 */ 1483 if (z->z_NFree++ == 0) { 1484 z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; 1485 slgd->ZoneAry[z->z_ZoneIndex] = z; 1486 } 1487 1488 /* 1489 * If the zone becomes totally free then release it. 1490 */ 1491 if (z->z_NFree == z->z_NMax) { 1492 slzone_t *pz; 1493 1494 pz = &slgd->ZoneAry[z->z_ZoneIndex]; 1495 while (z != *pz) 1496 pz = &(*pz)->z_Next; 1497 *pz = z->z_Next; 1498 z->z_Magic = -1; 1499 z->z_Next = NULL; 1500 zone_free(z); 1501 /* slgd lock released */ 1502 return; 1503 } 1504 slgd_unlock(slgd); 1505 } 1506 1507 /* 1508 * Allocate and return a magazine. NULL is returned and *burst is adjusted 1509 * if the magazine is empty. 1510 */ 1511 static __inline void * 1512 magazine_alloc(struct magazine *mp, int *burst) 1513 { 1514 void *obj; 1515 1516 if (mp == NULL) 1517 return(NULL); 1518 if (MAGAZINE_NOTEMPTY(mp)) { 1519 obj = mp->objects[--mp->rounds]; 1520 return(obj); 1521 } 1522 1523 /* 1524 * Return burst factor to caller along with NULL 1525 */ 1526 if ((mp->flags & M_BURST) && (burst != NULL)) { 1527 *burst = mp->burst_factor; 1528 } 1529 /* Reduce burst factor by NSCALE; if it hits 1, disable BURST */ 1530 if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) && 1531 (burst != NULL)) { 1532 mp->burst_factor -= M_BURST_NSCALE; 1533 if (mp->burst_factor <= 1) { 1534 mp->burst_factor = 1; 1535 mp->flags &= ~(M_BURST); 1536 mp->flags &= ~(M_BURST_EARLY); 1537 } 1538 } 1539 return (NULL); 1540 } 1541 1542 static __inline int 1543 magazine_free(struct magazine *mp, void *p) 1544 { 1545 if (mp != NULL && MAGAZINE_NOTFULL(mp)) { 1546 mp->objects[mp->rounds++] = p; 1547 return 0; 1548 } 1549 1550 return -1; 1551 } 1552 1553 static void * 1554 mtmagazine_alloc(int zi) 1555 { 1556 thr_mags *tp; 1557 struct magazine *mp, *emptymag; 1558 magazine_depot *d; 1559 void *obj; 1560 1561 /* 1562 * Do not try to access per-thread magazines while the mtmagazine 1563 * is being initialized or destroyed. 1564 */ 1565 tp = &thread_mags; 1566 if (tp->init < 0) 1567 return(NULL); 1568 1569 /* 1570 * Primary per-thread allocation loop 1571 */ 1572 for (;;) { 1573 /* 1574 * If the loaded magazine has rounds, allocate and return 1575 */ 1576 mp = tp->mags[zi].loaded; 1577 obj = magazine_alloc(mp, NULL); 1578 if (obj) 1579 break; 1580 1581 /* 1582 * If the prev magazine is full, swap with the loaded 1583 * magazine and retry. 1584 */ 1585 mp = tp->mags[zi].prev; 1586 if (mp && MAGAZINE_FULL(mp)) { 1587 MASSERT(mp->rounds != 0); 1588 swap_mags(&tp->mags[zi]); /* prev now empty */ 1589 continue; 1590 } 1591 1592 /* 1593 * Try to get a full magazine from the depot. Cycle 1594 * through depot(full)->loaded->prev->depot(empty). 1595 * Retry if a full magazine was available from the depot. 1596 * 1597 * Return NULL (caller will fall through) if no magazines 1598 * can be found anywhere. 1599 */ 1600 d = &depots[zi]; 1601 depot_lock(d); 1602 emptymag = tp->mags[zi].prev; 1603 if (emptymag) 1604 SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine); 1605 tp->mags[zi].prev = tp->mags[zi].loaded; 1606 mp = SLIST_FIRST(&d->full); /* loaded magazine */ 1607 tp->mags[zi].loaded = mp; 1608 if (mp) { 1609 SLIST_REMOVE_HEAD(&d->full, nextmagazine); 1610 MASSERT(MAGAZINE_NOTEMPTY(mp)); 1611 depot_unlock(d); 1612 continue; 1613 } 1614 depot_unlock(d); 1615 break; 1616 } 1617 1618 return (obj); 1619 } 1620 1621 static int 1622 mtmagazine_free(int zi, void *ptr) 1623 { 1624 thr_mags *tp; 1625 struct magazine *mp, *loadedmag; 1626 magazine_depot *d; 1627 int rc = -1; 1628 1629 /* 1630 * Do not try to access per-thread magazines while the mtmagazine 1631 * is being initialized or destroyed. 1632 */ 1633 tp = &thread_mags; 1634 if (tp->init < 0) 1635 return(-1); 1636 1637 /* 1638 * Primary per-thread freeing loop 1639 */ 1640 for (;;) { 1641 /* 1642 * Make sure a new magazine is available in case we have 1643 * to use it. Staging the newmag allows us to avoid 1644 * some locking/reentrancy complexity. 1645 * 1646 * Temporarily disable the per-thread caches for this 1647 * allocation to avoid reentrancy and/or to avoid a 1648 * stack overflow if the [zi] happens to be the same that 1649 * would be used to allocate the new magazine. 1650 */ 1651 if (tp->newmag == NULL) { 1652 tp->init = -1; 1653 tp->newmag = _slaballoc(sizeof(struct magazine), 1654 SAFLAG_ZERO); 1655 tp->init = 1; 1656 if (tp->newmag == NULL) { 1657 rc = -1; 1658 break; 1659 } 1660 } 1661 1662 /* 1663 * If the loaded magazine has space, free directly to it 1664 */ 1665 rc = magazine_free(tp->mags[zi].loaded, ptr); 1666 if (rc == 0) 1667 break; 1668 1669 /* 1670 * If the prev magazine is empty, swap with the loaded 1671 * magazine and retry. 1672 */ 1673 mp = tp->mags[zi].prev; 1674 if (mp && MAGAZINE_EMPTY(mp)) { 1675 MASSERT(mp->rounds == 0); 1676 swap_mags(&tp->mags[zi]); /* prev now full */ 1677 continue; 1678 } 1679 1680 /* 1681 * Try to get an empty magazine from the depot. Cycle 1682 * through depot(empty)->loaded->prev->depot(full). 1683 * Retry if an empty magazine was available from the depot. 1684 */ 1685 d = &depots[zi]; 1686 depot_lock(d); 1687 1688 if ((loadedmag = tp->mags[zi].prev) != NULL) 1689 SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine); 1690 tp->mags[zi].prev = tp->mags[zi].loaded; 1691 mp = SLIST_FIRST(&d->empty); 1692 if (mp) { 1693 tp->mags[zi].loaded = mp; 1694 SLIST_REMOVE_HEAD(&d->empty, nextmagazine); 1695 MASSERT(MAGAZINE_NOTFULL(mp)); 1696 } else { 1697 mp = tp->newmag; 1698 tp->newmag = NULL; 1699 mp->capacity = M_MAX_ROUNDS; 1700 mp->rounds = 0; 1701 mp->flags = 0; 1702 tp->mags[zi].loaded = mp; 1703 } 1704 depot_unlock(d); 1705 } 1706 1707 return rc; 1708 } 1709 1710 static void 1711 mtmagazine_init(void) 1712 { 1713 int error; 1714 1715 error = pthread_key_create(&thread_mags_key, mtmagazine_destructor); 1716 if (error) 1717 abort(); 1718 } 1719 1720 /* 1721 * This function is only used by the thread exit destructor 1722 */ 1723 static void 1724 mtmagazine_drain(struct magazine *mp) 1725 { 1726 void *obj; 1727 1728 while (MAGAZINE_NOTEMPTY(mp)) { 1729 obj = magazine_alloc(mp, NULL); 1730 _slabfree(obj, 0, NULL); 1731 } 1732 } 1733 1734 /* 1735 * mtmagazine_destructor() 1736 * 1737 * When a thread exits, we reclaim all its resources; all its magazines are 1738 * drained and the structures are freed. 1739 * 1740 * WARNING! The destructor can be called multiple times if the larger user 1741 * program has its own destructors which run after ours which 1742 * allocate or free memory. 1743 */ 1744 static void 1745 mtmagazine_destructor(void *thrp) 1746 { 1747 thr_mags *tp = thrp; 1748 struct magazine *mp; 1749 int i; 1750 1751 /* 1752 * Prevent further use of mtmagazines while we are destructing 1753 * them, as well as for any destructors which are run after us 1754 * prior to the thread actually being destroyed. 1755 */ 1756 tp->init = -1; 1757 1758 for (i = 0; i < NZONES; i++) { 1759 mp = tp->mags[i].loaded; 1760 tp->mags[i].loaded = NULL; 1761 if (mp) { 1762 if (MAGAZINE_NOTEMPTY(mp)) 1763 mtmagazine_drain(mp); 1764 _slabfree(mp, 0, NULL); 1765 } 1766 1767 mp = tp->mags[i].prev; 1768 tp->mags[i].prev = NULL; 1769 if (mp) { 1770 if (MAGAZINE_NOTEMPTY(mp)) 1771 mtmagazine_drain(mp); 1772 _slabfree(mp, 0, NULL); 1773 } 1774 } 1775 1776 if (tp->newmag) { 1777 mp = tp->newmag; 1778 tp->newmag = NULL; 1779 _slabfree(mp, 0, NULL); 1780 } 1781 } 1782 1783 /* 1784 * zone_alloc() 1785 * 1786 * Attempt to allocate a zone from the zone magazine; the zone magazine has 1787 * M_BURST_EARLY enabled, so honor the burst request from the magazine. 1788 */ 1789 static slzone_t 1790 zone_alloc(int flags) 1791 { 1792 slglobaldata_t slgd = &SLGlobalData; 1793 int burst = 1; 1794 int i, j; 1795 slzone_t z; 1796 1797 zone_magazine_lock(); 1798 slgd_unlock(slgd); 1799 1800 z = magazine_alloc(&zone_magazine, &burst); 1801 if (z == NULL && burst == 1) { 1802 zone_magazine_unlock(); 1803 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1804 } else if (z == NULL) { 1805 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1806 if (z) { 1807 for (i = 1; i < burst; i++) { 1808 j = magazine_free(&zone_magazine, 1809 (char *) z + (ZoneSize * i)); 1810 MASSERT(j == 0); 1811 } 1812 } 1813 zone_magazine_unlock(); 1814 } else { 1815 z->z_Flags |= SLZF_UNOTZEROD; 1816 zone_magazine_unlock(); 1817 } 1818 slgd_lock(slgd); 1819 return z; 1820 } 1821 1822 /* 1823 * zone_free() 1824 * 1825 * Release a zone and unlock the slgd lock. 1826 */ 1827 static void 1828 zone_free(void *z) 1829 { 1830 slglobaldata_t slgd = &SLGlobalData; 1831 void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {}; 1832 int i, j; 1833 1834 zone_magazine_lock(); 1835 slgd_unlock(slgd); 1836 1837 bzero(z, sizeof(struct slzone)); 1838 1839 if (opt_madvise) 1840 madvise(z, ZoneSize, MADV_FREE); 1841 1842 i = magazine_free(&zone_magazine, z); 1843 1844 /* 1845 * If we failed to free, collect excess magazines; release the zone 1846 * magazine lock, and then free to the system via _vmem_free. Re-enable 1847 * BURST mode for the magazine. 1848 */ 1849 if (i == -1) { 1850 j = zone_magazine.rounds - zone_magazine.low_factor; 1851 for (i = 0; i < j; i++) { 1852 excess[i] = magazine_alloc(&zone_magazine, NULL); 1853 MASSERT(excess[i] != NULL); 1854 } 1855 1856 zone_magazine_unlock(); 1857 1858 for (i = 0; i < j; i++) 1859 _vmem_free(excess[i], ZoneSize); 1860 1861 _vmem_free(z, ZoneSize); 1862 } else { 1863 zone_magazine_unlock(); 1864 } 1865 } 1866 1867 /* 1868 * _vmem_alloc() 1869 * 1870 * Directly map memory in PAGE_SIZE'd chunks with the specified 1871 * alignment. 1872 * 1873 * Alignment must be a multiple of PAGE_SIZE. 1874 * 1875 * Size must be >= alignment. 1876 */ 1877 static void * 1878 _vmem_alloc(size_t size, size_t align, int flags) 1879 { 1880 char *addr; 1881 char *save; 1882 size_t excess; 1883 1884 /* 1885 * Map anonymous private memory. 1886 */ 1887 addr = mmap(NULL, size, PROT_READ|PROT_WRITE, 1888 MAP_PRIVATE|MAP_ANON, -1, 0); 1889 if (addr == MAP_FAILED) 1890 return(NULL); 1891 1892 /* 1893 * Check alignment. The misaligned offset is also the excess 1894 * amount. If misaligned unmap the excess so we have a chance of 1895 * mapping at the next alignment point and recursively try again. 1896 * 1897 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB block alignment 1898 * aaaaaaaaa aaaaaaaaaaa aa mis-aligned allocation 1899 * xxxxxxxxx final excess calculation 1900 * ^ returned address 1901 */ 1902 excess = (uintptr_t)addr & (align - 1); 1903 1904 if (excess) { 1905 excess = align - excess; 1906 save = addr; 1907 1908 munmap(save + excess, size - excess); 1909 addr = _vmem_alloc(size, align, flags); 1910 munmap(save, excess); 1911 } 1912 return((void *)addr); 1913 } 1914 1915 /* 1916 * _vmem_free() 1917 * 1918 * Free a chunk of memory allocated with _vmem_alloc() 1919 */ 1920 static void 1921 _vmem_free(void *ptr, size_t size) 1922 { 1923 munmap(ptr, size); 1924 } 1925 1926 /* 1927 * Panic on fatal conditions 1928 */ 1929 static void 1930 _mpanic(const char *ctl, ...) 1931 { 1932 va_list va; 1933 1934 if (malloc_panic == 0) { 1935 malloc_panic = 1; 1936 va_start(va, ctl); 1937 vfprintf(stderr, ctl, va); 1938 fprintf(stderr, "\n"); 1939 fflush(stderr); 1940 va_end(va); 1941 } 1942 abort(); 1943 } 1944 1945 __weak_reference(__malloc, malloc); 1946 __weak_reference(__calloc, calloc); 1947 __weak_reference(__posix_memalign, posix_memalign); 1948 __weak_reference(__realloc, realloc); 1949 __weak_reference(__free, free); 1950