1 /* 2 * NMALLOC.C - New Malloc (ported from kernel slab allocator) 3 * 4 * Copyright (c) 2003,2004,2009,2010-2019 The DragonFly Project, 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Matthew Dillon <dillon@backplane.com> and by 9 * Venkatesh Srinivas <me@endeavour.zapto.org>. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in 19 * the documentation and/or other materials provided with the 20 * distribution. 21 * 3. Neither the name of The DragonFly Project nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific, prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 29 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 33 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 35 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $ 39 */ 40 /* 41 * This module implements a slab allocator drop-in replacement for the 42 * libc malloc(). 43 * 44 * A slab allocator reserves a ZONE for each chunk size, then lays the 45 * chunks out in an array within the zone. Allocation and deallocation 46 * is nearly instantaneous, and overhead losses are limited to a fixed 47 * worst-case amount. 48 * 49 * The slab allocator does not have to pre-initialize the list of 50 * free chunks for each zone, and the underlying VM will not be 51 * touched at all beyond the zone header until an actual allocation 52 * needs it. 53 * 54 * Slab management and locking is done on a per-zone basis. 55 * 56 * Alloc Size Chunking Number of zones 57 * 0-127 8 16 58 * 128-255 16 8 59 * 256-511 32 8 60 * 512-1023 64 8 61 * 1024-2047 128 8 62 * 2048-4095 256 8 63 * 4096-8191 512 8 64 * 8192-16383 1024 8 65 * 16384-32767 2048 8 66 * 67 * Allocations >= ZoneLimit go directly to mmap and a hash table 68 * is used to locate for free. One and Two-page allocations use the 69 * zone mechanic to avoid excessive mmap()/munmap() calls. 70 * 71 * API FEATURES AND SIDE EFFECTS 72 * 73 * + power-of-2 sized allocations up to a page will be power-of-2 aligned. 74 * Above that power-of-2 sized allocations are page-aligned. Non 75 * power-of-2 sized allocations are aligned the same as the chunk 76 * size for their zone. 77 * + malloc(0) returns a special non-NULL value 78 * + ability to allocate arbitrarily large chunks of memory 79 * + realloc will reuse the passed pointer if possible, within the 80 * limitations of the zone chunking. 
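 *
 * As an informal illustration of the guarantees above (a hypothetical
 * caller, not part of this allocator):
 *
 *      void *a = malloc(0);    // special non-NULL pointer, may be free()d
 *      void *b = malloc(256);  // power-of-2 size <= a page: 256-byte aligned
 *      void *c = malloc(100);  // rounded up to its zone's 112-byte chunk
 *      b = realloc(b, 300);    // may move; reuses b only if the chunking allows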
 *
 * Multithreaded enhancements for small allocations introduced August 2010.
 * These are in the spirit of 'libumem'. See:
 *      Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
 *      slab allocator to many CPUs and arbitrary resources". In Proc. 2001
 *      USENIX Technical Conference. USENIX Association.
 *
 * Oversized allocations employ the BIGCACHE mechanic whereby large
 * allocations may be handed significantly larger buffers, allowing them
 * to avoid mmap/munmap operations even through significant realloc()s.
 * The excess space is only trimmed if too many large allocations have been
 * given this treatment.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.
 *
 * 'U' / ['u']  Generate / do not generate utrace entries for ktrace(1).
 *              This will generate utrace events for all malloc, realloc,
 *              and free calls.  There are tools which can interpret these
 *              logs: mtrplay replays an allocation pattern and mtrgraph
 *              graphs the heap structure.
 * 'Z' / ['z']  Zero out / do not zero all allocations.
 *              Each new byte of memory allocated by malloc, realloc, or
 *              reallocf will be initialized to 0.  This is intended for
 *              debugging and will affect performance negatively.
 * 'H' / ['h']  Pass a hint to the kernel about pages unused by the
 *              allocation functions.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */

#include "namespace.h"
#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>
#include <machine/atomic.h>
#include "un-namespace.h"

#include "libc_private.h"
#include "spinlock.h"

void __free(void *);
void *__malloc(size_t);
void *__calloc(size_t, size_t);
void *__realloc(void *, size_t);
void *__aligned_alloc(size_t, size_t);
size_t __malloc_usable_size(const void *ptr);
int __posix_memalign(void **, size_t, size_t);

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
        struct bigalloc *next;  /* hash link */
        void    *base;          /* base pointer */
        u_long  active;         /* bytes active */
        u_long  bytes;          /* bytes allocated */
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define MAX_SLAB_PAGEALIGN      (2 * PAGE_SIZE) /* max slab for PAGE_SIZE*n */
#define ZALLOC_ZONE_LIMIT       (16 * 1024)     /* max slab-managed alloc */
#define ZALLOC_ZONE_SIZE        (64 * 1024)     /* zone size */
#define ZALLOC_SLAB_MAGIC       0x736c6162      /* magic sanity */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES                  72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES                  80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
        struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
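 *
 * Because zones are ZALLOC_ZONE_SIZE aligned, the zone header for any
 * slab-managed chunk can be recovered by masking the chunk address; a
 * sketch of the lookup that _slabfree() and friends perform below:
 *
 *      slzone_t z;
 *
 *      z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)(ZALLOC_ZONE_SIZE - 1));
 *      // z->z_Magic == ZALLOC_SLAB_MAGIC for a valid slab-managed pointer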
180 */ 181 struct slglobaldata; 182 183 typedef struct slzone { 184 int32_t z_Magic; /* magic number for sanity check */ 185 int z_NFree; /* total free chunks / ualloc space */ 186 struct slzone *z_Next; /* ZoneAry[] link if z_NFree non-zero */ 187 int z_NMax; /* maximum free chunks */ 188 char *z_BasePtr; /* pointer to start of chunk array */ 189 int z_UIndex; /* current initial allocation index */ 190 int z_UEndIndex; /* last (first) allocation index */ 191 int z_ChunkSize; /* chunk size for validation */ 192 int z_FirstFreePg; /* chunk list on a page-by-page basis */ 193 int z_ZoneIndex; 194 int z_Flags; 195 struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE]; 196 } *slzone_t; 197 198 typedef struct slglobaldata { 199 spinlock_t Spinlock; 200 slzone_t ZoneAry[NZONES];/* linked list of zones NFree > 0 */ 201 } *slglobaldata_t; 202 203 #define SLZF_UNOTZEROD 0x0001 204 205 #define FASTSLABREALLOC 0x02 206 207 /* 208 * Misc constants. Note that allocations that are exact multiples of 209 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 210 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 211 */ 212 #define MIN_CHUNK_SIZE 8 /* in bytes */ 213 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 214 #define IN_SAME_PAGE_MASK (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK) 215 216 /* 217 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 218 * not be larger then 64. 219 */ 220 #define BIGHSHIFT 10 /* bigalloc hash table */ 221 #define BIGHSIZE (1 << BIGHSHIFT) 222 #define BIGHMASK (BIGHSIZE - 1) 223 #define BIGXSIZE (BIGHSIZE / 16) /* bigalloc lock table */ 224 #define BIGXMASK (BIGXSIZE - 1) 225 226 /* 227 * BIGCACHE caches oversized allocations. Note that a linear search is 228 * performed, so do not make the cache too large. 229 * 230 * BIGCACHE will garbage-collect excess space when the excess exceeds the 231 * specified value. A relatively large number should be used here because 232 * garbage collection is expensive. 233 */ 234 #define BIGCACHE 16 235 #define BIGCACHE_MASK (BIGCACHE - 1) 236 #define BIGCACHE_LIMIT (1024 * 1024) /* size limit */ 237 #define BIGCACHE_EXCESS (16 * 1024 * 1024) /* garbage collect */ 238 239 #define CACHE_CHUNKS 32 240 241 #define SAFLAG_ZERO 0x0001 242 #define SAFLAG_PASSIVE 0x0002 243 #define SAFLAG_MAGS 0x0004 244 245 /* 246 * Thread control 247 */ 248 249 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 250 251 /* 252 * The assertion macros try to pretty-print assertion failures 253 * which can be caused by corruption. If a lock is held, we 254 * provide a macro that attempts to release it before asserting 255 * in order to prevent (e.g.) a reentrant SIGABRT calling malloc 256 * and deadlocking, resulting in the program freezing up. 257 */ 258 #define MASSERT(exp) \ 259 do { if (__predict_false(!(exp))) \ 260 _mpanic("assertion: %s in %s", \ 261 #exp, __func__); \ 262 } while (0) 263 264 #define MASSERT_WTHUNLK(exp, unlk) \ 265 do { if (__predict_false(!(exp))) { \ 266 unlk; \ 267 _mpanic("assertion: %s in %s", \ 268 #exp, __func__); \ 269 } \ 270 } while (0) 271 272 /* 273 * Magazines, arrange so the structure is roughly 4KB. 
274 */ 275 #define M_MAX_ROUNDS (512 - 3) 276 #define M_MIN_ROUNDS 16 277 #define M_ZONE_INIT_ROUNDS 64 278 #define M_ZONE_HYSTERESIS 32 279 280 struct magazine { 281 SLIST_ENTRY(magazine) nextmagazine; 282 283 int flags; 284 int capacity; /* Max rounds in this magazine */ 285 int rounds; /* Current number of free rounds */ 286 int unused01; 287 void *objects[M_MAX_ROUNDS]; 288 }; 289 290 SLIST_HEAD(magazinelist, magazine); 291 292 static spinlock_t zone_mag_lock; 293 static spinlock_t depot_spinlock; 294 static struct magazine zone_magazine = { 295 .flags = 0, 296 .capacity = M_ZONE_INIT_ROUNDS, 297 .rounds = 0, 298 }; 299 300 #define MAGAZINE_FULL(mp) (mp->rounds == mp->capacity) 301 #define MAGAZINE_NOTFULL(mp) (mp->rounds < mp->capacity) 302 #define MAGAZINE_EMPTY(mp) (mp->rounds == 0) 303 #define MAGAZINE_NOTEMPTY(mp) (mp->rounds != 0) 304 305 /* 306 * Each thread will have a pair of magazines per size-class (NZONES) 307 * The loaded magazine will support immediate allocations, the previous 308 * magazine will either be full or empty and can be swapped at need 309 */ 310 typedef struct magazine_pair { 311 struct magazine *loaded; 312 struct magazine *prev; 313 } magazine_pair; 314 315 /* A depot is a collection of magazines for a single zone. */ 316 typedef struct magazine_depot { 317 struct magazinelist full; 318 struct magazinelist empty; 319 spinlock_t lock; 320 } magazine_depot; 321 322 typedef struct thr_mags { 323 magazine_pair mags[NZONES]; 324 struct magazine *newmag; 325 int init; 326 } thr_mags; 327 328 static __thread thr_mags thread_mags TLS_ATTRIBUTE; 329 static pthread_key_t thread_mags_key; 330 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT; 331 static magazine_depot depots[NZONES]; 332 333 /* 334 * Fixed globals (not per-cpu) 335 */ 336 static const int ZoneSize = ZALLOC_ZONE_SIZE; 337 static const int ZoneLimit = ZALLOC_ZONE_LIMIT; 338 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE; 339 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1; 340 341 static int opt_madvise = 0; 342 static int opt_utrace = 0; 343 static int g_malloc_flags = 0; 344 static struct slglobaldata SLGlobalData; 345 static bigalloc_t bigalloc_array[BIGHSIZE]; 346 static spinlock_t bigspin_array[BIGXSIZE]; 347 static volatile void *bigcache_array[BIGCACHE]; /* atomic swap */ 348 static volatile size_t bigcache_size_array[BIGCACHE]; /* SMP races ok */ 349 static volatile int bigcache_index; /* SMP races ok */ 350 static int malloc_panic; 351 static size_t excess_alloc; /* excess big allocs */ 352 353 static void *_slaballoc(size_t size, int flags); 354 static void *_slabrealloc(void *ptr, size_t size); 355 static size_t _slabusablesize(const void *ptr); 356 static void _slabfree(void *ptr, int, bigalloc_t *); 357 static int _slabmemalign(void **memptr, size_t alignment, size_t size); 358 static void *_vmem_alloc(size_t bytes, size_t align, int flags); 359 static void _vmem_free(void *ptr, size_t bytes); 360 static void *magazine_alloc(struct magazine *); 361 static int magazine_free(struct magazine *, void *); 362 static void *mtmagazine_alloc(int zi, int flags); 363 static int mtmagazine_free(int zi, void *); 364 static void mtmagazine_init(void); 365 static void mtmagazine_destructor(void *); 366 static slzone_t zone_alloc(int flags); 367 static void zone_free(void *z); 368 static void _mpanic(const char *ctl, ...) 
__printflike(1, 2); 369 static void malloc_init(void) __constructor(101); 370 371 struct nmalloc_utrace { 372 void *p; 373 size_t s; 374 void *r; 375 }; 376 377 #define UTRACE(a, b, c) \ 378 if (opt_utrace) { \ 379 struct nmalloc_utrace ut = { \ 380 .p = (a), \ 381 .s = (b), \ 382 .r = (c) \ 383 }; \ 384 utrace(&ut, sizeof(ut)); \ 385 } 386 387 static void 388 malloc_init(void) 389 { 390 const char *p = NULL; 391 392 if (issetugid() == 0) 393 p = getenv("MALLOC_OPTIONS"); 394 395 for (; p != NULL && *p != '\0'; p++) { 396 switch(*p) { 397 case 'u': opt_utrace = 0; break; 398 case 'U': opt_utrace = 1; break; 399 case 'h': opt_madvise = 0; break; 400 case 'H': opt_madvise = 1; break; 401 case 'z': g_malloc_flags = 0; break; 402 case 'Z': g_malloc_flags = SAFLAG_ZERO; break; 403 default: 404 break; 405 } 406 } 407 408 UTRACE((void *) -1, 0, NULL); 409 } 410 411 /* 412 * We have to install a handler for nmalloc thread teardowns when 413 * the thread is created. We cannot delay this because destructors in 414 * sophisticated userland programs can call malloc() for the first time 415 * during their thread exit. 416 * 417 * This routine is called directly from pthreads. 418 */ 419 void 420 _nmalloc_thr_init(void) 421 { 422 thr_mags *tp; 423 424 /* 425 * Disallow mtmagazine operations until the mtmagazine is 426 * initialized. 427 */ 428 tp = &thread_mags; 429 tp->init = -1; 430 431 _pthread_once(&thread_mags_once, mtmagazine_init); 432 _pthread_setspecific(thread_mags_key, tp); 433 tp->init = 1; 434 } 435 436 void 437 _nmalloc_thr_prepfork(void) 438 { 439 if (__isthreaded) { 440 _SPINLOCK(&zone_mag_lock); 441 _SPINLOCK(&depot_spinlock); 442 } 443 } 444 445 void 446 _nmalloc_thr_parentfork(void) 447 { 448 if (__isthreaded) { 449 _SPINUNLOCK(&depot_spinlock); 450 _SPINUNLOCK(&zone_mag_lock); 451 } 452 } 453 454 void 455 _nmalloc_thr_childfork(void) 456 { 457 if (__isthreaded) { 458 _SPINUNLOCK(&depot_spinlock); 459 _SPINUNLOCK(&zone_mag_lock); 460 } 461 } 462 463 /* 464 * Handle signal reentrancy safely whether we are threaded or not. 465 * This improves the stability for mono and will probably improve 466 * stability for other high-level languages which are becoming increasingly 467 * sophisticated. 468 * 469 * The sigblockall()/sigunblockall() implementation uses a counter on 470 * a per-thread shared user/kernel page, avoids system calls, and is thus 471 * very fast. 472 */ 473 static __inline void 474 nmalloc_sigblockall(void) 475 { 476 sigblockall(); 477 } 478 479 static __inline void 480 nmalloc_sigunblockall(void) 481 { 482 sigunblockall(); 483 } 484 485 /* 486 * Thread locks. 
487 */ 488 static __inline void 489 slgd_lock(slglobaldata_t slgd) 490 { 491 if (__isthreaded) 492 _SPINLOCK(&slgd->Spinlock); 493 } 494 495 static __inline void 496 slgd_unlock(slglobaldata_t slgd) 497 { 498 if (__isthreaded) 499 _SPINUNLOCK(&slgd->Spinlock); 500 } 501 502 static __inline void 503 depot_lock(magazine_depot *dp __unused) 504 { 505 if (__isthreaded) 506 _SPINLOCK(&depot_spinlock); 507 } 508 509 static __inline void 510 depot_unlock(magazine_depot *dp __unused) 511 { 512 if (__isthreaded) 513 _SPINUNLOCK(&depot_spinlock); 514 } 515 516 static __inline void 517 zone_magazine_lock(void) 518 { 519 if (__isthreaded) 520 _SPINLOCK(&zone_mag_lock); 521 } 522 523 static __inline void 524 zone_magazine_unlock(void) 525 { 526 if (__isthreaded) 527 _SPINUNLOCK(&zone_mag_lock); 528 } 529 530 static __inline void 531 swap_mags(magazine_pair *mp) 532 { 533 struct magazine *tmp; 534 tmp = mp->loaded; 535 mp->loaded = mp->prev; 536 mp->prev = tmp; 537 } 538 539 /* 540 * bigalloc hashing and locking support. 541 * 542 * Return an unmasked hash code for the passed pointer. 543 */ 544 static __inline int 545 _bigalloc_hash(const void *ptr) 546 { 547 int hv; 548 549 hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^ 550 ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT)); 551 552 return(hv); 553 } 554 555 /* 556 * Lock the hash chain and return a pointer to its base for the specified 557 * address. 558 */ 559 static __inline bigalloc_t * 560 bigalloc_lock(void *ptr) 561 { 562 int hv = _bigalloc_hash(ptr); 563 bigalloc_t *bigp; 564 565 bigp = &bigalloc_array[hv & BIGHMASK]; 566 if (__isthreaded) 567 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 568 return(bigp); 569 } 570 571 /* 572 * Lock the hash chain and return a pointer to its base for the specified 573 * address. 574 * 575 * BUT, if the hash chain is empty, just return NULL and do not bother 576 * to lock anything. 577 */ 578 static __inline bigalloc_t * 579 bigalloc_check_and_lock(const void *ptr) 580 { 581 int hv = _bigalloc_hash(ptr); 582 bigalloc_t *bigp; 583 584 bigp = &bigalloc_array[hv & BIGHMASK]; 585 if (*bigp == NULL) 586 return(NULL); 587 if (__isthreaded) { 588 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 589 } 590 return(bigp); 591 } 592 593 static __inline void 594 bigalloc_unlock(const void *ptr) 595 { 596 int hv; 597 598 if (__isthreaded) { 599 hv = _bigalloc_hash(ptr); 600 _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]); 601 } 602 } 603 604 /* 605 * Find a bigcache entry that might work for the allocation. SMP races are 606 * ok here except for the swap (that is, it is ok if bigcache_size_array[i] 607 * is wrong or if a NULL or too-small big is returned). 608 * 609 * Generally speaking it is ok to find a large entry even if the bytes 610 * requested are relatively small (but still oversized), because we really 611 * don't know *what* the application is going to do with the buffer. 612 */ 613 static __inline 614 bigalloc_t 615 bigcache_find_alloc(size_t bytes) 616 { 617 bigalloc_t big = NULL; 618 size_t test; 619 int i; 620 621 for (i = 0; i < BIGCACHE; ++i) { 622 test = bigcache_size_array[i]; 623 if (bytes <= test) { 624 bigcache_size_array[i] = 0; 625 big = atomic_swap_ptr(&bigcache_array[i], NULL); 626 break; 627 } 628 } 629 return big; 630 } 631 632 /* 633 * Free a bigcache entry, possibly returning one that the caller really must 634 * free. This is used to cache recent oversized memory blocks. Only 635 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try 636 * to collect the biggest ones we can that are under the limit. 
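 *
 * A rough sketch of how the return value is handled (compare the
 * fastslabrealloc path in _slabfree() below):
 *
 *      big = bigcache_find_free(big);
 *      if (big != NULL) {
 *              void *base = big->base;         // save before freeing the
 *              size_t bytes = big->bytes;      // descriptor itself
 *              _slabfree(big, 0, NULL);
 *              _vmem_free(base, bytes);
 *      }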
637 */ 638 static __inline 639 bigalloc_t 640 bigcache_find_free(bigalloc_t big) 641 { 642 int i; 643 int j; 644 int b; 645 646 b = ++bigcache_index; 647 for (i = 0; i < BIGCACHE; ++i) { 648 j = (b + i) & BIGCACHE_MASK; 649 if (bigcache_size_array[j] < big->bytes) { 650 bigcache_size_array[j] = big->bytes; 651 big = atomic_swap_ptr(&bigcache_array[j], big); 652 break; 653 } 654 } 655 return big; 656 } 657 658 static __inline 659 void 660 handle_excess_big(void) 661 { 662 int i; 663 bigalloc_t big; 664 bigalloc_t *bigp; 665 666 if (excess_alloc <= BIGCACHE_EXCESS) 667 return; 668 669 for (i = 0; i < BIGHSIZE; ++i) { 670 bigp = &bigalloc_array[i]; 671 if (*bigp == NULL) 672 continue; 673 if (__isthreaded) 674 _SPINLOCK(&bigspin_array[i & BIGXMASK]); 675 for (big = *bigp; big; big = big->next) { 676 if (big->active < big->bytes) { 677 MASSERT_WTHUNLK((big->active & PAGE_MASK) == 0, 678 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 679 MASSERT_WTHUNLK((big->bytes & PAGE_MASK) == 0, 680 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 681 munmap((char *)big->base + big->active, 682 big->bytes - big->active); 683 atomic_add_long(&excess_alloc, 684 big->active - big->bytes); 685 big->bytes = big->active; 686 } 687 } 688 if (__isthreaded) 689 _SPINUNLOCK(&bigspin_array[i & BIGXMASK]); 690 } 691 } 692 693 /* 694 * Calculate the zone index for the allocation request size and set the 695 * allocation request size to that particular zone's chunk size. 696 */ 697 static __inline int 698 zoneindex(size_t *bytes, size_t *chunking) 699 { 700 size_t n = (unsigned int)*bytes; /* unsigned for shift opt */ 701 702 /* 703 * This used to be 8-byte chunks and 16 zones for n < 128. 704 * However some instructions may require 16-byte alignment 705 * (aka SIMD) and programs might not request an aligned size 706 * (aka GCC-7), so change this as follows: 707 * 708 * 0-15 bytes 8-byte alignment in two zones (0-1) 709 * 16-127 bytes 16-byte alignment in four zones (3-10) 710 * zone index 2 and 11-15 are currently unused. 
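         *
         * Worked examples of the mapping implemented below (informal):
         *
         *      n = 24  -> rounded to 32,  chunking 16, zone index 4
         *      n = 100 -> rounded to 112, chunking 16, zone index 9
         *      n = 600 -> rounded to 640, chunking 64, zone index 33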
711 */ 712 if (n < 16) { 713 *bytes = n = (n + 7) & ~7; 714 *chunking = 8; 715 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 716 /* zones 0,1, zone 2 is unused */ 717 } 718 if (n < 128) { 719 *bytes = n = (n + 15) & ~15; 720 *chunking = 16; 721 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 722 /* zones 3-10, zones 11-15 unused */ 723 } 724 if (n < 256) { 725 *bytes = n = (n + 15) & ~15; 726 *chunking = 16; 727 return(n / 16 + 7); 728 } 729 if (n < 8192) { 730 if (n < 512) { 731 *bytes = n = (n + 31) & ~31; 732 *chunking = 32; 733 return(n / 32 + 15); 734 } 735 if (n < 1024) { 736 *bytes = n = (n + 63) & ~63; 737 *chunking = 64; 738 return(n / 64 + 23); 739 } 740 if (n < 2048) { 741 *bytes = n = (n + 127) & ~127; 742 *chunking = 128; 743 return(n / 128 + 31); 744 } 745 if (n < 4096) { 746 *bytes = n = (n + 255) & ~255; 747 *chunking = 256; 748 return(n / 256 + 39); 749 } 750 *bytes = n = (n + 511) & ~511; 751 *chunking = 512; 752 return(n / 512 + 47); 753 } 754 #if ZALLOC_ZONE_LIMIT > 8192 755 if (n < 16384) { 756 *bytes = n = (n + 1023) & ~1023; 757 *chunking = 1024; 758 return(n / 1024 + 55); 759 } 760 #endif 761 #if ZALLOC_ZONE_LIMIT > 16384 762 if (n < 32768) { 763 *bytes = n = (n + 2047) & ~2047; 764 *chunking = 2048; 765 return(n / 2048 + 63); 766 } 767 #endif 768 _mpanic("Unexpected byte count %zu", n); 769 return(0); 770 } 771 772 /* 773 * We want large magazines for small allocations 774 */ 775 static __inline int 776 zonecapacity(int zi) 777 { 778 int cap; 779 780 cap = (NZONES - zi) * (M_MAX_ROUNDS - M_MIN_ROUNDS) / NZONES + 781 M_MIN_ROUNDS; 782 783 return cap; 784 } 785 786 /* 787 * malloc() - call internal slab allocator 788 */ 789 void * 790 __malloc(size_t size) 791 { 792 void *ptr; 793 794 nmalloc_sigblockall(); 795 ptr = _slaballoc(size, 0); 796 if (ptr == NULL) 797 errno = ENOMEM; 798 else 799 UTRACE(0, size, ptr); 800 nmalloc_sigunblockall(); 801 802 return(ptr); 803 } 804 805 #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) 806 807 /* 808 * calloc() - call internal slab allocator 809 */ 810 void * 811 __calloc(size_t number, size_t size) 812 { 813 void *ptr; 814 815 if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && 816 number > 0 && SIZE_MAX / number < size) { 817 errno = ENOMEM; 818 return(NULL); 819 } 820 821 nmalloc_sigblockall(); 822 ptr = _slaballoc(number * size, SAFLAG_ZERO); 823 if (ptr == NULL) 824 errno = ENOMEM; 825 else 826 UTRACE(0, number * size, ptr); 827 nmalloc_sigunblockall(); 828 829 return(ptr); 830 } 831 832 /* 833 * realloc() (SLAB ALLOCATOR) 834 * 835 * We do not attempt to optimize this routine beyond reusing the same 836 * pointer if the new size fits within the chunking of the old pointer's 837 * zone. 838 */ 839 void * 840 __realloc(void *ptr, size_t size) 841 { 842 void *ret; 843 844 nmalloc_sigblockall(); 845 ret = _slabrealloc(ptr, size); 846 if (ret == NULL) 847 errno = ENOMEM; 848 else 849 UTRACE(ptr, size, ret); 850 nmalloc_sigunblockall(); 851 852 return(ret); 853 } 854 855 /* 856 * malloc_usable_size() (SLAB ALLOCATOR) 857 */ 858 size_t 859 __malloc_usable_size(const void *ptr) 860 { 861 return _slabusablesize(ptr); 862 } 863 864 /* 865 * aligned_alloc() 866 * 867 * Allocate (size) bytes with a alignment of (alignment). 
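 *
 * Example use (hypothetical caller):
 *
 *      void *p = aligned_alloc(64, 4096);      // 4KB on a 64-byte boundary
 *      ...
 *      free(p);                                // normal free() path applies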
868 */ 869 void * 870 __aligned_alloc(size_t alignment, size_t size) 871 { 872 void *ptr; 873 int rc; 874 875 nmalloc_sigblockall(); 876 ptr = NULL; 877 rc = _slabmemalign(&ptr, alignment, size); 878 if (rc) 879 errno = rc; 880 nmalloc_sigunblockall(); 881 882 return (ptr); 883 } 884 885 /* 886 * posix_memalign() 887 * 888 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 889 * is a power of 2 >= sizeof(void *). 890 */ 891 int 892 __posix_memalign(void **memptr, size_t alignment, size_t size) 893 { 894 int rc; 895 896 /* 897 * OpenGroup spec issue 6 check 898 */ 899 if (alignment < sizeof(void *)) { 900 *memptr = NULL; 901 return(EINVAL); 902 } 903 904 nmalloc_sigblockall(); 905 rc = _slabmemalign(memptr, alignment, size); 906 nmalloc_sigunblockall(); 907 908 return (rc); 909 } 910 911 /* 912 * The slab allocator will allocate on power-of-2 boundaries up to 913 * at least PAGE_SIZE. We use the zoneindex mechanic to find a 914 * zone matching the requirements, and _vmem_alloc() otherwise. 915 */ 916 static int 917 _slabmemalign(void **memptr, size_t alignment, size_t size) 918 { 919 bigalloc_t *bigp; 920 bigalloc_t big; 921 size_t chunking; 922 int zi __unused; 923 924 if (alignment < 1) { 925 *memptr = NULL; 926 return(EINVAL); 927 } 928 929 /* 930 * OpenGroup spec issue 6 checks 931 */ 932 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 933 *memptr = NULL; 934 return(EINVAL); 935 } 936 937 /* 938 * Our zone mechanism guarantees same-sized alignment for any 939 * power-of-2 allocation. If size is a power-of-2 and reasonable 940 * we can just call _slaballoc() and be done. We round size up 941 * to the nearest alignment boundary to improve our odds of 942 * it becoming a power-of-2 if it wasn't before. 943 */ 944 if (size <= alignment) 945 size = alignment; 946 else 947 size = (size + alignment - 1) & ~(size_t)(alignment - 1); 948 949 /* 950 * If we have overflowed above when rounding to the nearest alignment 951 * boundary, just return ENOMEM, size should be == N * sizeof(void *). 952 * 953 * Power-of-2 allocations up to 8KB will be aligned to the allocation 954 * size and _slaballoc() can simply be used. Please see line 1082 955 * for this special case: 'Align the storage in the zone based on 956 * the chunking' has a special case for powers of 2. 957 */ 958 if (size == 0) 959 return(ENOMEM); 960 961 if (size <= MAX_SLAB_PAGEALIGN && 962 (size | (size - 1)) + 1 == (size << 1)) { 963 *memptr = _slaballoc(size, 0); 964 return(*memptr ? 0 : ENOMEM); 965 } 966 967 /* 968 * Otherwise locate a zone with a chunking that matches 969 * the requested alignment, within reason. Consider two cases: 970 * 971 * (1) A 1K allocation on a 32-byte alignment. The first zoneindex 972 * we find will be the best fit because the chunking will be 973 * greater or equal to the alignment. 974 * 975 * (2) A 513 allocation on a 256-byte alignment. In this case 976 * the first zoneindex we find will be for 576 byte allocations 977 * with a chunking of 64, which is not sufficient. To fix this 978 * we simply find the nearest power-of-2 >= size and use the 979 * same side-effect of _slaballoc() which guarantees 980 * same-alignment on a power-of-2 allocation. 981 */ 982 if (size < PAGE_SIZE) { 983 zi = zoneindex(&size, &chunking); 984 if (chunking >= alignment) { 985 *memptr = _slaballoc(size, 0); 986 return(*memptr ? 
                               0 : ENOMEM);
                }
                if (size >= 1024)
                        alignment = 1024;
                if (size >= 16384)
                        alignment = 16384;
                while (alignment < size)
                        alignment <<= 1;
                *memptr = _slaballoc(alignment, 0);
                return(*memptr ? 0 : ENOMEM);
        }

        /*
         * If the slab allocator cannot handle it, use _vmem_alloc().
         *
         * Alignment must be adjusted up to at least PAGE_SIZE in this case.
         */
        if (alignment < PAGE_SIZE)
                alignment = PAGE_SIZE;
        if (size < alignment)
                size = alignment;
        size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
        if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) {
                big = bigcache_find_alloc(size);
                if (big && big->bytes < size) {
                        _slabfree(big->base, FASTSLABREALLOC, &big);
                        big = NULL;
                }
                if (big) {
                        *memptr = big->base;
                        big->active = size;
                        if (big->active < big->bytes) {
                                atomic_add_long(&excess_alloc,
                                                big->bytes - big->active);
                        }
                        bigp = bigalloc_lock(*memptr);
                        big->next = *bigp;
                        *bigp = big;
                        bigalloc_unlock(*memptr);
                        handle_excess_big();
                        return(0);
                }
        }
        *memptr = _vmem_alloc(size, alignment, 0);
        if (*memptr == NULL)
                return(ENOMEM);

        big = _slaballoc(sizeof(struct bigalloc), 0);
        if (big == NULL) {
                _vmem_free(*memptr, size);
                *memptr = NULL;
                return(ENOMEM);
        }
        bigp = bigalloc_lock(*memptr);
        big->base = *memptr;
        big->active = size;
        big->bytes = size;              /* no excess */
        big->next = *bigp;
        *bigp = big;
        bigalloc_unlock(*memptr);

        return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
__free(void *ptr)
{
        UTRACE(ptr, 0, 0);

        nmalloc_sigblockall();
        _slabfree(ptr, 0, NULL);
        nmalloc_sigunblockall();
}

/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.  If the request is too large,
 * or if it is page-aligned beyond a certain size, we fall back to the
 * KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
        slzone_t z;
        slchunk_t chunk;
        slglobaldata_t slgd;
        size_t chunking;
        thr_mags *tp;
        struct magazine *mp;
        int count;
        int zi;
        int off;
        void *obj;

        /*
         * Handle the degenerate size == 0 case.  Yes, this does happen.
         * Return a special pointer.  This is to maintain compatibility with
         * the original malloc implementation.  Certain devices, such as the
         * adaptec driver, not only allocate 0 bytes, they check for NULL and
         * also realloc() later on.  Joy.
         */
        if (size == 0)
                size = 1;

        /* Capture global flags */
        flags |= g_malloc_flags;

        /*
         * Handle large allocations directly, with a separate bigmem cache.
         *
         * The backend allocator is pretty nasty on an SMP system.  Use the
         * slab allocator for one and two page-sized chunks even though we
         * lose some efficiency.
         *
         * NOTE: Please see _slabmemalign(), which assumes that power-of-2
         *       allocations up to and including MAX_SLAB_PAGEALIGN
         *       can use _slaballoc() and be aligned to the same.  The
         *       zone cache can be used for this case, bigalloc does not
         *       have to be used.
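         *
         * Informal examples of how the test below routes a request,
         * assuming PAGE_SIZE is 4096:
         *
         *      size 8192  - page multiple, not > MAX_SLAB_PAGEALIGN: slab zone
         *      size 12288 - page multiple and  > MAX_SLAB_PAGEALIGN: bigalloc
         *      size 16383 - below ZoneLimit, not a page multiple:    slab zone
         *      size 16384 - >= ZoneLimit:                            bigalloc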
1109 */ 1110 if (size >= ZoneLimit || 1111 ((size & PAGE_MASK) == 0 && size > MAX_SLAB_PAGEALIGN)) { 1112 bigalloc_t big; 1113 bigalloc_t *bigp; 1114 1115 /* 1116 * Page-align and cache-color in case of virtually indexed 1117 * physically tagged L1 caches (aka SandyBridge). No sweat 1118 * otherwise, so just do it. 1119 * 1120 * (don't count as excess). 1121 */ 1122 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1123 1124 /* 1125 * If we have overflowed above when rounding to the page 1126 * boundary, something has passed us (size_t)[-PAGE_MASK..-1] 1127 * so just return NULL, size at this point should be >= 0. 1128 */ 1129 if (size == 0) 1130 return (NULL); 1131 1132 /* 1133 * Force an additional page offset for 8KB-aligned requests 1134 * (i.e. 8KB, 16KB, etc) that helps spread data across the 1135 * CPU caches at the cost of some dead space in the memory 1136 * map. 1137 */ 1138 if ((size & (PAGE_SIZE * 2 - 1)) == 0) 1139 size += PAGE_SIZE; 1140 1141 /* 1142 * Try to reuse a cached big block to avoid mmap'ing. If it 1143 * turns out not to fit our requirements we throw it away 1144 * and allocate normally. 1145 */ 1146 big = NULL; 1147 if (size <= BIGCACHE_LIMIT) { 1148 big = bigcache_find_alloc(size); 1149 if (big && big->bytes < size) { 1150 _slabfree(big->base, FASTSLABREALLOC, &big); 1151 big = NULL; 1152 } 1153 } 1154 if (big) { 1155 chunk = big->base; 1156 if (flags & SAFLAG_ZERO) 1157 bzero(chunk, size); 1158 } else { 1159 chunk = _vmem_alloc(size, PAGE_SIZE, flags); 1160 if (chunk == NULL) 1161 return(NULL); 1162 1163 big = _slaballoc(sizeof(struct bigalloc), 0); 1164 if (big == NULL) { 1165 _vmem_free(chunk, size); 1166 return(NULL); 1167 } 1168 big->base = chunk; 1169 big->bytes = size; 1170 } 1171 big->active = size; 1172 1173 bigp = bigalloc_lock(chunk); 1174 if (big->active < big->bytes) { 1175 atomic_add_long(&excess_alloc, 1176 big->bytes - big->active); 1177 } 1178 big->next = *bigp; 1179 *bigp = big; 1180 bigalloc_unlock(chunk); 1181 handle_excess_big(); 1182 1183 return(chunk); 1184 } 1185 1186 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 1187 zi = zoneindex(&size, &chunking); 1188 MASSERT(zi < NZONES); 1189 1190 obj = mtmagazine_alloc(zi, flags); 1191 if (obj != NULL) { 1192 if (flags & SAFLAG_ZERO) 1193 bzero(obj, size); 1194 return (obj); 1195 } 1196 1197 /* 1198 * Attempt to allocate out of an existing global zone. If all zones 1199 * are exhausted pull one off the free list or allocate a new one. 1200 */ 1201 slgd = &SLGlobalData; 1202 1203 again: 1204 if (slgd->ZoneAry[zi] == NULL) { 1205 z = zone_alloc(flags); 1206 if (z == NULL) 1207 goto fail; 1208 1209 /* 1210 * How big is the base structure? 1211 */ 1212 off = sizeof(struct slzone); 1213 1214 /* 1215 * Align the storage in the zone based on the chunking. 1216 * 1217 * Guarantee power-of-2 alignment for power-of-2-sized 1218 * chunks. Otherwise align based on the chunking size 1219 * (typically 8 or 16 bytes for small allocations). 1220 * 1221 * NOTE: Allocations >= ZoneLimit are governed by the 1222 * bigalloc code and typically only guarantee page-alignment. 1223 * 1224 * Set initial conditions for UIndex near the zone header 1225 * to reduce unecessary page faults, vs semi-randomization 1226 * to improve L1 cache saturation. 1227 * 1228 * NOTE: Please see _slabmemalign(), which assumes that 1229 * power-of-2 allocations up to an including 1230 * MAX_SLAB_PAGEALIGN can use _slaballoc() 1231 * and be aligned to the same. 
The zone cache can be 1232 * used for this case, bigalloc does not have to be 1233 * used. 1234 * 1235 * ALL power-of-2 requests that fall through to this 1236 * code use this rule (conditionals above limit this 1237 * to <= MAX_SLAB_PAGEALIGN). 1238 */ 1239 if ((size | (size - 1)) + 1 == (size << 1)) 1240 off = roundup2(off, size); 1241 else 1242 off = roundup2(off, chunking); 1243 z->z_Magic = ZALLOC_SLAB_MAGIC; 1244 z->z_ZoneIndex = zi; 1245 z->z_NMax = (ZoneSize - off) / size; 1246 z->z_NFree = z->z_NMax; 1247 z->z_BasePtr = (char *)z + off; 1248 z->z_UIndex = z->z_UEndIndex = 0; 1249 z->z_ChunkSize = size; 1250 z->z_FirstFreePg = ZonePageCount; 1251 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1252 flags &= ~SAFLAG_ZERO; /* already zero'd */ 1253 flags |= SAFLAG_PASSIVE; 1254 } 1255 1256 /* 1257 * Slide the base index for initial allocations out of the 1258 * next zone we create so we do not over-weight the lower 1259 * part of the cpu memory caches. 1260 */ 1261 slgd_lock(slgd); 1262 z->z_Next = slgd->ZoneAry[zi]; 1263 slgd->ZoneAry[zi] = z; 1264 } else { 1265 slgd_lock(slgd); 1266 z = slgd->ZoneAry[zi]; 1267 if (z == NULL) { 1268 slgd_unlock(slgd); 1269 goto again; 1270 } 1271 } 1272 1273 /* 1274 * Ok, we have a zone from which at least one chunk is available. 1275 */ 1276 MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd)); 1277 1278 /* 1279 * Try to cache <count> chunks, up to CACHE_CHUNKS (32 typ) 1280 * to avoid unnecessary global lock contention. 1281 */ 1282 tp = &thread_mags; 1283 mp = tp->mags[zi].loaded; 1284 count = 0; 1285 if (mp && tp->init >= 0) { 1286 count = mp->capacity - mp->rounds; 1287 if (count >= z->z_NFree) 1288 count = z->z_NFree - 1; 1289 if (count > CACHE_CHUNKS) 1290 count = CACHE_CHUNKS; 1291 } 1292 1293 /* 1294 * Locate a chunk in a free page. This attempts to localize 1295 * reallocations into earlier pages without us having to sort 1296 * the chunk list. A chunk may still overlap a page boundary. 1297 */ 1298 while (z->z_FirstFreePg < ZonePageCount) { 1299 if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { 1300 if (((uintptr_t)chunk & ZoneMask) == 0) { 1301 slgd_unlock(slgd); 1302 _mpanic("assertion: corrupt malloc zone"); 1303 } 1304 z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; 1305 --z->z_NFree; 1306 1307 if (count == 0) 1308 goto done; 1309 mp->objects[mp->rounds++] = chunk; 1310 --count; 1311 continue; 1312 } 1313 ++z->z_FirstFreePg; 1314 } 1315 1316 /* 1317 * No chunks are available but NFree said we had some memory, 1318 * so it must be available in the never-before-used-memory 1319 * area governed by UIndex. The consequences are very 1320 * serious if our zone got corrupted so we use an explicit 1321 * panic rather then a KASSERT. 
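         *
         * As a rough sizing example (assuming a 4KB page and the 1024-byte
         * chunk zone): 'off' rounds sizeof(struct slzone) up to 1024, so
         * z_NMax = (64KB - 1024) / 1024 = 63, and those 63 chunks are carved
         * out of this never-used area one at a time by the loop below.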
1322 */ 1323 for (;;) { 1324 chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size); 1325 --z->z_NFree; 1326 if (++z->z_UIndex == z->z_NMax) 1327 z->z_UIndex = 0; 1328 if (z->z_UIndex == z->z_UEndIndex) { 1329 if (z->z_NFree != 0) { 1330 slgd_unlock(slgd); 1331 _mpanic("slaballoc: corrupted zone"); 1332 } 1333 } 1334 if (count == 0) 1335 break; 1336 mp->objects[mp->rounds++] = chunk; 1337 --count; 1338 } 1339 1340 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1341 flags &= ~SAFLAG_ZERO; 1342 flags |= SAFLAG_PASSIVE; 1343 } 1344 1345 done: 1346 /* 1347 * Remove us from the ZoneAry[] when we become empty 1348 */ 1349 if (z->z_NFree == 0) { 1350 slgd->ZoneAry[zi] = z->z_Next; 1351 z->z_Next = NULL; 1352 } 1353 slgd_unlock(slgd); 1354 if (flags & SAFLAG_ZERO) 1355 bzero(chunk, size); 1356 1357 return(chunk); 1358 fail: 1359 return(NULL); 1360 } 1361 1362 /* 1363 * Reallocate memory within the chunk 1364 */ 1365 static void * 1366 _slabrealloc(void *ptr, size_t size) 1367 { 1368 bigalloc_t *bigp; 1369 void *nptr; 1370 slzone_t z; 1371 size_t chunking; 1372 1373 if (ptr == NULL) { 1374 return(_slaballoc(size, 0)); 1375 } 1376 1377 if (size == 0) 1378 size = 1; 1379 1380 /* 1381 * Handle oversized allocations. 1382 */ 1383 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1384 bigalloc_t big; 1385 size_t bigbytes; 1386 1387 while ((big = *bigp) != NULL) { 1388 if (big->base == ptr) { 1389 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1390 bigbytes = big->bytes; 1391 1392 /* 1393 * If it already fits determine if it makes 1394 * sense to shrink/reallocate. Try to optimize 1395 * programs which stupidly make incremental 1396 * reallocations larger or smaller by scaling 1397 * the allocation. Also deal with potential 1398 * coloring. 1399 */ 1400 if (size >= (bigbytes >> 1) && 1401 size <= bigbytes) { 1402 if (big->active != size) { 1403 atomic_add_long(&excess_alloc, 1404 big->active - 1405 size); 1406 } 1407 big->active = size; 1408 bigalloc_unlock(ptr); 1409 return(ptr); 1410 } 1411 1412 /* 1413 * For large reallocations, allocate more space 1414 * than we need to try to avoid excessive 1415 * reallocations later on. 1416 */ 1417 chunking = size + (size >> 3); 1418 chunking = (chunking + PAGE_MASK) & 1419 ~(size_t)PAGE_MASK; 1420 1421 /* 1422 * Try to allocate adjacently in case the 1423 * program is idiotically realloc()ing a 1424 * huge memory block just slightly bigger. 1425 * (llvm's llc tends to do this a lot). 1426 * 1427 * (MAP_TRYFIXED forces mmap to fail if there 1428 * is already something at the address). 1429 */ 1430 if (chunking > bigbytes) { 1431 char *addr; 1432 int errno_save = errno; 1433 1434 addr = mmap((char *)ptr + bigbytes, 1435 chunking - bigbytes, 1436 PROT_READ|PROT_WRITE, 1437 MAP_PRIVATE|MAP_ANON| 1438 MAP_TRYFIXED, 1439 -1, 0); 1440 errno = errno_save; 1441 if (addr == (char *)ptr + bigbytes) { 1442 atomic_add_long(&excess_alloc, 1443 big->active - 1444 big->bytes + 1445 chunking - 1446 size); 1447 big->bytes = chunking; 1448 big->active = size; 1449 bigalloc_unlock(ptr); 1450 1451 return(ptr); 1452 } 1453 MASSERT_WTHUNLK( 1454 (void *)addr == MAP_FAILED, 1455 bigalloc_unlock(ptr)); 1456 } 1457 1458 /* 1459 * Failed, unlink big and allocate fresh. 1460 * (note that we have to leave (big) intact 1461 * in case the slaballoc fails). 
                                 */
                                *bigp = big->next;
                                bigalloc_unlock(ptr);
                                if ((nptr = _slaballoc(size, 0)) == NULL) {
                                        /* Relink block */
                                        bigp = bigalloc_lock(ptr);
                                        big->next = *bigp;
                                        *bigp = big;
                                        bigalloc_unlock(ptr);
                                        return(NULL);
                                }
                                if (size > bigbytes)
                                        size = bigbytes;
                                bcopy(ptr, nptr, size);
                                atomic_add_long(&excess_alloc, big->active -
                                                               big->bytes);
                                _slabfree(ptr, FASTSLABREALLOC, &big);

                                return(nptr);
                        }
                        bigp = &big->next;
                }
                bigalloc_unlock(ptr);
                handle_excess_big();
        }

        /*
         * Get the original allocation's zone.  If the new request winds
         * up using the same chunk size we do not have to do anything.
         *
         * NOTE: We don't have to lock the globaldata here, the fields we
         * access here will not change at least as long as we have control
         * over the allocation.
         */
        z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
        MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

        /*
         * Use zoneindex() to chunk-align the new size, as long as the
         * new size is not too large.
         */
        if (size < ZoneLimit) {
                zoneindex(&size, &chunking);
                if (z->z_ChunkSize == size) {
                        return(ptr);
                }
        }

        /*
         * Allocate memory for the new request size and copy as appropriate.
         */
        if ((nptr = _slaballoc(size, 0)) != NULL) {
                if (size > z->z_ChunkSize)
                        size = z->z_ChunkSize;
                bcopy(ptr, nptr, size);
                _slabfree(ptr, 0, NULL);
        }

        return(nptr);
}

/*
 * Returns the usable area of an allocated pointer
 */
static size_t
_slabusablesize(const void *ptr)
{
        size_t size;
        bigalloc_t *bigp;
        slzone_t z;

        if (ptr == NULL)
                return 0;

        /*
         * Handle oversized allocations.
         */
        if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
                bigalloc_t big;

                while ((big = *bigp) != NULL) {
                        const char *base = big->base;

                        if ((const char *)ptr >= base &&
                            (const char *)ptr < base + big->bytes)
                        {
                                size = base + big->bytes - (const char *)ptr;
                                bigalloc_unlock(ptr);

                                return size;
                        }
                        bigp = &big->next;
                }
                bigalloc_unlock(ptr);
                handle_excess_big();
        }

        /*
         * Get the original allocation's zone.
         *
         * NOTE: We don't have to lock the globaldata here, the fields we
         * access here will not change at least as long as we have control
         * over the allocation.
         */
        z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
        MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

        size = z->z_ChunkSize -
               ((const char *)ptr - (const char *)z->z_BasePtr) %
               z->z_ChunkSize;
        return size;
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * flags:
 *      FASTSLABREALLOC         Fast call from realloc, *rbigp already
 *                              unlinked.
1585 * 1586 * MPSAFE 1587 */ 1588 static void 1589 _slabfree(void *ptr, int flags, bigalloc_t *rbigp) 1590 { 1591 slzone_t z; 1592 slchunk_t chunk; 1593 bigalloc_t big; 1594 bigalloc_t *bigp; 1595 slglobaldata_t slgd; 1596 size_t size; 1597 int zi; 1598 int pgno; 1599 1600 /* Fast realloc path for big allocations */ 1601 if (flags & FASTSLABREALLOC) { 1602 big = *rbigp; 1603 goto fastslabrealloc; 1604 } 1605 1606 /* 1607 * Handle NULL frees and special 0-byte allocations 1608 */ 1609 if (ptr == NULL) 1610 return; 1611 1612 /* 1613 * Handle oversized allocations. 1614 */ 1615 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1616 while ((big = *bigp) != NULL) { 1617 if (big->base == ptr) { 1618 *bigp = big->next; 1619 atomic_add_long(&excess_alloc, big->active - 1620 big->bytes); 1621 bigalloc_unlock(ptr); 1622 1623 /* 1624 * Try to stash the block we are freeing, 1625 * potentially receiving another block in 1626 * return which must be freed. 1627 */ 1628 fastslabrealloc: 1629 if (big->bytes <= BIGCACHE_LIMIT) { 1630 big = bigcache_find_free(big); 1631 if (big == NULL) 1632 return; 1633 } 1634 ptr = big->base; /* reload */ 1635 size = big->bytes; 1636 _slabfree(big, 0, NULL); 1637 _vmem_free(ptr, size); 1638 return; 1639 } 1640 bigp = &big->next; 1641 } 1642 bigalloc_unlock(ptr); 1643 handle_excess_big(); 1644 } 1645 1646 /* 1647 * Zone case. Figure out the zone based on the fact that it is 1648 * ZoneSize aligned. 1649 */ 1650 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1651 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1652 1653 size = z->z_ChunkSize; 1654 zi = z->z_ZoneIndex; 1655 1656 if (g_malloc_flags & SAFLAG_ZERO) 1657 bzero(ptr, size); 1658 1659 if (mtmagazine_free(zi, ptr) == 0) 1660 return; 1661 1662 pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; 1663 chunk = ptr; 1664 1665 /* 1666 * Add this free non-zero'd chunk to a linked list for reuse, adjust 1667 * z_FirstFreePg. 1668 */ 1669 slgd = &SLGlobalData; 1670 slgd_lock(slgd); 1671 1672 chunk->c_Next = z->z_PageAry[pgno]; 1673 z->z_PageAry[pgno] = chunk; 1674 if (z->z_FirstFreePg > pgno) 1675 z->z_FirstFreePg = pgno; 1676 1677 /* 1678 * Bump the number of free chunks. If it becomes non-zero the zone 1679 * must be added back onto the appropriate list. 1680 */ 1681 if (z->z_NFree++ == 0) { 1682 z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; 1683 slgd->ZoneAry[z->z_ZoneIndex] = z; 1684 } 1685 1686 /* 1687 * If the zone becomes totally free we get rid of it. 1688 */ 1689 if (z->z_NFree == z->z_NMax) { 1690 slzone_t *pz; 1691 1692 pz = &slgd->ZoneAry[z->z_ZoneIndex]; 1693 while (z != *pz) 1694 pz = &(*pz)->z_Next; 1695 *pz = z->z_Next; 1696 z->z_Magic = -1; 1697 z->z_Next = NULL; 1698 slgd_unlock(slgd); 1699 zone_free(z); 1700 } else { 1701 slgd_unlock(slgd); 1702 } 1703 } 1704 1705 /* 1706 * Allocate and return a magazine. Return NULL if no magazines are 1707 * available. 
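 *
 * For orientation: a magazine is a small LIFO stack of object pointers,
 * so the per-thread fast path below amounts to (sketch):
 *
 *      mp = tp->mags[zi].loaded;
 *      if (mp && mp->rounds != 0)
 *              obj = mp->objects[--mp->rounds];        // allocation
 *      else
 *              ...                                     // swap prev / hit the depot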
1708 */ 1709 static __inline void * 1710 magazine_alloc(struct magazine *mp) 1711 { 1712 void *obj; 1713 1714 if (mp && MAGAZINE_NOTEMPTY(mp)) { 1715 obj = mp->objects[--mp->rounds]; 1716 } else { 1717 obj = NULL; 1718 } 1719 return (obj); 1720 } 1721 1722 static __inline int 1723 magazine_free(struct magazine *mp, void *p) 1724 { 1725 if (mp != NULL && MAGAZINE_NOTFULL(mp)) { 1726 mp->objects[mp->rounds++] = p; 1727 return 0; 1728 } 1729 1730 return -1; 1731 } 1732 1733 static void * 1734 mtmagazine_alloc(int zi, int flags) 1735 { 1736 thr_mags *tp; 1737 struct magazine *mp, *emptymag; 1738 magazine_depot *d; 1739 void *obj; 1740 1741 /* 1742 * Do not try to access per-thread magazines while the mtmagazine 1743 * is being initialized or destroyed. 1744 */ 1745 tp = &thread_mags; 1746 if (tp->init < 0) 1747 return(NULL); 1748 1749 /* 1750 * Primary per-thread allocation loop 1751 */ 1752 for (;;) { 1753 /* 1754 * Make sure we have a magazine available for use. 1755 */ 1756 if (tp->newmag == NULL && (flags & SAFLAG_MAGS) == 0) { 1757 mp = _slaballoc(sizeof(struct magazine), 1758 SAFLAG_ZERO | SAFLAG_MAGS); 1759 if (mp == NULL) { 1760 obj = NULL; 1761 break; 1762 } 1763 if (tp->newmag) { 1764 _slabfree(mp, 0, NULL); 1765 } else { 1766 tp->newmag = mp; 1767 } 1768 } 1769 1770 /* 1771 * If the loaded magazine has rounds, allocate and return 1772 */ 1773 mp = tp->mags[zi].loaded; 1774 obj = magazine_alloc(mp); 1775 if (obj) 1776 break; 1777 1778 /* 1779 * The prev magazine can only be completely empty or completely 1780 * full. If it is full, swap it with the loaded magazine 1781 * and retry. 1782 */ 1783 mp = tp->mags[zi].prev; 1784 if (mp && MAGAZINE_FULL(mp)) { 1785 MASSERT(mp->rounds != 0); 1786 swap_mags(&tp->mags[zi]); /* prev now empty */ 1787 continue; 1788 } 1789 1790 /* 1791 * If the depot has no loaded magazines ensure that tp->loaded 1792 * is not NULL and return NULL. This will allow _slaballoc() 1793 * to cache referals to SLGlobalData in a magazine. 1794 */ 1795 d = &depots[zi]; 1796 if (SLIST_EMPTY(&d->full)) { /* UNLOCKED TEST IS SAFE */ 1797 mp = tp->mags[zi].loaded; 1798 if (mp == NULL && tp->newmag) { 1799 mp = tp->newmag; 1800 tp->newmag = NULL; 1801 mp->capacity = zonecapacity(zi); 1802 mp->rounds = 0; 1803 mp->flags = 0; 1804 tp->mags[zi].loaded = mp; 1805 } 1806 break; 1807 } 1808 1809 /* 1810 * Cycle: depot(loaded) -> loaded -> prev -> depot(empty) 1811 * 1812 * If we race and the depot has no full magazines, retry. 1813 */ 1814 depot_lock(d); 1815 mp = SLIST_FIRST(&d->full); 1816 if (mp) { 1817 SLIST_REMOVE_HEAD(&d->full, nextmagazine); 1818 emptymag = tp->mags[zi].prev; 1819 if (emptymag) { 1820 SLIST_INSERT_HEAD(&d->empty, emptymag, 1821 nextmagazine); 1822 } 1823 tp->mags[zi].prev = tp->mags[zi].loaded; 1824 tp->mags[zi].loaded = mp; 1825 MASSERT(MAGAZINE_NOTEMPTY(mp)); 1826 } 1827 depot_unlock(d); 1828 continue; 1829 } 1830 1831 return (obj); 1832 } 1833 1834 static int 1835 mtmagazine_free(int zi, void *ptr) 1836 { 1837 thr_mags *tp; 1838 struct magazine *mp, *loadedmag; 1839 magazine_depot *d; 1840 int rc = -1; 1841 1842 /* 1843 * Do not try to access per-thread magazines while the mtmagazine 1844 * is being initialized or destroyed. 1845 */ 1846 tp = &thread_mags; 1847 if (tp->init < 0) 1848 return(-1); 1849 1850 /* 1851 * Primary per-thread freeing loop 1852 */ 1853 for (;;) { 1854 /* 1855 * Make sure a new magazine is available in case we have 1856 * to use it. Staging the newmag allows us to avoid 1857 * some locking/reentrancy complexity. 
1858 * 1859 * Temporarily disable the per-thread caches for this 1860 * allocation to avoid reentrancy and/or to avoid a 1861 * stack overflow if the [zi] happens to be the same that 1862 * would be used to allocate the new magazine. 1863 * 1864 * WARNING! Calling _slaballoc() can indirectly modify 1865 * tp->newmag. 1866 */ 1867 if (tp->newmag == NULL) { 1868 mp = _slaballoc(sizeof(struct magazine), 1869 SAFLAG_ZERO | SAFLAG_MAGS); 1870 if (tp->newmag && mp) 1871 _slabfree(mp, 0, NULL); 1872 else 1873 tp->newmag = mp; 1874 if (tp->newmag == NULL) { 1875 rc = -1; 1876 break; 1877 } 1878 } 1879 1880 /* 1881 * If the loaded magazine has space, free directly to it 1882 */ 1883 rc = magazine_free(tp->mags[zi].loaded, ptr); 1884 if (rc == 0) 1885 break; 1886 1887 /* 1888 * The prev magazine can only be completely empty or completely 1889 * full. If it is empty, swap it with the loaded magazine 1890 * and retry. 1891 */ 1892 mp = tp->mags[zi].prev; 1893 if (mp && MAGAZINE_EMPTY(mp)) { 1894 MASSERT(mp->rounds == 0); 1895 swap_mags(&tp->mags[zi]); /* prev now full */ 1896 continue; 1897 } 1898 1899 /* 1900 * Try to get an empty magazine from the depot. Cycle 1901 * through depot(empty)->loaded->prev->depot(full). 1902 * Retry if an empty magazine was available from the depot. 1903 */ 1904 d = &depots[zi]; 1905 depot_lock(d); 1906 1907 if ((loadedmag = tp->mags[zi].prev) != NULL) 1908 SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine); 1909 tp->mags[zi].prev = tp->mags[zi].loaded; 1910 mp = SLIST_FIRST(&d->empty); 1911 if (mp) { 1912 tp->mags[zi].loaded = mp; 1913 SLIST_REMOVE_HEAD(&d->empty, nextmagazine); 1914 depot_unlock(d); 1915 MASSERT(MAGAZINE_NOTFULL(mp)); 1916 } else { 1917 mp = tp->newmag; 1918 tp->newmag = NULL; 1919 mp->capacity = zonecapacity(zi); 1920 mp->rounds = 0; 1921 mp->flags = 0; 1922 tp->mags[zi].loaded = mp; 1923 depot_unlock(d); 1924 } 1925 } 1926 1927 return rc; 1928 } 1929 1930 static void 1931 mtmagazine_init(void) 1932 { 1933 int error; 1934 1935 error = _pthread_key_create(&thread_mags_key, mtmagazine_destructor); 1936 if (error) 1937 abort(); 1938 } 1939 1940 /* 1941 * This function is only used by the thread exit destructor 1942 */ 1943 static void 1944 mtmagazine_drain(struct magazine *mp) 1945 { 1946 void *obj; 1947 1948 nmalloc_sigblockall(); 1949 while (MAGAZINE_NOTEMPTY(mp)) { 1950 obj = magazine_alloc(mp); 1951 _slabfree(obj, 0, NULL); 1952 } 1953 nmalloc_sigunblockall(); 1954 } 1955 1956 /* 1957 * mtmagazine_destructor() 1958 * 1959 * When a thread exits, we reclaim all its resources; all its magazines are 1960 * drained and the structures are freed. 1961 * 1962 * WARNING! The destructor can be called multiple times if the larger user 1963 * program has its own destructors which run after ours which 1964 * allocate or free memory. 1965 */ 1966 static void 1967 mtmagazine_destructor(void *thrp) 1968 { 1969 thr_mags *tp = thrp; 1970 struct magazine *mp; 1971 int i; 1972 1973 if (__isexiting) 1974 return; 1975 1976 /* 1977 * Prevent further use of mtmagazines while we are destructing 1978 * them, as well as for any destructors which are run after us 1979 * prior to the thread actually being destroyed. 
1980 */ 1981 tp->init = -1; 1982 1983 nmalloc_sigblockall(); 1984 for (i = 0; i < NZONES; i++) { 1985 mp = tp->mags[i].loaded; 1986 tp->mags[i].loaded = NULL; 1987 if (mp) { 1988 if (MAGAZINE_NOTEMPTY(mp)) 1989 mtmagazine_drain(mp); 1990 _slabfree(mp, 0, NULL); 1991 } 1992 1993 mp = tp->mags[i].prev; 1994 tp->mags[i].prev = NULL; 1995 if (mp) { 1996 if (MAGAZINE_NOTEMPTY(mp)) 1997 mtmagazine_drain(mp); 1998 _slabfree(mp, 0, NULL); 1999 } 2000 } 2001 if (tp->newmag) { 2002 mp = tp->newmag; 2003 tp->newmag = NULL; 2004 _slabfree(mp, 0, NULL); 2005 } 2006 nmalloc_sigunblockall(); 2007 } 2008 2009 /* 2010 * zone_alloc() 2011 * 2012 * Attempt to allocate a zone from the zone magazine. 2013 */ 2014 static slzone_t 2015 zone_alloc(int flags) 2016 { 2017 slzone_t z; 2018 2019 zone_magazine_lock(); 2020 2021 z = magazine_alloc(&zone_magazine); 2022 if (z == NULL) { 2023 zone_magazine_unlock(); 2024 z = _vmem_alloc(ZoneSize, ZoneSize, flags); 2025 } else { 2026 z->z_Flags |= SLZF_UNOTZEROD; 2027 zone_magazine_unlock(); 2028 } 2029 return z; 2030 } 2031 2032 /* 2033 * Free a zone. 2034 */ 2035 static void 2036 zone_free(void *z) 2037 { 2038 void *excess[M_ZONE_HYSTERESIS]; 2039 int i; 2040 2041 zone_magazine_lock(); 2042 2043 bzero(z, sizeof(struct slzone)); 2044 2045 if (opt_madvise) 2046 madvise(z, ZoneSize, MADV_FREE); 2047 2048 i = magazine_free(&zone_magazine, z); 2049 2050 /* 2051 * If we failed to free, collect excess magazines; release the zone 2052 * magazine lock, and then free to the system via _vmem_free. Re-enable 2053 * BURST mode for the magazine. 2054 */ 2055 if (i == -1) { 2056 for (i = 0; i < M_ZONE_HYSTERESIS; ++i) { 2057 excess[i] = magazine_alloc(&zone_magazine); 2058 MASSERT_WTHUNLK(excess[i] != NULL, 2059 zone_magazine_unlock()); 2060 } 2061 zone_magazine_unlock(); 2062 2063 for (i = 0; i < M_ZONE_HYSTERESIS; ++i) 2064 _vmem_free(excess[i], ZoneSize); 2065 _vmem_free(z, ZoneSize); 2066 } else { 2067 zone_magazine_unlock(); 2068 } 2069 } 2070 2071 /* 2072 * _vmem_alloc() 2073 * 2074 * Directly map memory in PAGE_SIZE'd chunks with the specified 2075 * alignment. 2076 * 2077 * Alignment must be a multiple of PAGE_SIZE. 2078 * 2079 * Size must be >= alignment. 2080 */ 2081 static void * 2082 _vmem_alloc(size_t size, size_t align, int flags) 2083 { 2084 static char *addr_hint; 2085 static int reset_hint = 16; 2086 char *addr; 2087 char *save; 2088 2089 if (--reset_hint <= 0) { 2090 addr_hint = NULL; 2091 reset_hint = 16; 2092 } 2093 2094 /* 2095 * Map anonymous private memory. 
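         *
         * Alignment strategy (informal summary of the code below): take a
         * plain mmap() and hope it is already aligned; if not, retry once at
         * the rounded-up hint; as a last resort map size + align bytes and
         * trim the misaligned head and tail, e.g.:
         *
         *      save = mmap(NULL, size + align, ...);
         *      addr = (char *)(((uintptr_t)save + align - 1) &
         *                      ~(uintptr_t)(align - 1));
         *      // munmap [save, addr) and [addr + size, save + size + align)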
2096 */ 2097 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2098 MAP_PRIVATE|MAP_ANON, -1, 0); 2099 if (save == MAP_FAILED) 2100 goto worst_case; 2101 if (((uintptr_t)save & (align - 1)) == 0) 2102 return((void *)save); 2103 2104 addr_hint = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2105 munmap(save, size); 2106 2107 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2108 MAP_PRIVATE|MAP_ANON, -1, 0); 2109 if (save == MAP_FAILED) 2110 goto worst_case; 2111 if (((size_t)save & (align - 1)) == 0) 2112 return((void *)save); 2113 munmap(save, size); 2114 2115 worst_case: 2116 save = mmap(NULL, size + align, PROT_READ|PROT_WRITE, 2117 MAP_PRIVATE|MAP_ANON, -1, 0); 2118 if (save == MAP_FAILED) 2119 return NULL; 2120 2121 addr = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2122 if (save != addr) 2123 munmap(save, addr - save); 2124 if (addr + size != save + size + align) 2125 munmap(addr + size, save + align - addr); 2126 2127 addr_hint = addr + size; 2128 2129 return ((void *)addr); 2130 } 2131 2132 /* 2133 * _vmem_free() 2134 * 2135 * Free a chunk of memory allocated with _vmem_alloc() 2136 */ 2137 static void 2138 _vmem_free(void *ptr, size_t size) 2139 { 2140 munmap(ptr, size); 2141 } 2142 2143 /* 2144 * Panic on fatal conditions 2145 */ 2146 static void 2147 _mpanic(const char *ctl, ...) 2148 { 2149 va_list va; 2150 2151 if (malloc_panic == 0) { 2152 malloc_panic = 1; 2153 va_start(va, ctl); 2154 vfprintf(stderr, ctl, va); 2155 fprintf(stderr, "\n"); 2156 fflush(stderr); 2157 va_end(va); 2158 } 2159 abort(); 2160 } 2161 2162 __weak_reference(__aligned_alloc, aligned_alloc); 2163 __weak_reference(__malloc, malloc); 2164 __weak_reference(__calloc, calloc); 2165 __weak_reference(__posix_memalign, posix_memalign); 2166 __weak_reference(__realloc, realloc); 2167 __weak_reference(__free, free); 2168 __weak_reference(__malloc_usable_size, malloc_usable_size); 2169
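
/*
 * For quick experiments this file can also be built standalone as noted
 * near the top (the "cc -shared ..." comment) and interposed ahead of the
 * system malloc; one hypothetical invocation, assuming the dynamic linker
 * honors LD_PRELOAD:
 *
 *      cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c
 *      LD_PRELOAD=./nmalloc.so MALLOC_OPTIONS=Z ./a.out
 */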