1 /* 2 * NMALLOC.C - New Malloc (ported from kernel slab allocator) 3 * 4 * Copyright (c) 2003,2004,2009,2010-2019 The DragonFly Project, 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The DragonFly Project 8 * by Matthew Dillon <dillon@backplane.com> and by 9 * Venkatesh Srinivas <me@endeavour.zapto.org>. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in 19 * the documentation and/or other materials provided with the 20 * distribution. 21 * 3. Neither the name of The DragonFly Project nor the names of its 22 * contributors may be used to endorse or promote products derived 23 * from this software without specific, prior written permission. 24 * 25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 28 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 29 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 30 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 31 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 32 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 33 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 34 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 35 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $ 39 */ 40 /* 41 * This module implements a slab allocator drop-in replacement for the 42 * libc malloc(). 43 * 44 * A slab allocator reserves a ZONE for each chunk size, then lays the 45 * chunks out in an array within the zone. Allocation and deallocation 46 * is nearly instantaneous, and overhead losses are limited to a fixed 47 * worst-case amount. 48 * 49 * The slab allocator does not have to pre-initialize the list of 50 * free chunks for each zone, and the underlying VM will not be 51 * touched at all beyond the zone header until an actual allocation 52 * needs it. 53 * 54 * Slab management and locking is done on a per-zone basis. 55 * 56 * Alloc Size Chunking Number of zones 57 * 0-127 8 16 58 * 128-255 16 8 59 * 256-511 32 8 60 * 512-1023 64 8 61 * 1024-2047 128 8 62 * 2048-4095 256 8 63 * 4096-8191 512 8 64 * 8192-16383 1024 8 65 * 16384-32767 2048 8 66 * 67 * Allocations >= ZoneLimit go directly to mmap and a hash table 68 * is used to locate for free. One and Two-page allocations use the 69 * zone mechanic to avoid excessive mmap()/munmap() calls. 70 * 71 * API FEATURES AND SIDE EFFECTS 72 * 73 * + power-of-2 sized allocations up to a page will be power-of-2 aligned. 74 * Above that power-of-2 sized allocations are page-aligned. Non 75 * power-of-2 sized allocations are aligned the same as the chunk 76 * size for their zone. 77 * + malloc(0) returns a special non-NULL value 78 * + ability to allocate arbitrarily large chunks of memory 79 * + realloc will reuse the passed pointer if possible, within the 80 * limitations of the zone chunking. 81 * 82 * Multithreaded enhancements for small allocations introduced August 2010. 83 * These are in the spirit of 'libumem'. See: 84 * Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the 85 * slab allocator to many CPUs and arbitrary resources". In Proc. 2001 86 * USENIX Technical Conference. USENIX Association. 87 * 88 * Oversized allocations employ the BIGCACHE mechanic whereby large 89 * allocations may be handed significantly larger buffers, allowing them 90 * to avoid mmap/munmap operations even through significant realloc()s. 91 * The excess space is only trimmed if too many large allocations have been 92 * given this treatment. 93 * 94 * TUNING 95 * 96 * The value of the environment variable MALLOC_OPTIONS is a character string 97 * containing various flags to tune nmalloc. 98 * 99 * 'U' / ['u'] Generate / do not generate utrace entries for ktrace(1) 100 * This will generate utrace events for all malloc, 101 * realloc, and free calls. There are tools (mtrplay) to 102 * replay and allocation pattern or to graph heap structure 103 * (mtrgraph) which can interpret these logs. 104 * 'Z' / ['z'] Zero out / do not zero all allocations. 105 * Each new byte of memory allocated by malloc, realloc, or 106 * reallocf will be initialized to 0. This is intended for 107 * debugging and will affect performance negatively. 108 * 'H' / ['h'] Pass a hint to the kernel about pages unused by the 109 * allocation functions. 110 */ 111 112 /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */ 113 114 #include "namespace.h" 115 #include <sys/param.h> 116 #include <sys/types.h> 117 #include <sys/mman.h> 118 #include <sys/queue.h> 119 #include <sys/ktrace.h> 120 #include <stdio.h> 121 #include <stdint.h> 122 #include <stdlib.h> 123 #include <stdarg.h> 124 #include <stddef.h> 125 #include <unistd.h> 126 #include <string.h> 127 #include <fcntl.h> 128 #include <errno.h> 129 #include <pthread.h> 130 #include <machine/atomic.h> 131 #include "un-namespace.h" 132 133 #include "libc_private.h" 134 #include "spinlock.h" 135 136 void __free(void *); 137 void *__malloc(size_t); 138 void *__calloc(size_t, size_t); 139 void *__realloc(void *, size_t); 140 void *__aligned_alloc(size_t, size_t); 141 size_t __malloc_usable_size(const void *ptr); 142 int __posix_memalign(void **, size_t, size_t); 143 144 /* 145 * Linked list of large allocations 146 */ 147 typedef struct bigalloc { 148 struct bigalloc *next; /* hash link */ 149 void *base; /* base pointer */ 150 u_long active; /* bytes active */ 151 u_long bytes; /* bytes allocated */ 152 } *bigalloc_t; 153 154 /* 155 * Note that any allocations which are exact multiples of PAGE_SIZE, or 156 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem. 157 */ 158 #define MAX_SLAB_PAGEALIGN (2 * PAGE_SIZE) /* max slab for PAGE_SIZE*n */ 159 #define ZALLOC_ZONE_LIMIT (16 * 1024) /* max slab-managed alloc */ 160 #define ZALLOC_ZONE_SIZE (64 * 1024) /* zone size */ 161 #define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ 162 163 #if ZALLOC_ZONE_LIMIT == 16384 164 #define NZONES 72 165 #elif ZALLOC_ZONE_LIMIT == 32768 166 #define NZONES 80 167 #else 168 #error "I couldn't figure out NZONES" 169 #endif 170 171 /* 172 * Chunk structure for free elements 173 */ 174 typedef struct slchunk { 175 struct slchunk *c_Next; 176 } *slchunk_t; 177 178 /* 179 * The IN-BAND zone header is placed at the beginning of each zone. 180 */ 181 struct slglobaldata; 182 183 typedef struct slzone { 184 int32_t z_Magic; /* magic number for sanity check */ 185 int z_NFree; /* total free chunks / ualloc space */ 186 struct slzone *z_Next; /* ZoneAry[] link if z_NFree non-zero */ 187 int z_NMax; /* maximum free chunks */ 188 char *z_BasePtr; /* pointer to start of chunk array */ 189 int z_UIndex; /* current initial allocation index */ 190 int z_UEndIndex; /* last (first) allocation index */ 191 int z_ChunkSize; /* chunk size for validation */ 192 int z_FirstFreePg; /* chunk list on a page-by-page basis */ 193 int z_ZoneIndex; 194 int z_Flags; 195 struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE]; 196 } *slzone_t; 197 198 typedef struct slglobaldata { 199 spinlock_t Spinlock; 200 slzone_t ZoneAry[NZONES];/* linked list of zones NFree > 0 */ 201 } *slglobaldata_t; 202 203 #define SLZF_UNOTZEROD 0x0001 204 205 #define FASTSLABREALLOC 0x02 206 207 /* 208 * Misc constants. Note that allocations that are exact multiples of 209 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 210 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 211 */ 212 #define MIN_CHUNK_SIZE 8 /* in bytes */ 213 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 214 #define IN_SAME_PAGE_MASK (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK) 215 216 /* 217 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 218 * not be larger then 64. 219 */ 220 #define BIGHSHIFT 10 /* bigalloc hash table */ 221 #define BIGHSIZE (1 << BIGHSHIFT) 222 #define BIGHMASK (BIGHSIZE - 1) 223 #define BIGXSIZE (BIGHSIZE / 16) /* bigalloc lock table */ 224 #define BIGXMASK (BIGXSIZE - 1) 225 226 /* 227 * BIGCACHE caches oversized allocations. Note that a linear search is 228 * performed, so do not make the cache too large. 229 * 230 * BIGCACHE will garbage-collect excess space when the excess exceeds the 231 * specified value. A relatively large number should be used here because 232 * garbage collection is expensive. 233 */ 234 #define BIGCACHE 16 235 #define BIGCACHE_MASK (BIGCACHE - 1) 236 #define BIGCACHE_LIMIT (1024 * 1024) /* size limit */ 237 #define BIGCACHE_EXCESS (16 * 1024 * 1024) /* garbage collect */ 238 239 #define CACHE_CHUNKS 32 240 241 #define SAFLAG_ZERO 0x0001 242 #define SAFLAG_PASSIVE 0x0002 243 #define SAFLAG_MAGS 0x0004 244 245 /* 246 * Thread control 247 */ 248 249 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 250 251 /* 252 * The assertion macros try to pretty-print assertion failures 253 * which can be caused by corruption. If a lock is held, we 254 * provide a macro that attempts to release it before asserting 255 * in order to prevent (e.g.) a reentrant SIGABRT calling malloc 256 * and deadlocking, resulting in the program freezing up. 257 */ 258 #define MASSERT(exp) \ 259 do { if (__predict_false(!(exp))) \ 260 _mpanic("assertion: %s in %s", \ 261 #exp, __func__); \ 262 } while (0) 263 264 #define MASSERT_WTHUNLK(exp, unlk) \ 265 do { if (__predict_false(!(exp))) { \ 266 unlk; \ 267 _mpanic("assertion: %s in %s", \ 268 #exp, __func__); \ 269 } \ 270 } while (0) 271 272 /* 273 * Magazines, arrange so the structure is roughly 4KB. 274 */ 275 #define M_MAX_ROUNDS (512 - 3) 276 #define M_MIN_ROUNDS 16 277 #define M_ZONE_INIT_ROUNDS 64 278 #define M_ZONE_HYSTERESIS 32 279 280 struct magazine { 281 SLIST_ENTRY(magazine) nextmagazine; 282 283 int flags; 284 int capacity; /* Max rounds in this magazine */ 285 int rounds; /* Current number of free rounds */ 286 int unused01; 287 void *objects[M_MAX_ROUNDS]; 288 }; 289 290 SLIST_HEAD(magazinelist, magazine); 291 292 static spinlock_t zone_mag_lock; 293 static spinlock_t depot_spinlock; 294 static struct magazine zone_magazine = { 295 .flags = 0, 296 .capacity = M_ZONE_INIT_ROUNDS, 297 .rounds = 0, 298 }; 299 300 #define MAGAZINE_FULL(mp) (mp->rounds == mp->capacity) 301 #define MAGAZINE_NOTFULL(mp) (mp->rounds < mp->capacity) 302 #define MAGAZINE_EMPTY(mp) (mp->rounds == 0) 303 #define MAGAZINE_NOTEMPTY(mp) (mp->rounds != 0) 304 305 /* 306 * Each thread will have a pair of magazines per size-class (NZONES) 307 * The loaded magazine will support immediate allocations, the previous 308 * magazine will either be full or empty and can be swapped at need 309 */ 310 typedef struct magazine_pair { 311 struct magazine *loaded; 312 struct magazine *prev; 313 } magazine_pair; 314 315 /* A depot is a collection of magazines for a single zone. */ 316 typedef struct magazine_depot { 317 struct magazinelist full; 318 struct magazinelist empty; 319 spinlock_t lock; 320 } magazine_depot; 321 322 typedef struct thr_mags { 323 magazine_pair mags[NZONES]; 324 struct magazine *newmag; 325 int init; 326 } thr_mags; 327 328 static __thread thr_mags thread_mags TLS_ATTRIBUTE; 329 static pthread_key_t thread_mags_key; 330 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT; 331 static magazine_depot depots[NZONES]; 332 333 /* 334 * Fixed globals (not per-cpu) 335 */ 336 static const int ZoneSize = ZALLOC_ZONE_SIZE; 337 static const int ZoneLimit = ZALLOC_ZONE_LIMIT; 338 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE; 339 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1; 340 341 static int opt_madvise = 0; 342 static int opt_utrace = 0; 343 static int g_malloc_flags = 0; 344 static struct slglobaldata SLGlobalData; 345 static bigalloc_t bigalloc_array[BIGHSIZE]; 346 static spinlock_t bigspin_array[BIGXSIZE]; 347 static volatile void *bigcache_array[BIGCACHE]; /* atomic swap */ 348 static volatile size_t bigcache_size_array[BIGCACHE]; /* SMP races ok */ 349 static volatile int bigcache_index; /* SMP races ok */ 350 static int malloc_panic; 351 static size_t excess_alloc; /* excess big allocs */ 352 353 static void *_slaballoc(size_t size, int flags); 354 static void *_slabrealloc(void *ptr, size_t size); 355 static size_t _slabusablesize(const void *ptr); 356 static void _slabfree(void *ptr, int, bigalloc_t *); 357 static int _slabmemalign(void **memptr, size_t alignment, size_t size); 358 static void *_vmem_alloc(size_t bytes, size_t align, int flags); 359 static void _vmem_free(void *ptr, size_t bytes); 360 static void *magazine_alloc(struct magazine *); 361 static int magazine_free(struct magazine *, void *); 362 static void *mtmagazine_alloc(int zi, int flags); 363 static int mtmagazine_free(int zi, void *); 364 static void mtmagazine_init(void); 365 static void mtmagazine_destructor(void *); 366 static slzone_t zone_alloc(int flags); 367 static void zone_free(void *z); 368 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 369 static void malloc_init(void) __constructor(101); 370 371 struct nmalloc_utrace { 372 void *p; 373 size_t s; 374 void *r; 375 }; 376 377 #define UTRACE(a, b, c) \ 378 if (opt_utrace) { \ 379 struct nmalloc_utrace ut = { \ 380 .p = (a), \ 381 .s = (b), \ 382 .r = (c) \ 383 }; \ 384 utrace(&ut, sizeof(ut)); \ 385 } 386 387 static void 388 malloc_init(void) 389 { 390 const char *p = NULL; 391 392 if (issetugid() == 0) 393 p = getenv("MALLOC_OPTIONS"); 394 395 for (; p != NULL && *p != '\0'; p++) { 396 switch(*p) { 397 case 'u': opt_utrace = 0; break; 398 case 'U': opt_utrace = 1; break; 399 case 'h': opt_madvise = 0; break; 400 case 'H': opt_madvise = 1; break; 401 case 'z': g_malloc_flags = 0; break; 402 case 'Z': g_malloc_flags = SAFLAG_ZERO; break; 403 default: 404 break; 405 } 406 } 407 408 UTRACE((void *) -1, 0, NULL); 409 } 410 411 /* 412 * We have to install a handler for nmalloc thread teardowns when 413 * the thread is created. We cannot delay this because destructors in 414 * sophisticated userland programs can call malloc() for the first time 415 * during their thread exit. 416 * 417 * This routine is called directly from pthreads. 418 */ 419 void 420 _nmalloc_thr_init(void) 421 { 422 thr_mags *tp; 423 424 /* 425 * Disallow mtmagazine operations until the mtmagazine is 426 * initialized. 427 */ 428 tp = &thread_mags; 429 tp->init = -1; 430 431 _pthread_once(&thread_mags_once, mtmagazine_init); 432 _pthread_setspecific(thread_mags_key, tp); 433 tp->init = 1; 434 } 435 436 void 437 _nmalloc_thr_prepfork(void) 438 { 439 if (__isthreaded) { 440 _SPINLOCK(&zone_mag_lock); 441 _SPINLOCK(&depot_spinlock); 442 } 443 } 444 445 void 446 _nmalloc_thr_parentfork(void) 447 { 448 if (__isthreaded) { 449 _SPINUNLOCK(&depot_spinlock); 450 _SPINUNLOCK(&zone_mag_lock); 451 } 452 } 453 454 void 455 _nmalloc_thr_childfork(void) 456 { 457 if (__isthreaded) { 458 _SPINUNLOCK(&depot_spinlock); 459 _SPINUNLOCK(&zone_mag_lock); 460 } 461 } 462 463 /* 464 * Handle signal reentrancy safely whether we are threaded or not. 465 * This improves the stability for mono and will probably improve 466 * stability for other high-level languages which are becoming increasingly 467 * sophisticated. 468 * 469 * The sigblockall()/sigunblockall() implementation uses a counter on 470 * a per-thread shared user/kernel page, avoids system calls, and is thus 471 * very fast. 472 */ 473 static __inline void 474 nmalloc_sigblockall(void) 475 { 476 sigblockall(); 477 } 478 479 static __inline void 480 nmalloc_sigunblockall(void) 481 { 482 sigunblockall(); 483 } 484 485 /* 486 * Thread locks. 487 */ 488 static __inline void 489 slgd_lock(slglobaldata_t slgd) 490 { 491 if (__isthreaded) 492 _SPINLOCK(&slgd->Spinlock); 493 } 494 495 static __inline void 496 slgd_unlock(slglobaldata_t slgd) 497 { 498 if (__isthreaded) 499 _SPINUNLOCK(&slgd->Spinlock); 500 } 501 502 static __inline void 503 depot_lock(magazine_depot *dp __unused) 504 { 505 if (__isthreaded) 506 _SPINLOCK(&depot_spinlock); 507 } 508 509 static __inline void 510 depot_unlock(magazine_depot *dp __unused) 511 { 512 if (__isthreaded) 513 _SPINUNLOCK(&depot_spinlock); 514 } 515 516 static __inline void 517 zone_magazine_lock(void) 518 { 519 if (__isthreaded) 520 _SPINLOCK(&zone_mag_lock); 521 } 522 523 static __inline void 524 zone_magazine_unlock(void) 525 { 526 if (__isthreaded) 527 _SPINUNLOCK(&zone_mag_lock); 528 } 529 530 static __inline void 531 swap_mags(magazine_pair *mp) 532 { 533 struct magazine *tmp; 534 tmp = mp->loaded; 535 mp->loaded = mp->prev; 536 mp->prev = tmp; 537 } 538 539 /* 540 * bigalloc hashing and locking support. 541 * 542 * Return an unmasked hash code for the passed pointer. 543 */ 544 static __inline int 545 _bigalloc_hash(const void *ptr) 546 { 547 int hv; 548 549 hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^ 550 ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT)); 551 552 return(hv); 553 } 554 555 /* 556 * Lock the hash chain and return a pointer to its base for the specified 557 * address. 558 */ 559 static __inline bigalloc_t * 560 bigalloc_lock(void *ptr) 561 { 562 int hv = _bigalloc_hash(ptr); 563 bigalloc_t *bigp; 564 565 bigp = &bigalloc_array[hv & BIGHMASK]; 566 if (__isthreaded) 567 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 568 return(bigp); 569 } 570 571 /* 572 * Lock the hash chain and return a pointer to its base for the specified 573 * address. 574 * 575 * BUT, if the hash chain is empty, just return NULL and do not bother 576 * to lock anything. 577 */ 578 static __inline bigalloc_t * 579 bigalloc_check_and_lock(const void *ptr) 580 { 581 int hv = _bigalloc_hash(ptr); 582 bigalloc_t *bigp; 583 584 bigp = &bigalloc_array[hv & BIGHMASK]; 585 if (*bigp == NULL) 586 return(NULL); 587 if (__isthreaded) { 588 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 589 } 590 return(bigp); 591 } 592 593 static __inline void 594 bigalloc_unlock(const void *ptr) 595 { 596 int hv; 597 598 if (__isthreaded) { 599 hv = _bigalloc_hash(ptr); 600 _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]); 601 } 602 } 603 604 /* 605 * Find a bigcache entry that might work for the allocation. SMP races are 606 * ok here except for the swap (that is, it is ok if bigcache_size_array[i] 607 * is wrong or if a NULL or too-small big is returned). 608 * 609 * Generally speaking it is ok to find a large entry even if the bytes 610 * requested are relatively small (but still oversized), because we really 611 * don't know *what* the application is going to do with the buffer. 612 */ 613 static __inline 614 bigalloc_t 615 bigcache_find_alloc(size_t bytes) 616 { 617 bigalloc_t big = NULL; 618 size_t test; 619 int i; 620 621 for (i = 0; i < BIGCACHE; ++i) { 622 test = bigcache_size_array[i]; 623 if (bytes <= test) { 624 bigcache_size_array[i] = 0; 625 big = atomic_swap_ptr(&bigcache_array[i], NULL); 626 break; 627 } 628 } 629 return big; 630 } 631 632 /* 633 * Free a bigcache entry, possibly returning one that the caller really must 634 * free. This is used to cache recent oversized memory blocks. Only 635 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try 636 * to collect the biggest ones we can that are under the limit. 637 */ 638 static __inline 639 bigalloc_t 640 bigcache_find_free(bigalloc_t big) 641 { 642 int i; 643 int j; 644 int b; 645 646 b = ++bigcache_index; 647 for (i = 0; i < BIGCACHE; ++i) { 648 j = (b + i) & BIGCACHE_MASK; 649 if (bigcache_size_array[j] < big->bytes) { 650 bigcache_size_array[j] = big->bytes; 651 big = atomic_swap_ptr(&bigcache_array[j], big); 652 break; 653 } 654 } 655 return big; 656 } 657 658 static __inline 659 void 660 handle_excess_big(void) 661 { 662 int i; 663 bigalloc_t big; 664 bigalloc_t *bigp; 665 666 if (excess_alloc <= BIGCACHE_EXCESS) 667 return; 668 669 for (i = 0; i < BIGHSIZE; ++i) { 670 bigp = &bigalloc_array[i]; 671 if (*bigp == NULL) 672 continue; 673 if (__isthreaded) 674 _SPINLOCK(&bigspin_array[i & BIGXMASK]); 675 for (big = *bigp; big; big = big->next) { 676 if (big->active < big->bytes) { 677 MASSERT_WTHUNLK((big->active & PAGE_MASK) == 0, 678 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 679 MASSERT_WTHUNLK((big->bytes & PAGE_MASK) == 0, 680 _SPINUNLOCK(&bigspin_array[i & BIGXMASK])); 681 munmap((char *)big->base + big->active, 682 big->bytes - big->active); 683 atomic_add_long(&excess_alloc, 684 big->active - big->bytes); 685 big->bytes = big->active; 686 } 687 } 688 if (__isthreaded) 689 _SPINUNLOCK(&bigspin_array[i & BIGXMASK]); 690 } 691 } 692 693 /* 694 * Calculate the zone index for the allocation request size and set the 695 * allocation request size to that particular zone's chunk size. 696 */ 697 static __inline int 698 zoneindex(size_t *bytes, size_t *chunking) 699 { 700 size_t n = (unsigned int)*bytes; /* unsigned for shift opt */ 701 702 /* 703 * This used to be 8-byte chunks and 16 zones for n < 128. 704 * However some instructions may require 16-byte alignment 705 * (aka SIMD) and programs might not request an aligned size 706 * (aka GCC-7), so change this as follows: 707 * 708 * 0-15 bytes 8-byte alignment in two zones (0-1) 709 * 16-127 bytes 16-byte alignment in four zones (3-10) 710 * zone index 2 and 11-15 are currently unused. 711 */ 712 if (n < 16) { 713 *bytes = n = (n + 7) & ~7; 714 *chunking = 8; 715 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 716 /* zones 0,1, zone 2 is unused */ 717 } 718 if (n < 128) { 719 *bytes = n = (n + 15) & ~15; 720 *chunking = 16; 721 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 722 /* zones 3-10, zones 11-15 unused */ 723 } 724 if (n < 256) { 725 *bytes = n = (n + 15) & ~15; 726 *chunking = 16; 727 return(n / 16 + 7); 728 } 729 if (n < 8192) { 730 if (n < 512) { 731 *bytes = n = (n + 31) & ~31; 732 *chunking = 32; 733 return(n / 32 + 15); 734 } 735 if (n < 1024) { 736 *bytes = n = (n + 63) & ~63; 737 *chunking = 64; 738 return(n / 64 + 23); 739 } 740 if (n < 2048) { 741 *bytes = n = (n + 127) & ~127; 742 *chunking = 128; 743 return(n / 128 + 31); 744 } 745 if (n < 4096) { 746 *bytes = n = (n + 255) & ~255; 747 *chunking = 256; 748 return(n / 256 + 39); 749 } 750 *bytes = n = (n + 511) & ~511; 751 *chunking = 512; 752 return(n / 512 + 47); 753 } 754 #if ZALLOC_ZONE_LIMIT > 8192 755 if (n < 16384) { 756 *bytes = n = (n + 1023) & ~1023; 757 *chunking = 1024; 758 return(n / 1024 + 55); 759 } 760 #endif 761 #if ZALLOC_ZONE_LIMIT > 16384 762 if (n < 32768) { 763 *bytes = n = (n + 2047) & ~2047; 764 *chunking = 2048; 765 return(n / 2048 + 63); 766 } 767 #endif 768 _mpanic("Unexpected byte count %zu", n); 769 return(0); 770 } 771 772 /* 773 * We want large magazines for small allocations 774 */ 775 static __inline int 776 zonecapacity(int zi) 777 { 778 int cap; 779 780 cap = (NZONES - zi) * (M_MAX_ROUNDS - M_MIN_ROUNDS) / NZONES + 781 M_MIN_ROUNDS; 782 783 return cap; 784 } 785 786 /* 787 * malloc() - call internal slab allocator 788 */ 789 void * 790 __malloc(size_t size) 791 { 792 void *ptr; 793 794 nmalloc_sigblockall(); 795 ptr = _slaballoc(size, 0); 796 if (ptr == NULL) 797 errno = ENOMEM; 798 else 799 UTRACE(0, size, ptr); 800 nmalloc_sigunblockall(); 801 802 return(ptr); 803 } 804 805 #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) 806 807 /* 808 * calloc() - call internal slab allocator 809 */ 810 void * 811 __calloc(size_t number, size_t size) 812 { 813 void *ptr; 814 815 if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && 816 number > 0 && SIZE_MAX / number < size) { 817 errno = ENOMEM; 818 return(NULL); 819 } 820 821 nmalloc_sigblockall(); 822 ptr = _slaballoc(number * size, SAFLAG_ZERO); 823 if (ptr == NULL) 824 errno = ENOMEM; 825 else 826 UTRACE(0, number * size, ptr); 827 nmalloc_sigunblockall(); 828 829 return(ptr); 830 } 831 832 /* 833 * realloc() (SLAB ALLOCATOR) 834 * 835 * We do not attempt to optimize this routine beyond reusing the same 836 * pointer if the new size fits within the chunking of the old pointer's 837 * zone. 838 */ 839 void * 840 __realloc(void *ptr, size_t size) 841 { 842 void *ret; 843 844 nmalloc_sigblockall(); 845 ret = _slabrealloc(ptr, size); 846 if (ret == NULL) 847 errno = ENOMEM; 848 else 849 UTRACE(ptr, size, ret); 850 nmalloc_sigunblockall(); 851 852 return(ret); 853 } 854 855 /* 856 * malloc_usable_size() (SLAB ALLOCATOR) 857 */ 858 size_t 859 __malloc_usable_size(const void *ptr) 860 { 861 return _slabusablesize(ptr); 862 } 863 864 /* 865 * aligned_alloc() 866 * 867 * Allocate (size) bytes with a alignment of (alignment). 868 */ 869 void * 870 __aligned_alloc(size_t alignment, size_t size) 871 { 872 void *ptr; 873 int rc; 874 875 nmalloc_sigblockall(); 876 ptr = NULL; 877 rc = _slabmemalign(&ptr, alignment, size); 878 if (rc) 879 errno = rc; 880 nmalloc_sigunblockall(); 881 882 return (ptr); 883 } 884 885 /* 886 * posix_memalign() 887 * 888 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 889 * is a power of 2 >= sizeof(void *). 890 */ 891 int 892 __posix_memalign(void **memptr, size_t alignment, size_t size) 893 { 894 int rc; 895 896 /* 897 * OpenGroup spec issue 6 check 898 */ 899 if (alignment < sizeof(void *)) { 900 *memptr = NULL; 901 return(EINVAL); 902 } 903 904 nmalloc_sigblockall(); 905 rc = _slabmemalign(memptr, alignment, size); 906 nmalloc_sigunblockall(); 907 908 return (rc); 909 } 910 911 /* 912 * The slab allocator will allocate on power-of-2 boundaries up to 913 * at least PAGE_SIZE. We use the zoneindex mechanic to find a 914 * zone matching the requirements, and _vmem_alloc() otherwise. 915 */ 916 static int 917 _slabmemalign(void **memptr, size_t alignment, size_t size) 918 { 919 bigalloc_t *bigp; 920 bigalloc_t big; 921 size_t chunking; 922 int zi __unused; 923 924 if (alignment < 1) { 925 *memptr = NULL; 926 return(EINVAL); 927 } 928 929 /* 930 * OpenGroup spec issue 6 checks 931 */ 932 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 933 *memptr = NULL; 934 return(EINVAL); 935 } 936 937 /* 938 * Our zone mechanism guarantees same-sized alignment for any 939 * power-of-2 allocation. If size is a power-of-2 and reasonable 940 * we can just call _slaballoc() and be done. We round size up 941 * to the nearest alignment boundary to improve our odds of 942 * it becoming a power-of-2 if it wasn't before. 943 */ 944 if (size <= alignment) 945 size = alignment; 946 else 947 size = (size + alignment - 1) & ~(size_t)(alignment - 1); 948 949 /* 950 * If we have overflowed above when rounding to the nearest alignment 951 * boundary, just return ENOMEM, size should be == N * sizeof(void *). 952 * 953 * Power-of-2 allocations up to 8KB will be aligned to the allocation 954 * size and _slaballoc() can simply be used. Please see line 1082 955 * for this special case: 'Align the storage in the zone based on 956 * the chunking' has a special case for powers of 2. 957 */ 958 if (size == 0) 959 return(ENOMEM); 960 961 if (size <= MAX_SLAB_PAGEALIGN && 962 (size | (size - 1)) + 1 == (size << 1)) { 963 *memptr = _slaballoc(size, 0); 964 return(*memptr ? 0 : ENOMEM); 965 } 966 967 /* 968 * Otherwise locate a zone with a chunking that matches 969 * the requested alignment, within reason. Consider two cases: 970 * 971 * (1) A 1K allocation on a 32-byte alignment. The first zoneindex 972 * we find will be the best fit because the chunking will be 973 * greater or equal to the alignment. 974 * 975 * (2) A 513 allocation on a 256-byte alignment. In this case 976 * the first zoneindex we find will be for 576 byte allocations 977 * with a chunking of 64, which is not sufficient. To fix this 978 * we simply find the nearest power-of-2 >= size and use the 979 * same side-effect of _slaballoc() which guarantees 980 * same-alignment on a power-of-2 allocation. 981 */ 982 if (size < PAGE_SIZE) { 983 zi = zoneindex(&size, &chunking); 984 if (chunking >= alignment) { 985 *memptr = _slaballoc(size, 0); 986 return(*memptr ? 0 : ENOMEM); 987 } 988 if (size >= 1024) 989 alignment = 1024; 990 if (size >= 16384) 991 alignment = 16384; 992 while (alignment < size) 993 alignment <<= 1; 994 *memptr = _slaballoc(alignment, 0); 995 return(*memptr ? 0 : ENOMEM); 996 } 997 998 /* 999 * If the slab allocator cannot handle it use vmem_alloc(). 1000 * 1001 * Alignment must be adjusted up to at least PAGE_SIZE in this case. 1002 */ 1003 if (alignment < PAGE_SIZE) 1004 alignment = PAGE_SIZE; 1005 if (size < alignment) 1006 size = alignment; 1007 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1008 if (alignment == PAGE_SIZE && size <= BIGCACHE_LIMIT) { 1009 big = bigcache_find_alloc(size); 1010 if (big && big->bytes < size) { 1011 _slabfree(big->base, FASTSLABREALLOC, &big); 1012 big = NULL; 1013 } 1014 if (big) { 1015 *memptr = big->base; 1016 big->active = size; 1017 if (big->active < big->bytes) { 1018 atomic_add_long(&excess_alloc, 1019 big->bytes - big->active); 1020 } 1021 bigp = bigalloc_lock(*memptr); 1022 big->next = *bigp; 1023 *bigp = big; 1024 bigalloc_unlock(*memptr); 1025 handle_excess_big(); 1026 return(0); 1027 } 1028 } 1029 *memptr = _vmem_alloc(size, alignment, 0); 1030 if (*memptr == NULL) 1031 return(ENOMEM); 1032 1033 big = _slaballoc(sizeof(struct bigalloc), 0); 1034 if (big == NULL) { 1035 _vmem_free(*memptr, size); 1036 *memptr = NULL; 1037 return(ENOMEM); 1038 } 1039 bigp = bigalloc_lock(*memptr); 1040 big->base = *memptr; 1041 big->active = size; 1042 big->bytes = size; /* no excess */ 1043 big->next = *bigp; 1044 *bigp = big; 1045 bigalloc_unlock(*memptr); 1046 1047 return(0); 1048 } 1049 1050 /* 1051 * free() (SLAB ALLOCATOR) - do the obvious 1052 */ 1053 void 1054 __free(void *ptr) 1055 { 1056 UTRACE(ptr, 0, 0); 1057 1058 nmalloc_sigblockall(); 1059 _slabfree(ptr, 0, NULL); 1060 nmalloc_sigunblockall(); 1061 } 1062 1063 /* 1064 * _slaballoc() (SLAB ALLOCATOR) 1065 * 1066 * Allocate memory via the slab allocator. If the request is too large, 1067 * or if it page-aligned beyond a certain size, we fall back to the 1068 * KMEM subsystem 1069 */ 1070 static void * 1071 _slaballoc(size_t size, int flags) 1072 { 1073 slzone_t z; 1074 slchunk_t chunk; 1075 slglobaldata_t slgd; 1076 size_t chunking; 1077 thr_mags *tp; 1078 struct magazine *mp; 1079 int count; 1080 int zi; 1081 int off; 1082 void *obj; 1083 1084 /* 1085 * Handle the degenerate size == 0 case. Yes, this does happen. 1086 * Return a special pointer. This is to maintain compatibility with 1087 * the original malloc implementation. Certain devices, such as the 1088 * adaptec driver, not only allocate 0 bytes, they check for NULL and 1089 * also realloc() later on. Joy. 1090 */ 1091 if (size == 0) 1092 size = 1; 1093 1094 /* Capture global flags */ 1095 flags |= g_malloc_flags; 1096 1097 /* 1098 * Handle large allocations directly, with a separate bigmem cache. 1099 * 1100 * The backend allocator is pretty nasty on a SMP system. Use the 1101 * slab allocator for one and two page-sized chunks even though we 1102 * lose some efficiency. 1103 * 1104 * NOTE: Please see _slabmemalign(), which assumes that power-of-2 1105 * allocations up to an including MAX_SLAB_PAGEALIGN 1106 * can use _slaballoc() and be aligned to the same. The 1107 * zone cache can be used for this case, bigalloc does not 1108 * have to be used. 1109 */ 1110 if (size >= ZoneLimit || 1111 ((size & PAGE_MASK) == 0 && size > MAX_SLAB_PAGEALIGN)) { 1112 bigalloc_t big; 1113 bigalloc_t *bigp; 1114 1115 /* 1116 * Page-align and cache-color in case of virtually indexed 1117 * physically tagged L1 caches (aka SandyBridge). No sweat 1118 * otherwise, so just do it. 1119 * 1120 * (don't count as excess). 1121 */ 1122 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1123 1124 /* 1125 * If we have overflowed above when rounding to the page 1126 * boundary, something has passed us (size_t)[-PAGE_MASK..-1] 1127 * so just return NULL, size at this point should be >= 0. 1128 */ 1129 if (size == 0) 1130 return (NULL); 1131 1132 /* 1133 * Force an additional page offset for 8KB-aligned requests 1134 * (i.e. 8KB, 16KB, etc) that helps spread data across the 1135 * CPU caches at the cost of some dead space in the memory 1136 * map. 1137 */ 1138 if ((size & (PAGE_SIZE * 2 - 1)) == 0) 1139 size += PAGE_SIZE; 1140 1141 /* 1142 * Try to reuse a cached big block to avoid mmap'ing. If it 1143 * turns out not to fit our requirements we throw it away 1144 * and allocate normally. 1145 */ 1146 big = NULL; 1147 if (size <= BIGCACHE_LIMIT) { 1148 big = bigcache_find_alloc(size); 1149 if (big && big->bytes < size) { 1150 _slabfree(big->base, FASTSLABREALLOC, &big); 1151 big = NULL; 1152 } 1153 } 1154 if (big) { 1155 chunk = big->base; 1156 if (flags & SAFLAG_ZERO) 1157 bzero(chunk, size); 1158 } else { 1159 chunk = _vmem_alloc(size, PAGE_SIZE, flags); 1160 if (chunk == NULL) 1161 return(NULL); 1162 1163 big = _slaballoc(sizeof(struct bigalloc), 0); 1164 if (big == NULL) { 1165 _vmem_free(chunk, size); 1166 return(NULL); 1167 } 1168 big->base = chunk; 1169 big->bytes = size; 1170 } 1171 big->active = size; 1172 1173 bigp = bigalloc_lock(chunk); 1174 if (big->active < big->bytes) { 1175 atomic_add_long(&excess_alloc, 1176 big->bytes - big->active); 1177 } 1178 big->next = *bigp; 1179 *bigp = big; 1180 bigalloc_unlock(chunk); 1181 handle_excess_big(); 1182 1183 return(chunk); 1184 } 1185 1186 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 1187 zi = zoneindex(&size, &chunking); 1188 MASSERT(zi < NZONES); 1189 1190 obj = mtmagazine_alloc(zi, flags); 1191 if (obj != NULL) { 1192 if (flags & SAFLAG_ZERO) 1193 bzero(obj, size); 1194 return (obj); 1195 } 1196 1197 /* 1198 * Attempt to allocate out of an existing global zone. If all zones 1199 * are exhausted pull one off the free list or allocate a new one. 1200 */ 1201 slgd = &SLGlobalData; 1202 1203 again: 1204 if (slgd->ZoneAry[zi] == NULL) { 1205 z = zone_alloc(flags); 1206 if (z == NULL) 1207 goto fail; 1208 1209 /* 1210 * How big is the base structure? 1211 */ 1212 off = sizeof(struct slzone); 1213 1214 /* 1215 * Align the storage in the zone based on the chunking. 1216 * 1217 * Guarantee power-of-2 alignment for power-of-2-sized 1218 * chunks. Otherwise align based on the chunking size 1219 * (typically 8 or 16 bytes for small allocations). 1220 * 1221 * NOTE: Allocations >= ZoneLimit are governed by the 1222 * bigalloc code and typically only guarantee page-alignment. 1223 * 1224 * Set initial conditions for UIndex near the zone header 1225 * to reduce unecessary page faults, vs semi-randomization 1226 * to improve L1 cache saturation. 1227 * 1228 * NOTE: Please see _slabmemalign(), which assumes that 1229 * power-of-2 allocations up to an including 1230 * MAX_SLAB_PAGEALIGN can use _slaballoc() 1231 * and be aligned to the same. The zone cache can be 1232 * used for this case, bigalloc does not have to be 1233 * used. 1234 * 1235 * ALL power-of-2 requests that fall through to this 1236 * code use this rule (conditionals above limit this 1237 * to <= MAX_SLAB_PAGEALIGN). 1238 */ 1239 if ((size | (size - 1)) + 1 == (size << 1)) 1240 off = roundup2(off, size); 1241 else 1242 off = roundup2(off, chunking); 1243 z->z_Magic = ZALLOC_SLAB_MAGIC; 1244 z->z_ZoneIndex = zi; 1245 z->z_NMax = (ZoneSize - off) / size; 1246 z->z_NFree = z->z_NMax; 1247 z->z_BasePtr = (char *)z + off; 1248 z->z_UIndex = z->z_UEndIndex = 0; 1249 z->z_ChunkSize = size; 1250 z->z_FirstFreePg = ZonePageCount; 1251 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1252 flags &= ~SAFLAG_ZERO; /* already zero'd */ 1253 flags |= SAFLAG_PASSIVE; 1254 } 1255 1256 /* 1257 * Slide the base index for initial allocations out of the 1258 * next zone we create so we do not over-weight the lower 1259 * part of the cpu memory caches. 1260 */ 1261 slgd_lock(slgd); 1262 z->z_Next = slgd->ZoneAry[zi]; 1263 slgd->ZoneAry[zi] = z; 1264 } else { 1265 slgd_lock(slgd); 1266 z = slgd->ZoneAry[zi]; 1267 if (z == NULL) { 1268 slgd_unlock(slgd); 1269 goto again; 1270 } 1271 } 1272 1273 /* 1274 * Ok, we have a zone from which at least one chunk is available. 1275 */ 1276 MASSERT_WTHUNLK(z->z_NFree > 0, slgd_unlock(slgd)); 1277 1278 /* 1279 * Try to cache <count> chunks, up to CACHE_CHUNKS (32 typ) 1280 * to avoid unnecessary global lock contention. 1281 */ 1282 tp = &thread_mags; 1283 mp = tp->mags[zi].loaded; 1284 count = 0; 1285 if (mp && tp->init >= 0) { 1286 count = mp->capacity - mp->rounds; 1287 if (count >= z->z_NFree) 1288 count = z->z_NFree - 1; 1289 if (count > CACHE_CHUNKS) 1290 count = CACHE_CHUNKS; 1291 } 1292 1293 /* 1294 * Locate a chunk in a free page. This attempts to localize 1295 * reallocations into earlier pages without us having to sort 1296 * the chunk list. A chunk may still overlap a page boundary. 1297 */ 1298 while (z->z_FirstFreePg < ZonePageCount) { 1299 if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { 1300 if (((uintptr_t)chunk & ZoneMask) == 0) { 1301 slgd_unlock(slgd); 1302 _mpanic("assertion: corrupt malloc zone"); 1303 } 1304 z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; 1305 --z->z_NFree; 1306 1307 if (count == 0) 1308 goto done; 1309 mp->objects[mp->rounds++] = chunk; 1310 --count; 1311 continue; 1312 } 1313 ++z->z_FirstFreePg; 1314 } 1315 1316 /* 1317 * No chunks are available but NFree said we had some memory, 1318 * so it must be available in the never-before-used-memory 1319 * area governed by UIndex. The consequences are very 1320 * serious if our zone got corrupted so we use an explicit 1321 * panic rather then a KASSERT. 1322 */ 1323 for (;;) { 1324 chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size); 1325 --z->z_NFree; 1326 if (++z->z_UIndex == z->z_NMax) 1327 z->z_UIndex = 0; 1328 if (z->z_UIndex == z->z_UEndIndex) { 1329 if (z->z_NFree != 0) { 1330 slgd_unlock(slgd); 1331 _mpanic("slaballoc: corrupted zone"); 1332 } 1333 } 1334 if (count == 0) 1335 break; 1336 mp->objects[mp->rounds++] = chunk; 1337 --count; 1338 } 1339 1340 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1341 flags &= ~SAFLAG_ZERO; 1342 flags |= SAFLAG_PASSIVE; 1343 } 1344 1345 done: 1346 /* 1347 * Remove us from the ZoneAry[] when we become empty 1348 */ 1349 if (z->z_NFree == 0) { 1350 slgd->ZoneAry[zi] = z->z_Next; 1351 z->z_Next = NULL; 1352 } 1353 slgd_unlock(slgd); 1354 if (flags & SAFLAG_ZERO) 1355 bzero(chunk, size); 1356 1357 return(chunk); 1358 fail: 1359 return(NULL); 1360 } 1361 1362 /* 1363 * Reallocate memory within the chunk 1364 */ 1365 static void * 1366 _slabrealloc(void *ptr, size_t size) 1367 { 1368 bigalloc_t *bigp; 1369 void *nptr; 1370 slzone_t z; 1371 size_t chunking; 1372 1373 if (ptr == NULL) { 1374 return(_slaballoc(size, 0)); 1375 } 1376 1377 if (size == 0) 1378 size = 1; 1379 1380 /* 1381 * Handle oversized allocations. 1382 */ 1383 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1384 bigalloc_t big; 1385 size_t bigbytes; 1386 1387 while ((big = *bigp) != NULL) { 1388 if (big->base == ptr) { 1389 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1390 bigbytes = big->bytes; 1391 1392 /* 1393 * If it already fits determine if it makes 1394 * sense to shrink/reallocate. Try to optimize 1395 * programs which stupidly make incremental 1396 * reallocations larger or smaller by scaling 1397 * the allocation. Also deal with potential 1398 * coloring. 1399 */ 1400 if (size >= (bigbytes >> 1) && 1401 size <= bigbytes) { 1402 if (big->active != size) { 1403 atomic_add_long(&excess_alloc, 1404 big->active - 1405 size); 1406 } 1407 big->active = size; 1408 bigalloc_unlock(ptr); 1409 return(ptr); 1410 } 1411 1412 /* 1413 * For large reallocations, allocate more space 1414 * than we need to try to avoid excessive 1415 * reallocations later on. 1416 */ 1417 chunking = size + (size >> 3); 1418 chunking = (chunking + PAGE_MASK) & 1419 ~(size_t)PAGE_MASK; 1420 1421 /* 1422 * Try to allocate adjacently in case the 1423 * program is idiotically realloc()ing a 1424 * huge memory block just slightly bigger. 1425 * (llvm's llc tends to do this a lot). 1426 * 1427 * (MAP_TRYFIXED forces mmap to fail if there 1428 * is already something at the address). 1429 */ 1430 if (chunking > bigbytes) { 1431 char *addr; 1432 int errno_save = errno; 1433 1434 addr = mmap((char *)ptr + bigbytes, 1435 chunking - bigbytes, 1436 PROT_READ|PROT_WRITE, 1437 MAP_PRIVATE|MAP_ANON| 1438 MAP_TRYFIXED, 1439 -1, 0); 1440 errno = errno_save; 1441 if (addr == (char *)ptr + bigbytes) { 1442 atomic_add_long(&excess_alloc, 1443 big->active - 1444 big->bytes + 1445 chunking - 1446 size); 1447 big->bytes = chunking; 1448 big->active = size; 1449 bigalloc_unlock(ptr); 1450 1451 return(ptr); 1452 } 1453 MASSERT_WTHUNLK( 1454 (void *)addr == MAP_FAILED, 1455 bigalloc_unlock(ptr)); 1456 } 1457 1458 /* 1459 * Failed, unlink big and allocate fresh. 1460 * (note that we have to leave (big) intact 1461 * in case the slaballoc fails). 1462 */ 1463 *bigp = big->next; 1464 bigalloc_unlock(ptr); 1465 if ((nptr = _slaballoc(size, 0)) == NULL) { 1466 /* Relink block */ 1467 bigp = bigalloc_lock(ptr); 1468 big->next = *bigp; 1469 *bigp = big; 1470 bigalloc_unlock(ptr); 1471 return(NULL); 1472 } 1473 if (size > bigbytes) 1474 size = bigbytes; 1475 bcopy(ptr, nptr, size); 1476 atomic_add_long(&excess_alloc, big->active - 1477 big->bytes); 1478 _slabfree(ptr, FASTSLABREALLOC, &big); 1479 1480 return(nptr); 1481 } 1482 bigp = &big->next; 1483 } 1484 bigalloc_unlock(ptr); 1485 handle_excess_big(); 1486 } 1487 1488 /* 1489 * Get the original allocation's zone. If the new request winds 1490 * up using the same chunk size we do not have to do anything. 1491 * 1492 * NOTE: We don't have to lock the globaldata here, the fields we 1493 * access here will not change at least as long as we have control 1494 * over the allocation. 1495 */ 1496 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1497 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1498 1499 /* 1500 * Use zoneindex() to chunk-align the new size, as long as the 1501 * new size is not too large. 1502 */ 1503 if (size < ZoneLimit) { 1504 zoneindex(&size, &chunking); 1505 if (z->z_ChunkSize == size) { 1506 return(ptr); 1507 } 1508 } 1509 1510 /* 1511 * Allocate memory for the new request size and copy as appropriate. 1512 */ 1513 if ((nptr = _slaballoc(size, 0)) != NULL) { 1514 if (size > z->z_ChunkSize) 1515 size = z->z_ChunkSize; 1516 bcopy(ptr, nptr, size); 1517 _slabfree(ptr, 0, NULL); 1518 } 1519 1520 return(nptr); 1521 } 1522 1523 /* 1524 * Returns the usable area of an allocated pointer 1525 */ 1526 static size_t 1527 _slabusablesize(const void *ptr) 1528 { 1529 size_t size; 1530 bigalloc_t *bigp; 1531 slzone_t z; 1532 1533 if (ptr == NULL) 1534 return 0; 1535 1536 /* 1537 * Handle oversized allocations. 1538 */ 1539 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1540 bigalloc_t big; 1541 1542 while ((big = *bigp) != NULL) { 1543 const char *base = big->base; 1544 1545 if ((const char *)ptr >= base && 1546 (const char *)ptr < base + big->bytes) 1547 { 1548 size = base + big->bytes - (const char *)ptr; 1549 1550 bigalloc_unlock(ptr); 1551 1552 return size; 1553 } 1554 bigp = &big->next; 1555 } 1556 bigalloc_unlock(ptr); 1557 handle_excess_big(); 1558 } 1559 1560 /* 1561 * Get the original allocation's zone. If the new request winds 1562 * up using the same chunk size we do not have to do anything. 1563 * 1564 * NOTE: We don't have to lock the globaldata here, the fields we 1565 * access here will not change at least as long as we have control 1566 * over the allocation. 1567 */ 1568 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1569 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1570 1571 size = z->z_ChunkSize - 1572 ((const char *)ptr - (const char *)z->z_BasePtr) % 1573 z->z_ChunkSize; 1574 return size; 1575 } 1576 1577 /* 1578 * free (SLAB ALLOCATOR) 1579 * 1580 * Free a memory block previously allocated by malloc. Note that we do not 1581 * attempt to uplodate ks_loosememuse as MP races could prevent us from 1582 * checking memory limits in malloc. 1583 * 1584 * flags: 1585 * FASTSLABREALLOC Fast call from realloc, *rbigp already 1586 * unlinked. 1587 * 1588 * MPSAFE 1589 */ 1590 static void 1591 _slabfree(void *ptr, int flags, bigalloc_t *rbigp) 1592 { 1593 slzone_t z; 1594 slchunk_t chunk; 1595 bigalloc_t big; 1596 bigalloc_t *bigp; 1597 slglobaldata_t slgd; 1598 size_t size; 1599 int zi; 1600 int pgno; 1601 1602 /* Fast realloc path for big allocations */ 1603 if (flags & FASTSLABREALLOC) { 1604 big = *rbigp; 1605 goto fastslabrealloc; 1606 } 1607 1608 /* 1609 * Handle NULL frees and special 0-byte allocations 1610 */ 1611 if (ptr == NULL) 1612 return; 1613 1614 /* 1615 * Handle oversized allocations. 1616 */ 1617 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1618 while ((big = *bigp) != NULL) { 1619 if (big->base == ptr) { 1620 *bigp = big->next; 1621 atomic_add_long(&excess_alloc, big->active - 1622 big->bytes); 1623 bigalloc_unlock(ptr); 1624 1625 /* 1626 * Try to stash the block we are freeing, 1627 * potentially receiving another block in 1628 * return which must be freed. 1629 */ 1630 fastslabrealloc: 1631 if (big->bytes <= BIGCACHE_LIMIT) { 1632 big = bigcache_find_free(big); 1633 if (big == NULL) 1634 return; 1635 } 1636 ptr = big->base; /* reload */ 1637 size = big->bytes; 1638 _slabfree(big, 0, NULL); 1639 _vmem_free(ptr, size); 1640 return; 1641 } 1642 bigp = &big->next; 1643 } 1644 bigalloc_unlock(ptr); 1645 handle_excess_big(); 1646 } 1647 1648 /* 1649 * Zone case. Figure out the zone based on the fact that it is 1650 * ZoneSize aligned. 1651 */ 1652 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1653 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1654 1655 size = z->z_ChunkSize; 1656 zi = z->z_ZoneIndex; 1657 1658 if (g_malloc_flags & SAFLAG_ZERO) 1659 bzero(ptr, size); 1660 1661 if (mtmagazine_free(zi, ptr) == 0) 1662 return; 1663 1664 pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; 1665 chunk = ptr; 1666 1667 /* 1668 * Add this free non-zero'd chunk to a linked list for reuse, adjust 1669 * z_FirstFreePg. 1670 */ 1671 slgd = &SLGlobalData; 1672 slgd_lock(slgd); 1673 1674 chunk->c_Next = z->z_PageAry[pgno]; 1675 z->z_PageAry[pgno] = chunk; 1676 if (z->z_FirstFreePg > pgno) 1677 z->z_FirstFreePg = pgno; 1678 1679 /* 1680 * Bump the number of free chunks. If it becomes non-zero the zone 1681 * must be added back onto the appropriate list. 1682 */ 1683 if (z->z_NFree++ == 0) { 1684 z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; 1685 slgd->ZoneAry[z->z_ZoneIndex] = z; 1686 } 1687 1688 /* 1689 * If the zone becomes totally free we get rid of it. 1690 */ 1691 if (z->z_NFree == z->z_NMax) { 1692 slzone_t *pz; 1693 1694 pz = &slgd->ZoneAry[z->z_ZoneIndex]; 1695 while (z != *pz) 1696 pz = &(*pz)->z_Next; 1697 *pz = z->z_Next; 1698 z->z_Magic = -1; 1699 z->z_Next = NULL; 1700 slgd_unlock(slgd); 1701 zone_free(z); 1702 } else { 1703 slgd_unlock(slgd); 1704 } 1705 } 1706 1707 /* 1708 * Allocate and return a magazine. Return NULL if no magazines are 1709 * available. 1710 */ 1711 static __inline void * 1712 magazine_alloc(struct magazine *mp) 1713 { 1714 void *obj; 1715 1716 if (mp && MAGAZINE_NOTEMPTY(mp)) { 1717 obj = mp->objects[--mp->rounds]; 1718 } else { 1719 obj = NULL; 1720 } 1721 return (obj); 1722 } 1723 1724 static __inline int 1725 magazine_free(struct magazine *mp, void *p) 1726 { 1727 if (mp != NULL && MAGAZINE_NOTFULL(mp)) { 1728 mp->objects[mp->rounds++] = p; 1729 return 0; 1730 } 1731 1732 return -1; 1733 } 1734 1735 static void * 1736 mtmagazine_alloc(int zi, int flags) 1737 { 1738 thr_mags *tp; 1739 struct magazine *mp, *emptymag; 1740 magazine_depot *d; 1741 void *obj; 1742 1743 /* 1744 * Do not try to access per-thread magazines while the mtmagazine 1745 * is being initialized or destroyed. 1746 */ 1747 tp = &thread_mags; 1748 if (tp->init < 0) 1749 return(NULL); 1750 1751 /* 1752 * Primary per-thread allocation loop 1753 */ 1754 for (;;) { 1755 /* 1756 * Make sure we have a magazine available for use. 1757 */ 1758 if (tp->newmag == NULL && (flags & SAFLAG_MAGS) == 0) { 1759 mp = _slaballoc(sizeof(struct magazine), 1760 SAFLAG_ZERO | SAFLAG_MAGS); 1761 if (mp == NULL) { 1762 obj = NULL; 1763 break; 1764 } 1765 if (tp->newmag) { 1766 _slabfree(mp, 0, NULL); 1767 } else { 1768 tp->newmag = mp; 1769 } 1770 } 1771 1772 /* 1773 * If the loaded magazine has rounds, allocate and return 1774 */ 1775 mp = tp->mags[zi].loaded; 1776 obj = magazine_alloc(mp); 1777 if (obj) 1778 break; 1779 1780 /* 1781 * The prev magazine can only be completely empty or completely 1782 * full. If it is full, swap it with the loaded magazine 1783 * and retry. 1784 */ 1785 mp = tp->mags[zi].prev; 1786 if (mp && MAGAZINE_FULL(mp)) { 1787 MASSERT(mp->rounds != 0); 1788 swap_mags(&tp->mags[zi]); /* prev now empty */ 1789 continue; 1790 } 1791 1792 /* 1793 * If the depot has no loaded magazines ensure that tp->loaded 1794 * is not NULL and return NULL. This will allow _slaballoc() 1795 * to cache referals to SLGlobalData in a magazine. 1796 */ 1797 d = &depots[zi]; 1798 if (SLIST_EMPTY(&d->full)) { /* UNLOCKED TEST IS SAFE */ 1799 mp = tp->mags[zi].loaded; 1800 if (mp == NULL && tp->newmag) { 1801 mp = tp->newmag; 1802 tp->newmag = NULL; 1803 mp->capacity = zonecapacity(zi); 1804 mp->rounds = 0; 1805 mp->flags = 0; 1806 tp->mags[zi].loaded = mp; 1807 } 1808 break; 1809 } 1810 1811 /* 1812 * Cycle: depot(loaded) -> loaded -> prev -> depot(empty) 1813 * 1814 * If we race and the depot has no full magazines, retry. 1815 */ 1816 depot_lock(d); 1817 mp = SLIST_FIRST(&d->full); 1818 if (mp) { 1819 SLIST_REMOVE_HEAD(&d->full, nextmagazine); 1820 emptymag = tp->mags[zi].prev; 1821 if (emptymag) { 1822 SLIST_INSERT_HEAD(&d->empty, emptymag, 1823 nextmagazine); 1824 } 1825 tp->mags[zi].prev = tp->mags[zi].loaded; 1826 tp->mags[zi].loaded = mp; 1827 MASSERT(MAGAZINE_NOTEMPTY(mp)); 1828 } 1829 depot_unlock(d); 1830 continue; 1831 } 1832 1833 return (obj); 1834 } 1835 1836 static int 1837 mtmagazine_free(int zi, void *ptr) 1838 { 1839 thr_mags *tp; 1840 struct magazine *mp, *loadedmag; 1841 magazine_depot *d; 1842 int rc = -1; 1843 1844 /* 1845 * Do not try to access per-thread magazines while the mtmagazine 1846 * is being initialized or destroyed. 1847 */ 1848 tp = &thread_mags; 1849 if (tp->init < 0) 1850 return(-1); 1851 1852 /* 1853 * Primary per-thread freeing loop 1854 */ 1855 for (;;) { 1856 /* 1857 * Make sure a new magazine is available in case we have 1858 * to use it. Staging the newmag allows us to avoid 1859 * some locking/reentrancy complexity. 1860 * 1861 * Temporarily disable the per-thread caches for this 1862 * allocation to avoid reentrancy and/or to avoid a 1863 * stack overflow if the [zi] happens to be the same that 1864 * would be used to allocate the new magazine. 1865 * 1866 * WARNING! Calling _slaballoc() can indirectly modify 1867 * tp->newmag. 1868 */ 1869 if (tp->newmag == NULL) { 1870 mp = _slaballoc(sizeof(struct magazine), 1871 SAFLAG_ZERO | SAFLAG_MAGS); 1872 if (tp->newmag && mp) 1873 _slabfree(mp, 0, NULL); 1874 else 1875 tp->newmag = mp; 1876 if (tp->newmag == NULL) { 1877 rc = -1; 1878 break; 1879 } 1880 } 1881 1882 /* 1883 * If the loaded magazine has space, free directly to it 1884 */ 1885 rc = magazine_free(tp->mags[zi].loaded, ptr); 1886 if (rc == 0) 1887 break; 1888 1889 /* 1890 * The prev magazine can only be completely empty or completely 1891 * full. If it is empty, swap it with the loaded magazine 1892 * and retry. 1893 */ 1894 mp = tp->mags[zi].prev; 1895 if (mp && MAGAZINE_EMPTY(mp)) { 1896 MASSERT(mp->rounds == 0); 1897 swap_mags(&tp->mags[zi]); /* prev now full */ 1898 continue; 1899 } 1900 1901 /* 1902 * Try to get an empty magazine from the depot. Cycle 1903 * through depot(empty)->loaded->prev->depot(full). 1904 * Retry if an empty magazine was available from the depot. 1905 */ 1906 d = &depots[zi]; 1907 depot_lock(d); 1908 1909 if ((loadedmag = tp->mags[zi].prev) != NULL) 1910 SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine); 1911 tp->mags[zi].prev = tp->mags[zi].loaded; 1912 mp = SLIST_FIRST(&d->empty); 1913 if (mp) { 1914 tp->mags[zi].loaded = mp; 1915 SLIST_REMOVE_HEAD(&d->empty, nextmagazine); 1916 depot_unlock(d); 1917 MASSERT(MAGAZINE_NOTFULL(mp)); 1918 } else { 1919 mp = tp->newmag; 1920 tp->newmag = NULL; 1921 mp->capacity = zonecapacity(zi); 1922 mp->rounds = 0; 1923 mp->flags = 0; 1924 tp->mags[zi].loaded = mp; 1925 depot_unlock(d); 1926 } 1927 } 1928 1929 return rc; 1930 } 1931 1932 static void 1933 mtmagazine_init(void) 1934 { 1935 int error; 1936 1937 error = _pthread_key_create(&thread_mags_key, mtmagazine_destructor); 1938 if (error) 1939 abort(); 1940 } 1941 1942 /* 1943 * This function is only used by the thread exit destructor 1944 */ 1945 static void 1946 mtmagazine_drain(struct magazine *mp) 1947 { 1948 void *obj; 1949 1950 nmalloc_sigblockall(); 1951 while (MAGAZINE_NOTEMPTY(mp)) { 1952 obj = magazine_alloc(mp); 1953 _slabfree(obj, 0, NULL); 1954 } 1955 nmalloc_sigunblockall(); 1956 } 1957 1958 /* 1959 * mtmagazine_destructor() 1960 * 1961 * When a thread exits, we reclaim all its resources; all its magazines are 1962 * drained and the structures are freed. 1963 * 1964 * WARNING! The destructor can be called multiple times if the larger user 1965 * program has its own destructors which run after ours which 1966 * allocate or free memory. 1967 */ 1968 static void 1969 mtmagazine_destructor(void *thrp) 1970 { 1971 thr_mags *tp = thrp; 1972 struct magazine *mp; 1973 int i; 1974 1975 if (__isexiting) 1976 return; 1977 1978 /* 1979 * Prevent further use of mtmagazines while we are destructing 1980 * them, as well as for any destructors which are run after us 1981 * prior to the thread actually being destroyed. 1982 */ 1983 tp->init = -1; 1984 1985 nmalloc_sigblockall(); 1986 for (i = 0; i < NZONES; i++) { 1987 mp = tp->mags[i].loaded; 1988 tp->mags[i].loaded = NULL; 1989 if (mp) { 1990 if (MAGAZINE_NOTEMPTY(mp)) 1991 mtmagazine_drain(mp); 1992 _slabfree(mp, 0, NULL); 1993 } 1994 1995 mp = tp->mags[i].prev; 1996 tp->mags[i].prev = NULL; 1997 if (mp) { 1998 if (MAGAZINE_NOTEMPTY(mp)) 1999 mtmagazine_drain(mp); 2000 _slabfree(mp, 0, NULL); 2001 } 2002 } 2003 if (tp->newmag) { 2004 mp = tp->newmag; 2005 tp->newmag = NULL; 2006 _slabfree(mp, 0, NULL); 2007 } 2008 nmalloc_sigunblockall(); 2009 } 2010 2011 /* 2012 * zone_alloc() 2013 * 2014 * Attempt to allocate a zone from the zone magazine. 2015 */ 2016 static slzone_t 2017 zone_alloc(int flags) 2018 { 2019 slzone_t z; 2020 2021 zone_magazine_lock(); 2022 2023 z = magazine_alloc(&zone_magazine); 2024 if (z == NULL) { 2025 zone_magazine_unlock(); 2026 z = _vmem_alloc(ZoneSize, ZoneSize, flags); 2027 } else { 2028 z->z_Flags |= SLZF_UNOTZEROD; 2029 zone_magazine_unlock(); 2030 } 2031 return z; 2032 } 2033 2034 /* 2035 * Free a zone. 2036 */ 2037 static void 2038 zone_free(void *z) 2039 { 2040 void *excess[M_ZONE_HYSTERESIS]; 2041 int i; 2042 2043 zone_magazine_lock(); 2044 2045 bzero(z, sizeof(struct slzone)); 2046 2047 if (opt_madvise) 2048 madvise(z, ZoneSize, MADV_FREE); 2049 2050 i = magazine_free(&zone_magazine, z); 2051 2052 /* 2053 * If we failed to free, collect excess magazines; release the zone 2054 * magazine lock, and then free to the system via _vmem_free. Re-enable 2055 * BURST mode for the magazine. 2056 */ 2057 if (i == -1) { 2058 for (i = 0; i < M_ZONE_HYSTERESIS; ++i) { 2059 excess[i] = magazine_alloc(&zone_magazine); 2060 MASSERT_WTHUNLK(excess[i] != NULL, 2061 zone_magazine_unlock()); 2062 } 2063 zone_magazine_unlock(); 2064 2065 for (i = 0; i < M_ZONE_HYSTERESIS; ++i) 2066 _vmem_free(excess[i], ZoneSize); 2067 _vmem_free(z, ZoneSize); 2068 } else { 2069 zone_magazine_unlock(); 2070 } 2071 } 2072 2073 /* 2074 * _vmem_alloc() 2075 * 2076 * Directly map memory in PAGE_SIZE'd chunks with the specified 2077 * alignment. 2078 * 2079 * Alignment must be a multiple of PAGE_SIZE. 2080 * 2081 * Size must be >= alignment. 2082 */ 2083 static void * 2084 _vmem_alloc(size_t size, size_t align, int flags) 2085 { 2086 static char *addr_hint; 2087 static int reset_hint = 16; 2088 char *addr; 2089 char *save; 2090 2091 if (--reset_hint <= 0) { 2092 addr_hint = NULL; 2093 reset_hint = 16; 2094 } 2095 2096 /* 2097 * Map anonymous private memory. 2098 */ 2099 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2100 MAP_PRIVATE|MAP_ANON, -1, 0); 2101 if (save == MAP_FAILED) 2102 goto worst_case; 2103 if (((uintptr_t)save & (align - 1)) == 0) 2104 return((void *)save); 2105 2106 addr_hint = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2107 munmap(save, size); 2108 2109 save = mmap(addr_hint, size, PROT_READ|PROT_WRITE, 2110 MAP_PRIVATE|MAP_ANON, -1, 0); 2111 if (save == MAP_FAILED) 2112 goto worst_case; 2113 if (((size_t)save & (align - 1)) == 0) 2114 return((void *)save); 2115 munmap(save, size); 2116 2117 worst_case: 2118 save = mmap(NULL, size + align, PROT_READ|PROT_WRITE, 2119 MAP_PRIVATE|MAP_ANON, -1, 0); 2120 if (save == MAP_FAILED) 2121 return NULL; 2122 2123 addr = (char *)(((size_t)save + (align - 1)) & ~(align - 1)); 2124 if (save != addr) 2125 munmap(save, addr - save); 2126 if (addr + size != save + size + align) 2127 munmap(addr + size, save + align - addr); 2128 2129 addr_hint = addr + size; 2130 2131 return ((void *)addr); 2132 } 2133 2134 /* 2135 * _vmem_free() 2136 * 2137 * Free a chunk of memory allocated with _vmem_alloc() 2138 */ 2139 static void 2140 _vmem_free(void *ptr, size_t size) 2141 { 2142 munmap(ptr, size); 2143 } 2144 2145 /* 2146 * Panic on fatal conditions 2147 */ 2148 static void 2149 _mpanic(const char *ctl, ...) 2150 { 2151 va_list va; 2152 2153 if (malloc_panic == 0) { 2154 malloc_panic = 1; 2155 va_start(va, ctl); 2156 vfprintf(stderr, ctl, va); 2157 fprintf(stderr, "\n"); 2158 fflush(stderr); 2159 va_end(va); 2160 } 2161 abort(); 2162 } 2163 2164 __weak_reference(__aligned_alloc, aligned_alloc); 2165 __weak_reference(__malloc, malloc); 2166 __weak_reference(__calloc, calloc); 2167 __weak_reference(__posix_memalign, posix_memalign); 2168 __weak_reference(__realloc, realloc); 2169 __weak_reference(__free, free); 2170 __weak_reference(__malloc_usable_size, malloc_usable_size); 2171