1 /* 2 * NMALLOC.C - New Malloc (ported from kernel slab allocator) 3 * 4 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved. 5 * 6 * This code is derived from software contributed to The DragonFly Project 7 * by Matthew Dillon <dillon@backplane.com> and by 8 * Venkatesh Srinivas <me@endeavour.zapto.org>. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in 18 * the documentation and/or other materials provided with the 19 * distribution. 20 * 3. Neither the name of The DragonFly Project nor the names of its 21 * contributors may be used to endorse or promote products derived 22 * from this software without specific, prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 27 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 28 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 29 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 30 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 31 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 32 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 33 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 34 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 35 * SUCH DAMAGE. 36 * 37 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $ 38 */ 39 /* 40 * This module implements a slab allocator drop-in replacement for the 41 * libc malloc(). 42 * 43 * A slab allocator reserves a ZONE for each chunk size, then lays the 44 * chunks out in an array within the zone. Allocation and deallocation 45 * is nearly instantaneous, and overhead losses are limited to a fixed 46 * worst-case amount. 47 * 48 * The slab allocator does not have to pre-initialize the list of 49 * free chunks for each zone, and the underlying VM will not be 50 * touched at all beyond the zone header until an actual allocation 51 * needs it. 52 * 53 * Slab management and locking is done on a per-zone basis. 54 * 55 * Alloc Size Chunking Number of zones 56 * 0-127 8 16 57 * 128-255 16 8 58 * 256-511 32 8 59 * 512-1023 64 8 60 * 1024-2047 128 8 61 * 2048-4095 256 8 62 * 4096-8191 512 8 63 * 8192-16383 1024 8 64 * 16384-32767 2048 8 65 * 66 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table 67 * is used to locate for free. One and Two-page allocations use the 68 * zone mechanic to avoid excessive mmap()/munmap() calls. 69 * 70 * API FEATURES AND SIDE EFFECTS 71 * 72 * + power-of-2 sized allocations up to a page will be power-of-2 aligned. 73 * Above that power-of-2 sized allocations are page-aligned. Non 74 * power-of-2 sized allocations are aligned the same as the chunk 75 * size for their zone. 76 * + malloc(0) returns a special non-NULL value 77 * + ability to allocate arbitrarily large chunks of memory 78 * + realloc will reuse the passed pointer if possible, within the 79 * limitations of the zone chunking. 80 * 81 * Multithreaded enhancements for small allocations introduced August 2010. 82 * These are in the spirit of 'libumem'. See: 83 * Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the 84 * slab allocator to many CPUs and arbitrary resources". In Proc. 2001 85 * USENIX Technical Conference. USENIX Association. 86 * 87 * Oversized allocations employ the BIGCACHE mechanic whereby large 88 * allocations may be handed significantly larger buffers, allowing them 89 * to avoid mmap/munmap operations even through significant realloc()s. 90 * The excess space is only trimmed if too many large allocations have been 91 * given this treatment. 92 * 93 * TUNING 94 * 95 * The value of the environment variable MALLOC_OPTIONS is a character string 96 * containing various flags to tune nmalloc. 97 * 98 * 'U' / ['u'] Generate / do not generate utrace entries for ktrace(1) 99 * This will generate utrace events for all malloc, 100 * realloc, and free calls. There are tools (mtrplay) to 101 * replay and allocation pattern or to graph heap structure 102 * (mtrgraph) which can interpret these logs. 103 * 'Z' / ['z'] Zero out / do not zero all allocations. 104 * Each new byte of memory allocated by malloc, realloc, or 105 * reallocf will be initialized to 0. This is intended for 106 * debugging and will affect performance negatively. 107 * 'H' / ['h'] Pass a hint to the kernel about pages unused by the 108 * allocation functions. 109 */ 110 111 /* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */ 112 113 #include "libc_private.h" 114 115 #include <sys/param.h> 116 #include <sys/types.h> 117 #include <sys/mman.h> 118 #include <sys/queue.h> 119 #include <sys/uio.h> 120 #include <sys/ktrace.h> 121 #include <stdio.h> 122 #include <stdint.h> 123 #include <stdlib.h> 124 #include <stdarg.h> 125 #include <stddef.h> 126 #include <unistd.h> 127 #include <string.h> 128 #include <fcntl.h> 129 #include <errno.h> 130 #include <pthread.h> 131 #include <machine/atomic.h> 132 133 #include "spinlock.h" 134 #include "un-namespace.h" 135 136 137 /* 138 * Linked list of large allocations 139 */ 140 typedef struct bigalloc { 141 struct bigalloc *next; /* hash link */ 142 void *base; /* base pointer */ 143 u_long active; /* bytes active */ 144 u_long bytes; /* bytes allocated */ 145 } *bigalloc_t; 146 147 /* 148 * Note that any allocations which are exact multiples of PAGE_SIZE, or 149 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem. 150 */ 151 #define ZALLOC_ZONE_LIMIT (16 * 1024) /* max slab-managed alloc */ 152 #define ZALLOC_MIN_ZONE_SIZE (32 * 1024) /* minimum zone size */ 153 #define ZALLOC_MAX_ZONE_SIZE (128 * 1024) /* maximum zone size */ 154 #define ZALLOC_ZONE_SIZE (64 * 1024) 155 #define ZALLOC_SLAB_MAGIC 0x736c6162 /* magic sanity */ 156 #define ZALLOC_SLAB_SLIDE 20 /* L1-cache skip */ 157 158 #if ZALLOC_ZONE_LIMIT == 16384 159 #define NZONES 72 160 #elif ZALLOC_ZONE_LIMIT == 32768 161 #define NZONES 80 162 #else 163 #error "I couldn't figure out NZONES" 164 #endif 165 166 /* 167 * Chunk structure for free elements 168 */ 169 typedef struct slchunk { 170 struct slchunk *c_Next; 171 } *slchunk_t; 172 173 /* 174 * The IN-BAND zone header is placed at the beginning of each zone. 175 */ 176 struct slglobaldata; 177 178 typedef struct slzone { 179 int32_t z_Magic; /* magic number for sanity check */ 180 int z_NFree; /* total free chunks / ualloc space */ 181 struct slzone *z_Next; /* ZoneAry[] link if z_NFree non-zero */ 182 int z_NMax; /* maximum free chunks */ 183 char *z_BasePtr; /* pointer to start of chunk array */ 184 int z_UIndex; /* current initial allocation index */ 185 int z_UEndIndex; /* last (first) allocation index */ 186 int z_ChunkSize; /* chunk size for validation */ 187 int z_FirstFreePg; /* chunk list on a page-by-page basis */ 188 int z_ZoneIndex; 189 int z_Flags; 190 struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE]; 191 } *slzone_t; 192 193 typedef struct slglobaldata { 194 spinlock_t Spinlock; 195 slzone_t ZoneAry[NZONES];/* linked list of zones NFree > 0 */ 196 int JunkIndex; 197 } *slglobaldata_t; 198 199 #define SLZF_UNOTZEROD 0x0001 200 201 #define FASTSLABREALLOC 0x02 202 203 /* 204 * Misc constants. Note that allocations that are exact multiples of 205 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module. 206 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists. 207 */ 208 #define MIN_CHUNK_SIZE 8 /* in bytes */ 209 #define MIN_CHUNK_MASK (MIN_CHUNK_SIZE - 1) 210 #define IN_SAME_PAGE_MASK (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK) 211 212 /* 213 * WARNING: A limited number of spinlocks are available, BIGXSIZE should 214 * not be larger then 64. 215 */ 216 #define ZERO_LENGTH_PTR ((void *)&malloc_dummy_pointer) 217 218 #define BIGHSHIFT 10 /* bigalloc hash table */ 219 #define BIGHSIZE (1 << BIGHSHIFT) 220 #define BIGHMASK (BIGHSIZE - 1) 221 #define BIGXSIZE (BIGHSIZE / 16) /* bigalloc lock table */ 222 #define BIGXMASK (BIGXSIZE - 1) 223 224 /* 225 * BIGCACHE caches oversized allocations. Note that a linear search is 226 * performed, so do not make the cache too large. 227 * 228 * BIGCACHE will garbage-collect excess space when the excess exceeds the 229 * specified value. A relatively large number should be used here because 230 * garbage collection is expensive. 231 */ 232 #define BIGCACHE 16 233 #define BIGCACHE_MASK (BIGCACHE - 1) 234 #define BIGCACHE_LIMIT (1024 * 1024) /* size limit */ 235 #define BIGCACHE_EXCESS (16 * 1024 * 1024) /* garbage collect */ 236 237 #define SAFLAG_ZERO 0x0001 238 #define SAFLAG_PASSIVE 0x0002 239 240 /* 241 * Thread control 242 */ 243 244 #define arysize(ary) (sizeof(ary)/sizeof((ary)[0])) 245 246 #define MASSERT(exp) do { if (__predict_false(!(exp))) \ 247 _mpanic("assertion: %s in %s", \ 248 #exp, __func__); \ 249 } while (0) 250 251 /* 252 * Magazines 253 */ 254 255 #define M_MAX_ROUNDS 64 256 #define M_ZONE_ROUNDS 64 257 #define M_LOW_ROUNDS 32 258 #define M_INIT_ROUNDS 8 259 #define M_BURST_FACTOR 8 260 #define M_BURST_NSCALE 2 261 262 #define M_BURST 0x0001 263 #define M_BURST_EARLY 0x0002 264 265 struct magazine { 266 SLIST_ENTRY(magazine) nextmagazine; 267 268 int flags; 269 int capacity; /* Max rounds in this magazine */ 270 int rounds; /* Current number of free rounds */ 271 int burst_factor; /* Number of blocks to prefill with */ 272 int low_factor; /* Free till low_factor from full mag */ 273 void *objects[M_MAX_ROUNDS]; 274 }; 275 276 SLIST_HEAD(magazinelist, magazine); 277 278 static spinlock_t zone_mag_lock; 279 static struct magazine zone_magazine = { 280 .flags = M_BURST | M_BURST_EARLY, 281 .capacity = M_ZONE_ROUNDS, 282 .rounds = 0, 283 .burst_factor = M_BURST_FACTOR, 284 .low_factor = M_LOW_ROUNDS 285 }; 286 287 #define MAGAZINE_FULL(mp) (mp->rounds == mp->capacity) 288 #define MAGAZINE_NOTFULL(mp) (mp->rounds < mp->capacity) 289 #define MAGAZINE_EMPTY(mp) (mp->rounds == 0) 290 #define MAGAZINE_NOTEMPTY(mp) (mp->rounds != 0) 291 292 /* 293 * Each thread will have a pair of magazines per size-class (NZONES) 294 * The loaded magazine will support immediate allocations, the previous 295 * magazine will either be full or empty and can be swapped at need 296 */ 297 typedef struct magazine_pair { 298 struct magazine *loaded; 299 struct magazine *prev; 300 } magazine_pair; 301 302 /* A depot is a collection of magazines for a single zone. */ 303 typedef struct magazine_depot { 304 struct magazinelist full; 305 struct magazinelist empty; 306 spinlock_t lock; 307 } magazine_depot; 308 309 typedef struct thr_mags { 310 magazine_pair mags[NZONES]; 311 struct magazine *newmag; 312 int init; 313 } thr_mags; 314 315 /* 316 * With this attribute set, do not require a function call for accessing 317 * this variable when the code is compiled -fPIC. Empty for libc_rtld 318 * (like __thread). 319 */ 320 #ifdef __LIBC_RTLD 321 #define TLS_ATTRIBUTE 322 #else 323 #define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec"))) 324 #endif 325 326 static int mtmagazine_free_live; 327 static __thread thr_mags thread_mags TLS_ATTRIBUTE; 328 static pthread_key_t thread_mags_key; 329 static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT; 330 static magazine_depot depots[NZONES]; 331 332 /* 333 * Fixed globals (not per-cpu) 334 */ 335 static const int ZoneSize = ZALLOC_ZONE_SIZE; 336 static const int ZoneLimit = ZALLOC_ZONE_LIMIT; 337 static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE; 338 static const int ZoneMask = ZALLOC_ZONE_SIZE - 1; 339 340 static int opt_madvise = 0; 341 static int opt_utrace = 0; 342 static int g_malloc_flags = 0; 343 static struct slglobaldata SLGlobalData; 344 static bigalloc_t bigalloc_array[BIGHSIZE]; 345 static spinlock_t bigspin_array[BIGXSIZE]; 346 static volatile void *bigcache_array[BIGCACHE]; /* atomic swap */ 347 static volatile size_t bigcache_size_array[BIGCACHE]; /* SMP races ok */ 348 static volatile int bigcache_index; /* SMP races ok */ 349 static int malloc_panic; 350 static int malloc_dummy_pointer; 351 static size_t excess_alloc; /* excess big allocs */ 352 353 static void *_slaballoc(size_t size, int flags); 354 static void *_slabrealloc(void *ptr, size_t size); 355 static void _slabfree(void *ptr, int, bigalloc_t *); 356 static void *_vmem_alloc(size_t bytes, size_t align, int flags); 357 static void _vmem_free(void *ptr, size_t bytes); 358 static void *magazine_alloc(struct magazine *, int *); 359 static int magazine_free(struct magazine *, void *); 360 static void *mtmagazine_alloc(int zi); 361 static int mtmagazine_free(int zi, void *); 362 static void mtmagazine_init(void); 363 static void mtmagazine_destructor(void *); 364 static slzone_t zone_alloc(int flags); 365 static void zone_free(void *z); 366 static void _mpanic(const char *ctl, ...) __printflike(1, 2); 367 static void malloc_init(void) __constructor(101); 368 369 struct nmalloc_utrace { 370 void *p; 371 size_t s; 372 void *r; 373 }; 374 375 #define UTRACE(a, b, c) \ 376 if (opt_utrace) { \ 377 struct nmalloc_utrace ut = { \ 378 .p = (a), \ 379 .s = (b), \ 380 .r = (c) \ 381 }; \ 382 utrace(&ut, sizeof(ut)); \ 383 } 384 385 static void 386 malloc_init(void) 387 { 388 const char *p = NULL; 389 390 if (issetugid() == 0) 391 p = getenv("MALLOC_OPTIONS"); 392 393 for (; p != NULL && *p != '\0'; p++) { 394 switch(*p) { 395 case 'u': opt_utrace = 0; break; 396 case 'U': opt_utrace = 1; break; 397 case 'h': opt_madvise = 0; break; 398 case 'H': opt_madvise = 1; break; 399 case 'z': g_malloc_flags = 0; break; 400 case 'Z': g_malloc_flags = SAFLAG_ZERO; break; 401 default: 402 break; 403 } 404 } 405 406 UTRACE((void *) -1, 0, NULL); 407 } 408 409 /* 410 * We have to install a handler for nmalloc thread teardowns when 411 * the thread is created. We cannot delay this because destructors in 412 * sophisticated userland programs can call malloc() for the first time 413 * during their thread exit. 414 * 415 * This routine is called directly from pthreads. 416 */ 417 void 418 _nmalloc_thr_init(void) 419 { 420 thr_mags *tp; 421 422 /* 423 * Disallow mtmagazine operations until the mtmagazine is 424 * initialized. 425 */ 426 tp = &thread_mags; 427 tp->init = -1; 428 429 if (mtmagazine_free_live == 0) { 430 mtmagazine_free_live = 1; 431 pthread_once(&thread_mags_once, mtmagazine_init); 432 } 433 pthread_setspecific(thread_mags_key, tp); 434 tp->init = 1; 435 } 436 437 /* 438 * Thread locks. 439 */ 440 static __inline void 441 slgd_lock(slglobaldata_t slgd) 442 { 443 if (__isthreaded) 444 _SPINLOCK(&slgd->Spinlock); 445 } 446 447 static __inline void 448 slgd_unlock(slglobaldata_t slgd) 449 { 450 if (__isthreaded) 451 _SPINUNLOCK(&slgd->Spinlock); 452 } 453 454 static __inline void 455 depot_lock(magazine_depot *dp) 456 { 457 if (__isthreaded) 458 _SPINLOCK(&dp->lock); 459 } 460 461 static __inline void 462 depot_unlock(magazine_depot *dp) 463 { 464 if (__isthreaded) 465 _SPINUNLOCK(&dp->lock); 466 } 467 468 static __inline void 469 zone_magazine_lock(void) 470 { 471 if (__isthreaded) 472 _SPINLOCK(&zone_mag_lock); 473 } 474 475 static __inline void 476 zone_magazine_unlock(void) 477 { 478 if (__isthreaded) 479 _SPINUNLOCK(&zone_mag_lock); 480 } 481 482 static __inline void 483 swap_mags(magazine_pair *mp) 484 { 485 struct magazine *tmp; 486 tmp = mp->loaded; 487 mp->loaded = mp->prev; 488 mp->prev = tmp; 489 } 490 491 /* 492 * bigalloc hashing and locking support. 493 * 494 * Return an unmasked hash code for the passed pointer. 495 */ 496 static __inline int 497 _bigalloc_hash(void *ptr) 498 { 499 int hv; 500 501 hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^ 502 ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT)); 503 504 return(hv); 505 } 506 507 /* 508 * Lock the hash chain and return a pointer to its base for the specified 509 * address. 510 */ 511 static __inline bigalloc_t * 512 bigalloc_lock(void *ptr) 513 { 514 int hv = _bigalloc_hash(ptr); 515 bigalloc_t *bigp; 516 517 bigp = &bigalloc_array[hv & BIGHMASK]; 518 if (__isthreaded) 519 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 520 return(bigp); 521 } 522 523 /* 524 * Lock the hash chain and return a pointer to its base for the specified 525 * address. 526 * 527 * BUT, if the hash chain is empty, just return NULL and do not bother 528 * to lock anything. 529 */ 530 static __inline bigalloc_t * 531 bigalloc_check_and_lock(void *ptr) 532 { 533 int hv = _bigalloc_hash(ptr); 534 bigalloc_t *bigp; 535 536 bigp = &bigalloc_array[hv & BIGHMASK]; 537 if (*bigp == NULL) 538 return(NULL); 539 if (__isthreaded) { 540 _SPINLOCK(&bigspin_array[hv & BIGXMASK]); 541 } 542 return(bigp); 543 } 544 545 static __inline void 546 bigalloc_unlock(void *ptr) 547 { 548 int hv; 549 550 if (__isthreaded) { 551 hv = _bigalloc_hash(ptr); 552 _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]); 553 } 554 } 555 556 /* 557 * Find a bigcache entry that might work for the allocation. SMP races are 558 * ok here except for the swap (that is, it is ok if bigcache_size_array[i] 559 * is wrong or if a NULL or too-small big is returned). 560 * 561 * Generally speaking it is ok to find a large entry even if the bytes 562 * requested are relatively small (but still oversized), because we really 563 * don't know *what* the application is going to do with the buffer. 564 */ 565 static __inline 566 bigalloc_t 567 bigcache_find_alloc(size_t bytes) 568 { 569 bigalloc_t big = NULL; 570 size_t test; 571 int i; 572 573 for (i = 0; i < BIGCACHE; ++i) { 574 test = bigcache_size_array[i]; 575 if (bytes <= test) { 576 bigcache_size_array[i] = 0; 577 big = atomic_swap_ptr(&bigcache_array[i], NULL); 578 break; 579 } 580 } 581 return big; 582 } 583 584 /* 585 * Free a bigcache entry, possibly returning one that the caller really must 586 * free. This is used to cache recent oversized memory blocks. Only 587 * big blocks smaller than BIGCACHE_LIMIT will be cached this way, so try 588 * to collect the biggest ones we can that are under the limit. 589 */ 590 static __inline 591 bigalloc_t 592 bigcache_find_free(bigalloc_t big) 593 { 594 int i; 595 int j; 596 int b; 597 598 b = ++bigcache_index; 599 for (i = 0; i < BIGCACHE; ++i) { 600 j = (b + i) & BIGCACHE_MASK; 601 if (bigcache_size_array[j] < big->bytes) { 602 bigcache_size_array[j] = big->bytes; 603 big = atomic_swap_ptr(&bigcache_array[j], big); 604 break; 605 } 606 } 607 return big; 608 } 609 610 static __inline 611 void 612 handle_excess_big(void) 613 { 614 int i; 615 bigalloc_t big; 616 bigalloc_t *bigp; 617 618 if (excess_alloc <= BIGCACHE_EXCESS) 619 return; 620 621 for (i = 0; i < BIGHSIZE; ++i) { 622 bigp = &bigalloc_array[i]; 623 if (*bigp == NULL) 624 continue; 625 if (__isthreaded) 626 _SPINLOCK(&bigspin_array[i & BIGXMASK]); 627 for (big = *bigp; big; big = big->next) { 628 if (big->active < big->bytes) { 629 MASSERT((big->active & PAGE_MASK) == 0); 630 MASSERT((big->bytes & PAGE_MASK) == 0); 631 munmap((char *)big->base + big->active, 632 big->bytes - big->active); 633 atomic_add_long(&excess_alloc, 634 big->active - big->bytes); 635 big->bytes = big->active; 636 } 637 } 638 if (__isthreaded) 639 _SPINUNLOCK(&bigspin_array[i & BIGXMASK]); 640 } 641 } 642 643 /* 644 * Calculate the zone index for the allocation request size and set the 645 * allocation request size to that particular zone's chunk size. 646 */ 647 static __inline int 648 zoneindex(size_t *bytes, size_t *chunking) 649 { 650 size_t n = (unsigned int)*bytes; /* unsigned for shift opt */ 651 652 /* 653 * This used to be 8-byte chunks and 16 zones for n < 128. 654 * However some instructions may require 16-byte alignment 655 * (aka SIMD) and programs might not request an aligned size 656 * (aka GCC-7), so change this as follows: 657 * 658 * 0-15 bytes 8-byte alignment in two zones (0-1) 659 * 16-127 bytes 16-byte alignment in four zones (3-10) 660 * zone index 2 and 11-15 are currently unused. 661 */ 662 if (n < 16) { 663 *bytes = n = (n + 7) & ~7; 664 *chunking = 8; 665 return(n / 8 - 1); /* 8 byte chunks, 2 zones */ 666 /* zones 0,1, zone 2 is unused */ 667 } 668 if (n < 128) { 669 *bytes = n = (n + 15) & ~15; 670 *chunking = 16; 671 return(n / 16 + 2); /* 16 byte chunks, 8 zones */ 672 /* zones 3-10, zones 11-15 unused */ 673 } 674 if (n < 256) { 675 *bytes = n = (n + 15) & ~15; 676 *chunking = 16; 677 return(n / 16 + 7); 678 } 679 if (n < 8192) { 680 if (n < 512) { 681 *bytes = n = (n + 31) & ~31; 682 *chunking = 32; 683 return(n / 32 + 15); 684 } 685 if (n < 1024) { 686 *bytes = n = (n + 63) & ~63; 687 *chunking = 64; 688 return(n / 64 + 23); 689 } 690 if (n < 2048) { 691 *bytes = n = (n + 127) & ~127; 692 *chunking = 128; 693 return(n / 128 + 31); 694 } 695 if (n < 4096) { 696 *bytes = n = (n + 255) & ~255; 697 *chunking = 256; 698 return(n / 256 + 39); 699 } 700 *bytes = n = (n + 511) & ~511; 701 *chunking = 512; 702 return(n / 512 + 47); 703 } 704 #if ZALLOC_ZONE_LIMIT > 8192 705 if (n < 16384) { 706 *bytes = n = (n + 1023) & ~1023; 707 *chunking = 1024; 708 return(n / 1024 + 55); 709 } 710 #endif 711 #if ZALLOC_ZONE_LIMIT > 16384 712 if (n < 32768) { 713 *bytes = n = (n + 2047) & ~2047; 714 *chunking = 2048; 715 return(n / 2048 + 63); 716 } 717 #endif 718 _mpanic("Unexpected byte count %zu", n); 719 return(0); 720 } 721 722 /* 723 * malloc() - call internal slab allocator 724 */ 725 void * 726 __malloc(size_t size) 727 { 728 void *ptr; 729 730 ptr = _slaballoc(size, 0); 731 if (ptr == NULL) 732 errno = ENOMEM; 733 else 734 UTRACE(0, size, ptr); 735 return(ptr); 736 } 737 738 #define MUL_NO_OVERFLOW (1UL << (sizeof(size_t) * 4)) 739 740 /* 741 * calloc() - call internal slab allocator 742 */ 743 void * 744 __calloc(size_t number, size_t size) 745 { 746 void *ptr; 747 748 if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) && 749 number > 0 && SIZE_MAX / number < size) { 750 errno = ENOMEM; 751 return(NULL); 752 } 753 754 ptr = _slaballoc(number * size, SAFLAG_ZERO); 755 if (ptr == NULL) 756 errno = ENOMEM; 757 else 758 UTRACE(0, number * size, ptr); 759 return(ptr); 760 } 761 762 /* 763 * realloc() (SLAB ALLOCATOR) 764 * 765 * We do not attempt to optimize this routine beyond reusing the same 766 * pointer if the new size fits within the chunking of the old pointer's 767 * zone. 768 */ 769 void * 770 __realloc(void *ptr, size_t size) 771 { 772 void *ret; 773 ret = _slabrealloc(ptr, size); 774 if (ret == NULL) 775 errno = ENOMEM; 776 else 777 UTRACE(ptr, size, ret); 778 return(ret); 779 } 780 781 /* 782 * posix_memalign() 783 * 784 * Allocate (size) bytes with a alignment of (alignment), where (alignment) 785 * is a power of 2 >= sizeof(void *). 786 * 787 * The slab allocator will allocate on power-of-2 boundaries up to 788 * at least PAGE_SIZE. We use the zoneindex mechanic to find a 789 * zone matching the requirements, and _vmem_alloc() otherwise. 790 */ 791 int 792 __posix_memalign(void **memptr, size_t alignment, size_t size) 793 { 794 bigalloc_t *bigp; 795 bigalloc_t big; 796 size_t chunking; 797 int zi __unused; 798 799 /* 800 * OpenGroup spec issue 6 checks 801 */ 802 if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) { 803 *memptr = NULL; 804 return(EINVAL); 805 } 806 if (alignment < sizeof(void *)) { 807 *memptr = NULL; 808 return(EINVAL); 809 } 810 811 /* 812 * Our zone mechanism guarantees same-sized alignment for any 813 * power-of-2 allocation. If size is a power-of-2 and reasonable 814 * we can just call _slaballoc() and be done. We round size up 815 * to the nearest alignment boundary to improve our odds of 816 * it becoming a power-of-2 if it wasn't before. 817 */ 818 if (size <= alignment) 819 size = alignment; 820 else 821 size = (size + alignment - 1) & ~(size_t)(alignment - 1); 822 if (size < PAGE_SIZE && (size | (size - 1)) + 1 == (size << 1)) { 823 *memptr = _slaballoc(size, 0); 824 return(*memptr ? 0 : ENOMEM); 825 } 826 827 /* 828 * Otherwise locate a zone with a chunking that matches 829 * the requested alignment, within reason. Consider two cases: 830 * 831 * (1) A 1K allocation on a 32-byte alignment. The first zoneindex 832 * we find will be the best fit because the chunking will be 833 * greater or equal to the alignment. 834 * 835 * (2) A 513 allocation on a 256-byte alignment. In this case 836 * the first zoneindex we find will be for 576 byte allocations 837 * with a chunking of 64, which is not sufficient. To fix this 838 * we simply find the nearest power-of-2 >= size and use the 839 * same side-effect of _slaballoc() which guarantees 840 * same-alignment on a power-of-2 allocation. 841 */ 842 if (size < PAGE_SIZE) { 843 zi = zoneindex(&size, &chunking); 844 if (chunking >= alignment) { 845 *memptr = _slaballoc(size, 0); 846 return(*memptr ? 0 : ENOMEM); 847 } 848 if (size >= 1024) 849 alignment = 1024; 850 if (size >= 16384) 851 alignment = 16384; 852 while (alignment < size) 853 alignment <<= 1; 854 *memptr = _slaballoc(alignment, 0); 855 return(*memptr ? 0 : ENOMEM); 856 } 857 858 /* 859 * If the slab allocator cannot handle it use vmem_alloc(). 860 * 861 * Alignment must be adjusted up to at least PAGE_SIZE in this case. 862 */ 863 if (alignment < PAGE_SIZE) 864 alignment = PAGE_SIZE; 865 if (size < alignment) 866 size = alignment; 867 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 868 *memptr = _vmem_alloc(size, alignment, 0); 869 if (*memptr == NULL) 870 return(ENOMEM); 871 872 big = _slaballoc(sizeof(struct bigalloc), 0); 873 if (big == NULL) { 874 _vmem_free(*memptr, size); 875 *memptr = NULL; 876 return(ENOMEM); 877 } 878 bigp = bigalloc_lock(*memptr); 879 big->base = *memptr; 880 big->active = size; 881 big->bytes = size; /* no excess */ 882 big->next = *bigp; 883 *bigp = big; 884 bigalloc_unlock(*memptr); 885 886 return(0); 887 } 888 889 /* 890 * free() (SLAB ALLOCATOR) - do the obvious 891 */ 892 void 893 __free(void *ptr) 894 { 895 UTRACE(ptr, 0, 0); 896 _slabfree(ptr, 0, NULL); 897 } 898 899 /* 900 * _slaballoc() (SLAB ALLOCATOR) 901 * 902 * Allocate memory via the slab allocator. If the request is too large, 903 * or if it page-aligned beyond a certain size, we fall back to the 904 * KMEM subsystem 905 */ 906 static void * 907 _slaballoc(size_t size, int flags) 908 { 909 slzone_t z; 910 slchunk_t chunk; 911 slglobaldata_t slgd; 912 size_t chunking; 913 int zi; 914 int off; 915 void *obj; 916 917 /* 918 * Handle the degenerate size == 0 case. Yes, this does happen. 919 * Return a special pointer. This is to maintain compatibility with 920 * the original malloc implementation. Certain devices, such as the 921 * adaptec driver, not only allocate 0 bytes, they check for NULL and 922 * also realloc() later on. Joy. 923 */ 924 if (size == 0) 925 return(ZERO_LENGTH_PTR); 926 927 /* Capture global flags */ 928 flags |= g_malloc_flags; 929 930 /* 931 * Handle large allocations directly. There should not be very many 932 * of these so performance is not a big issue. 933 * 934 * The backend allocator is pretty nasty on a SMP system. Use the 935 * slab allocator for one and two page-sized chunks even though we 936 * lose some efficiency. 937 */ 938 if (size >= ZoneLimit || 939 ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) { 940 bigalloc_t big; 941 bigalloc_t *bigp; 942 943 /* 944 * Page-align and cache-color in case of virtually indexed 945 * physically tagged L1 caches (aka SandyBridge). No sweat 946 * otherwise, so just do it. 947 * 948 * (don't count as excess). 949 */ 950 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 951 if ((size & (PAGE_SIZE * 2 - 1)) == 0) 952 size += PAGE_SIZE; 953 954 /* 955 * Try to reuse a cached big block to avoid mmap'ing. If it 956 * turns out not to fit our requirements we throw it away 957 * and allocate normally. 958 */ 959 big = NULL; 960 if (size <= BIGCACHE_LIMIT) { 961 big = bigcache_find_alloc(size); 962 if (big && big->bytes < size) { 963 _slabfree(big->base, FASTSLABREALLOC, &big); 964 big = NULL; 965 } 966 } 967 if (big) { 968 chunk = big->base; 969 if (flags & SAFLAG_ZERO) 970 bzero(chunk, size); 971 } else { 972 chunk = _vmem_alloc(size, PAGE_SIZE, flags); 973 if (chunk == NULL) 974 return(NULL); 975 976 big = _slaballoc(sizeof(struct bigalloc), 0); 977 if (big == NULL) { 978 _vmem_free(chunk, size); 979 return(NULL); 980 } 981 big->base = chunk; 982 big->bytes = size; 983 } 984 big->active = size; 985 986 bigp = bigalloc_lock(chunk); 987 if (big->active < big->bytes) { 988 atomic_add_long(&excess_alloc, 989 big->bytes - big->active); 990 } 991 big->next = *bigp; 992 *bigp = big; 993 bigalloc_unlock(chunk); 994 handle_excess_big(); 995 996 return(chunk); 997 } 998 999 /* Compute allocation zone; zoneindex will panic on excessive sizes */ 1000 zi = zoneindex(&size, &chunking); 1001 MASSERT(zi < NZONES); 1002 1003 obj = mtmagazine_alloc(zi); 1004 if (obj != NULL) { 1005 if (flags & SAFLAG_ZERO) 1006 bzero(obj, size); 1007 return (obj); 1008 } 1009 1010 slgd = &SLGlobalData; 1011 slgd_lock(slgd); 1012 1013 /* 1014 * Attempt to allocate out of an existing zone. If all zones are 1015 * exhausted pull one off the free list or allocate a new one. 1016 */ 1017 if ((z = slgd->ZoneAry[zi]) == NULL) { 1018 z = zone_alloc(flags); 1019 if (z == NULL) 1020 goto fail; 1021 1022 /* 1023 * How big is the base structure? 1024 */ 1025 off = sizeof(struct slzone); 1026 1027 /* 1028 * Align the storage in the zone based on the chunking. 1029 * 1030 * Guarantee power-of-2 alignment for power-of-2-sized 1031 * chunks. Otherwise align based on the chunking size 1032 * (typically 8 or 16 bytes for small allocations). 1033 * 1034 * NOTE: Allocations >= ZoneLimit are governed by the 1035 * bigalloc code and typically only guarantee page-alignment. 1036 * 1037 * Set initial conditions for UIndex near the zone header 1038 * to reduce unecessary page faults, vs semi-randomization 1039 * to improve L1 cache saturation. 1040 */ 1041 if ((size | (size - 1)) + 1 == (size << 1)) 1042 off = roundup2(off, size); 1043 else 1044 off = roundup2(off, chunking); 1045 z->z_Magic = ZALLOC_SLAB_MAGIC; 1046 z->z_ZoneIndex = zi; 1047 z->z_NMax = (ZoneSize - off) / size; 1048 z->z_NFree = z->z_NMax; 1049 z->z_BasePtr = (char *)z + off; 1050 z->z_UIndex = z->z_UEndIndex = 0; 1051 z->z_ChunkSize = size; 1052 z->z_FirstFreePg = ZonePageCount; 1053 z->z_Next = slgd->ZoneAry[zi]; 1054 slgd->ZoneAry[zi] = z; 1055 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1056 flags &= ~SAFLAG_ZERO; /* already zero'd */ 1057 flags |= SAFLAG_PASSIVE; 1058 } 1059 1060 /* 1061 * Slide the base index for initial allocations out of the 1062 * next zone we create so we do not over-weight the lower 1063 * part of the cpu memory caches. 1064 */ 1065 slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE) 1066 & (ZALLOC_MAX_ZONE_SIZE - 1); 1067 } 1068 1069 /* 1070 * Ok, we have a zone from which at least one chunk is available. 1071 * 1072 * Remove us from the ZoneAry[] when we become empty 1073 */ 1074 MASSERT(z->z_NFree > 0); 1075 1076 if (--z->z_NFree == 0) { 1077 slgd->ZoneAry[zi] = z->z_Next; 1078 z->z_Next = NULL; 1079 } 1080 1081 /* 1082 * Locate a chunk in a free page. This attempts to localize 1083 * reallocations into earlier pages without us having to sort 1084 * the chunk list. A chunk may still overlap a page boundary. 1085 */ 1086 while (z->z_FirstFreePg < ZonePageCount) { 1087 if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) { 1088 MASSERT((uintptr_t)chunk & ZoneMask); 1089 z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next; 1090 goto done; 1091 } 1092 ++z->z_FirstFreePg; 1093 } 1094 1095 /* 1096 * No chunks are available but NFree said we had some memory, 1097 * so it must be available in the never-before-used-memory 1098 * area governed by UIndex. The consequences are very 1099 * serious if our zone got corrupted so we use an explicit 1100 * panic rather then a KASSERT. 1101 */ 1102 chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size); 1103 1104 if (++z->z_UIndex == z->z_NMax) 1105 z->z_UIndex = 0; 1106 if (z->z_UIndex == z->z_UEndIndex) { 1107 if (z->z_NFree != 0) 1108 _mpanic("slaballoc: corrupted zone"); 1109 } 1110 1111 if ((z->z_Flags & SLZF_UNOTZEROD) == 0) { 1112 flags &= ~SAFLAG_ZERO; 1113 flags |= SAFLAG_PASSIVE; 1114 } 1115 1116 done: 1117 slgd_unlock(slgd); 1118 if (flags & SAFLAG_ZERO) 1119 bzero(chunk, size); 1120 return(chunk); 1121 fail: 1122 slgd_unlock(slgd); 1123 return(NULL); 1124 } 1125 1126 /* 1127 * Reallocate memory within the chunk 1128 */ 1129 static void * 1130 _slabrealloc(void *ptr, size_t size) 1131 { 1132 bigalloc_t *bigp; 1133 void *nptr; 1134 slzone_t z; 1135 size_t chunking; 1136 1137 if (ptr == NULL || ptr == ZERO_LENGTH_PTR) { 1138 return(_slaballoc(size, 0)); 1139 } 1140 1141 if (size == 0) { 1142 free(ptr); 1143 return(ZERO_LENGTH_PTR); 1144 } 1145 1146 /* 1147 * Handle oversized allocations. 1148 */ 1149 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1150 bigalloc_t big; 1151 size_t bigbytes; 1152 1153 while ((big = *bigp) != NULL) { 1154 if (big->base == ptr) { 1155 size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK; 1156 bigbytes = big->bytes; 1157 1158 /* 1159 * If it already fits determine if it makes 1160 * sense to shrink/reallocate. Try to optimize 1161 * programs which stupidly make incremental 1162 * reallocations larger or smaller by scaling 1163 * the allocation. Also deal with potential 1164 * coloring. 1165 */ 1166 if (size >= (bigbytes >> 1) && 1167 size <= bigbytes) { 1168 if (big->active != size) { 1169 atomic_add_long(&excess_alloc, 1170 big->active - 1171 size); 1172 } 1173 big->active = size; 1174 bigalloc_unlock(ptr); 1175 return(ptr); 1176 } 1177 1178 /* 1179 * For large reallocations, allocate more space 1180 * than we need to try to avoid excessive 1181 * reallocations later on. 1182 */ 1183 chunking = size + (size >> 3); 1184 chunking = (chunking + PAGE_MASK) & 1185 ~(size_t)PAGE_MASK; 1186 1187 /* 1188 * Try to allocate adjacently in case the 1189 * program is idiotically realloc()ing a 1190 * huge memory block just slightly bigger. 1191 * (llvm's llc tends to do this a lot). 1192 * 1193 * (MAP_TRYFIXED forces mmap to fail if there 1194 * is already something at the address). 1195 */ 1196 if (chunking > bigbytes) { 1197 char *addr; 1198 int errno_save = errno; 1199 1200 addr = mmap((char *)ptr + bigbytes, 1201 chunking - bigbytes, 1202 PROT_READ|PROT_WRITE, 1203 MAP_PRIVATE|MAP_ANON| 1204 MAP_TRYFIXED, 1205 -1, 0); 1206 errno = errno_save; 1207 if (addr == (char *)ptr + bigbytes) { 1208 atomic_add_long(&excess_alloc, 1209 big->active - 1210 big->bytes + 1211 chunking - 1212 size); 1213 big->bytes = chunking; 1214 big->active = size; 1215 bigalloc_unlock(ptr); 1216 1217 return(ptr); 1218 } 1219 MASSERT((void *)addr == MAP_FAILED); 1220 } 1221 1222 /* 1223 * Failed, unlink big and allocate fresh. 1224 * (note that we have to leave (big) intact 1225 * in case the slaballoc fails). 1226 */ 1227 *bigp = big->next; 1228 bigalloc_unlock(ptr); 1229 if ((nptr = _slaballoc(size, 0)) == NULL) { 1230 /* Relink block */ 1231 bigp = bigalloc_lock(ptr); 1232 big->next = *bigp; 1233 *bigp = big; 1234 bigalloc_unlock(ptr); 1235 return(NULL); 1236 } 1237 if (size > bigbytes) 1238 size = bigbytes; 1239 bcopy(ptr, nptr, size); 1240 atomic_add_long(&excess_alloc, big->active - 1241 big->bytes); 1242 _slabfree(ptr, FASTSLABREALLOC, &big); 1243 1244 return(nptr); 1245 } 1246 bigp = &big->next; 1247 } 1248 bigalloc_unlock(ptr); 1249 handle_excess_big(); 1250 } 1251 1252 /* 1253 * Get the original allocation's zone. If the new request winds 1254 * up using the same chunk size we do not have to do anything. 1255 * 1256 * NOTE: We don't have to lock the globaldata here, the fields we 1257 * access here will not change at least as long as we have control 1258 * over the allocation. 1259 */ 1260 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1261 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1262 1263 /* 1264 * Use zoneindex() to chunk-align the new size, as long as the 1265 * new size is not too large. 1266 */ 1267 if (size < ZoneLimit) { 1268 zoneindex(&size, &chunking); 1269 if (z->z_ChunkSize == size) { 1270 return(ptr); 1271 } 1272 } 1273 1274 /* 1275 * Allocate memory for the new request size and copy as appropriate. 1276 */ 1277 if ((nptr = _slaballoc(size, 0)) != NULL) { 1278 if (size > z->z_ChunkSize) 1279 size = z->z_ChunkSize; 1280 bcopy(ptr, nptr, size); 1281 _slabfree(ptr, 0, NULL); 1282 } 1283 1284 return(nptr); 1285 } 1286 1287 /* 1288 * free (SLAB ALLOCATOR) 1289 * 1290 * Free a memory block previously allocated by malloc. Note that we do not 1291 * attempt to uplodate ks_loosememuse as MP races could prevent us from 1292 * checking memory limits in malloc. 1293 * 1294 * flags: 1295 * FASTSLABREALLOC Fast call from realloc, *rbigp already 1296 * unlinked. 1297 * 1298 * MPSAFE 1299 */ 1300 static void 1301 _slabfree(void *ptr, int flags, bigalloc_t *rbigp) 1302 { 1303 slzone_t z; 1304 slchunk_t chunk; 1305 bigalloc_t big; 1306 bigalloc_t *bigp; 1307 slglobaldata_t slgd; 1308 size_t size; 1309 int zi; 1310 int pgno; 1311 1312 /* Fast realloc path for big allocations */ 1313 if (flags & FASTSLABREALLOC) { 1314 big = *rbigp; 1315 goto fastslabrealloc; 1316 } 1317 1318 /* 1319 * Handle NULL frees and special 0-byte allocations 1320 */ 1321 if (ptr == NULL) 1322 return; 1323 if (ptr == ZERO_LENGTH_PTR) 1324 return; 1325 1326 /* 1327 * Handle oversized allocations. 1328 */ 1329 if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) { 1330 while ((big = *bigp) != NULL) { 1331 if (big->base == ptr) { 1332 *bigp = big->next; 1333 atomic_add_long(&excess_alloc, big->active - 1334 big->bytes); 1335 bigalloc_unlock(ptr); 1336 1337 /* 1338 * Try to stash the block we are freeing, 1339 * potentially receiving another block in 1340 * return which must be freed. 1341 */ 1342 fastslabrealloc: 1343 if (big->bytes <= BIGCACHE_LIMIT) { 1344 big = bigcache_find_free(big); 1345 if (big == NULL) 1346 return; 1347 } 1348 ptr = big->base; /* reload */ 1349 size = big->bytes; 1350 _slabfree(big, 0, NULL); 1351 _vmem_free(ptr, size); 1352 return; 1353 } 1354 bigp = &big->next; 1355 } 1356 bigalloc_unlock(ptr); 1357 handle_excess_big(); 1358 } 1359 1360 /* 1361 * Zone case. Figure out the zone based on the fact that it is 1362 * ZoneSize aligned. 1363 */ 1364 z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask); 1365 MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC); 1366 1367 size = z->z_ChunkSize; 1368 zi = z->z_ZoneIndex; 1369 1370 if (g_malloc_flags & SAFLAG_ZERO) 1371 bzero(ptr, size); 1372 1373 if (mtmagazine_free(zi, ptr) == 0) 1374 return; 1375 1376 pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT; 1377 chunk = ptr; 1378 slgd = &SLGlobalData; 1379 slgd_lock(slgd); 1380 1381 /* 1382 * Add this free non-zero'd chunk to a linked list for reuse, adjust 1383 * z_FirstFreePg. 1384 */ 1385 chunk->c_Next = z->z_PageAry[pgno]; 1386 z->z_PageAry[pgno] = chunk; 1387 if (z->z_FirstFreePg > pgno) 1388 z->z_FirstFreePg = pgno; 1389 1390 /* 1391 * Bump the number of free chunks. If it becomes non-zero the zone 1392 * must be added back onto the appropriate list. 1393 */ 1394 if (z->z_NFree++ == 0) { 1395 z->z_Next = slgd->ZoneAry[z->z_ZoneIndex]; 1396 slgd->ZoneAry[z->z_ZoneIndex] = z; 1397 } 1398 1399 /* 1400 * If the zone becomes totally free then release it. 1401 */ 1402 if (z->z_NFree == z->z_NMax) { 1403 slzone_t *pz; 1404 1405 pz = &slgd->ZoneAry[z->z_ZoneIndex]; 1406 while (z != *pz) 1407 pz = &(*pz)->z_Next; 1408 *pz = z->z_Next; 1409 z->z_Magic = -1; 1410 z->z_Next = NULL; 1411 zone_free(z); 1412 /* slgd lock released */ 1413 return; 1414 } 1415 slgd_unlock(slgd); 1416 } 1417 1418 /* 1419 * Allocate and return a magazine. NULL is returned and *burst is adjusted 1420 * if the magazine is empty. 1421 */ 1422 static __inline void * 1423 magazine_alloc(struct magazine *mp, int *burst) 1424 { 1425 void *obj; 1426 1427 if (mp == NULL) 1428 return(NULL); 1429 if (MAGAZINE_NOTEMPTY(mp)) { 1430 obj = mp->objects[--mp->rounds]; 1431 return(obj); 1432 } 1433 1434 /* 1435 * Return burst factor to caller along with NULL 1436 */ 1437 if ((mp->flags & M_BURST) && (burst != NULL)) { 1438 *burst = mp->burst_factor; 1439 } 1440 /* Reduce burst factor by NSCALE; if it hits 1, disable BURST */ 1441 if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) && 1442 (burst != NULL)) { 1443 mp->burst_factor -= M_BURST_NSCALE; 1444 if (mp->burst_factor <= 1) { 1445 mp->burst_factor = 1; 1446 mp->flags &= ~(M_BURST); 1447 mp->flags &= ~(M_BURST_EARLY); 1448 } 1449 } 1450 return (NULL); 1451 } 1452 1453 static __inline int 1454 magazine_free(struct magazine *mp, void *p) 1455 { 1456 if (mp != NULL && MAGAZINE_NOTFULL(mp)) { 1457 mp->objects[mp->rounds++] = p; 1458 return 0; 1459 } 1460 1461 return -1; 1462 } 1463 1464 static void * 1465 mtmagazine_alloc(int zi) 1466 { 1467 thr_mags *tp; 1468 struct magazine *mp, *emptymag; 1469 magazine_depot *d; 1470 void *obj; 1471 1472 /* 1473 * Do not try to access per-thread magazines while the mtmagazine 1474 * is being initialized or destroyed. 1475 */ 1476 tp = &thread_mags; 1477 if (tp->init < 0) 1478 return(NULL); 1479 1480 /* 1481 * Primary per-thread allocation loop 1482 */ 1483 for (;;) { 1484 /* 1485 * If the loaded magazine has rounds, allocate and return 1486 */ 1487 mp = tp->mags[zi].loaded; 1488 obj = magazine_alloc(mp, NULL); 1489 if (obj) 1490 break; 1491 1492 /* 1493 * If the prev magazine is full, swap with the loaded 1494 * magazine and retry. 1495 */ 1496 mp = tp->mags[zi].prev; 1497 if (mp && MAGAZINE_FULL(mp)) { 1498 MASSERT(mp->rounds != 0); 1499 swap_mags(&tp->mags[zi]); /* prev now empty */ 1500 continue; 1501 } 1502 1503 /* 1504 * Try to get a full magazine from the depot. Cycle 1505 * through depot(full)->loaded->prev->depot(empty). 1506 * Retry if a full magazine was available from the depot. 1507 * 1508 * Return NULL (caller will fall through) if no magazines 1509 * can be found anywhere. 1510 */ 1511 d = &depots[zi]; 1512 depot_lock(d); 1513 emptymag = tp->mags[zi].prev; 1514 if (emptymag) 1515 SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine); 1516 tp->mags[zi].prev = tp->mags[zi].loaded; 1517 mp = SLIST_FIRST(&d->full); /* loaded magazine */ 1518 tp->mags[zi].loaded = mp; 1519 if (mp) { 1520 SLIST_REMOVE_HEAD(&d->full, nextmagazine); 1521 MASSERT(MAGAZINE_NOTEMPTY(mp)); 1522 depot_unlock(d); 1523 continue; 1524 } 1525 depot_unlock(d); 1526 break; 1527 } 1528 1529 return (obj); 1530 } 1531 1532 static int 1533 mtmagazine_free(int zi, void *ptr) 1534 { 1535 thr_mags *tp; 1536 struct magazine *mp, *loadedmag; 1537 magazine_depot *d; 1538 int rc = -1; 1539 1540 /* 1541 * Do not try to access per-thread magazines while the mtmagazine 1542 * is being initialized or destroyed. 1543 */ 1544 tp = &thread_mags; 1545 if (tp->init < 0) 1546 return(-1); 1547 1548 /* 1549 * Primary per-thread freeing loop 1550 */ 1551 for (;;) { 1552 /* 1553 * Make sure a new magazine is available in case we have 1554 * to use it. Staging the newmag allows us to avoid 1555 * some locking/reentrancy complexity. 1556 * 1557 * Temporarily disable the per-thread caches for this 1558 * allocation to avoid reentrancy and/or to avoid a 1559 * stack overflow if the [zi] happens to be the same that 1560 * would be used to allocate the new magazine. 1561 */ 1562 if (tp->newmag == NULL) { 1563 tp->init = -1; 1564 tp->newmag = _slaballoc(sizeof(struct magazine), 1565 SAFLAG_ZERO); 1566 tp->init = 1; 1567 if (tp->newmag == NULL) { 1568 rc = -1; 1569 break; 1570 } 1571 } 1572 1573 /* 1574 * If the loaded magazine has space, free directly to it 1575 */ 1576 rc = magazine_free(tp->mags[zi].loaded, ptr); 1577 if (rc == 0) 1578 break; 1579 1580 /* 1581 * If the prev magazine is empty, swap with the loaded 1582 * magazine and retry. 1583 */ 1584 mp = tp->mags[zi].prev; 1585 if (mp && MAGAZINE_EMPTY(mp)) { 1586 MASSERT(mp->rounds == 0); 1587 swap_mags(&tp->mags[zi]); /* prev now full */ 1588 continue; 1589 } 1590 1591 /* 1592 * Try to get an empty magazine from the depot. Cycle 1593 * through depot(empty)->loaded->prev->depot(full). 1594 * Retry if an empty magazine was available from the depot. 1595 */ 1596 d = &depots[zi]; 1597 depot_lock(d); 1598 1599 if ((loadedmag = tp->mags[zi].prev) != NULL) 1600 SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine); 1601 tp->mags[zi].prev = tp->mags[zi].loaded; 1602 mp = SLIST_FIRST(&d->empty); 1603 if (mp) { 1604 tp->mags[zi].loaded = mp; 1605 SLIST_REMOVE_HEAD(&d->empty, nextmagazine); 1606 MASSERT(MAGAZINE_NOTFULL(mp)); 1607 } else { 1608 mp = tp->newmag; 1609 tp->newmag = NULL; 1610 mp->capacity = M_MAX_ROUNDS; 1611 mp->rounds = 0; 1612 mp->flags = 0; 1613 tp->mags[zi].loaded = mp; 1614 } 1615 depot_unlock(d); 1616 } 1617 1618 return rc; 1619 } 1620 1621 static void 1622 mtmagazine_init(void) 1623 { 1624 int error; 1625 1626 error = pthread_key_create(&thread_mags_key, mtmagazine_destructor); 1627 if (error) 1628 abort(); 1629 } 1630 1631 /* 1632 * This function is only used by the thread exit destructor 1633 */ 1634 static void 1635 mtmagazine_drain(struct magazine *mp) 1636 { 1637 void *obj; 1638 1639 while (MAGAZINE_NOTEMPTY(mp)) { 1640 obj = magazine_alloc(mp, NULL); 1641 _slabfree(obj, 0, NULL); 1642 } 1643 } 1644 1645 /* 1646 * mtmagazine_destructor() 1647 * 1648 * When a thread exits, we reclaim all its resources; all its magazines are 1649 * drained and the structures are freed. 1650 * 1651 * WARNING! The destructor can be called multiple times if the larger user 1652 * program has its own destructors which run after ours which 1653 * allocate or free memory. 1654 */ 1655 static void 1656 mtmagazine_destructor(void *thrp) 1657 { 1658 thr_mags *tp = thrp; 1659 struct magazine *mp; 1660 int i; 1661 1662 /* 1663 * Prevent further use of mtmagazines while we are destructing 1664 * them, as well as for any destructors which are run after us 1665 * prior to the thread actually being destroyed. 1666 */ 1667 tp->init = -1; 1668 1669 for (i = 0; i < NZONES; i++) { 1670 mp = tp->mags[i].loaded; 1671 tp->mags[i].loaded = NULL; 1672 if (mp) { 1673 if (MAGAZINE_NOTEMPTY(mp)) 1674 mtmagazine_drain(mp); 1675 _slabfree(mp, 0, NULL); 1676 } 1677 1678 mp = tp->mags[i].prev; 1679 tp->mags[i].prev = NULL; 1680 if (mp) { 1681 if (MAGAZINE_NOTEMPTY(mp)) 1682 mtmagazine_drain(mp); 1683 _slabfree(mp, 0, NULL); 1684 } 1685 } 1686 1687 if (tp->newmag) { 1688 mp = tp->newmag; 1689 tp->newmag = NULL; 1690 _slabfree(mp, 0, NULL); 1691 } 1692 } 1693 1694 /* 1695 * zone_alloc() 1696 * 1697 * Attempt to allocate a zone from the zone magazine; the zone magazine has 1698 * M_BURST_EARLY enabled, so honor the burst request from the magazine. 1699 */ 1700 static slzone_t 1701 zone_alloc(int flags) 1702 { 1703 slglobaldata_t slgd = &SLGlobalData; 1704 int burst = 1; 1705 int i, j; 1706 slzone_t z; 1707 1708 zone_magazine_lock(); 1709 slgd_unlock(slgd); 1710 1711 z = magazine_alloc(&zone_magazine, &burst); 1712 if (z == NULL && burst == 1) { 1713 zone_magazine_unlock(); 1714 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1715 } else if (z == NULL) { 1716 z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags); 1717 if (z) { 1718 for (i = 1; i < burst; i++) { 1719 j = magazine_free(&zone_magazine, 1720 (char *) z + (ZoneSize * i)); 1721 MASSERT(j == 0); 1722 } 1723 } 1724 zone_magazine_unlock(); 1725 } else { 1726 z->z_Flags |= SLZF_UNOTZEROD; 1727 zone_magazine_unlock(); 1728 } 1729 slgd_lock(slgd); 1730 return z; 1731 } 1732 1733 /* 1734 * zone_free() 1735 * 1736 * Release a zone and unlock the slgd lock. 1737 */ 1738 static void 1739 zone_free(void *z) 1740 { 1741 slglobaldata_t slgd = &SLGlobalData; 1742 void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {}; 1743 int i, j; 1744 1745 zone_magazine_lock(); 1746 slgd_unlock(slgd); 1747 1748 bzero(z, sizeof(struct slzone)); 1749 1750 if (opt_madvise) 1751 madvise(z, ZoneSize, MADV_FREE); 1752 1753 i = magazine_free(&zone_magazine, z); 1754 1755 /* 1756 * If we failed to free, collect excess magazines; release the zone 1757 * magazine lock, and then free to the system via _vmem_free. Re-enable 1758 * BURST mode for the magazine. 1759 */ 1760 if (i == -1) { 1761 j = zone_magazine.rounds - zone_magazine.low_factor; 1762 for (i = 0; i < j; i++) { 1763 excess[i] = magazine_alloc(&zone_magazine, NULL); 1764 MASSERT(excess[i] != NULL); 1765 } 1766 1767 zone_magazine_unlock(); 1768 1769 for (i = 0; i < j; i++) 1770 _vmem_free(excess[i], ZoneSize); 1771 1772 _vmem_free(z, ZoneSize); 1773 } else { 1774 zone_magazine_unlock(); 1775 } 1776 } 1777 1778 /* 1779 * _vmem_alloc() 1780 * 1781 * Directly map memory in PAGE_SIZE'd chunks with the specified 1782 * alignment. 1783 * 1784 * Alignment must be a multiple of PAGE_SIZE. 1785 * 1786 * Size must be >= alignment. 1787 */ 1788 static void * 1789 _vmem_alloc(size_t size, size_t align, int flags) 1790 { 1791 char *addr; 1792 char *save; 1793 size_t excess; 1794 1795 /* 1796 * Map anonymous private memory. 1797 */ 1798 addr = mmap(NULL, size, PROT_READ|PROT_WRITE, 1799 MAP_PRIVATE|MAP_ANON, -1, 0); 1800 if (addr == MAP_FAILED) 1801 return(NULL); 1802 1803 /* 1804 * Check alignment. The misaligned offset is also the excess 1805 * amount. If misaligned unmap the excess so we have a chance of 1806 * mapping at the next alignment point and recursively try again. 1807 * 1808 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB block alignment 1809 * aaaaaaaaa aaaaaaaaaaa aa mis-aligned allocation 1810 * xxxxxxxxx final excess calculation 1811 * ^ returned address 1812 */ 1813 excess = (uintptr_t)addr & (align - 1); 1814 1815 if (excess) { 1816 excess = align - excess; 1817 save = addr; 1818 1819 munmap(save + excess, size - excess); 1820 addr = _vmem_alloc(size, align, flags); 1821 munmap(save, excess); 1822 } 1823 return((void *)addr); 1824 } 1825 1826 /* 1827 * _vmem_free() 1828 * 1829 * Free a chunk of memory allocated with _vmem_alloc() 1830 */ 1831 static void 1832 _vmem_free(void *ptr, size_t size) 1833 { 1834 munmap(ptr, size); 1835 } 1836 1837 /* 1838 * Panic on fatal conditions 1839 */ 1840 static void 1841 _mpanic(const char *ctl, ...) 1842 { 1843 va_list va; 1844 1845 if (malloc_panic == 0) { 1846 malloc_panic = 1; 1847 va_start(va, ctl); 1848 vfprintf(stderr, ctl, va); 1849 fprintf(stderr, "\n"); 1850 fflush(stderr); 1851 va_end(va); 1852 } 1853 abort(); 1854 } 1855 1856 __weak_reference(__malloc, malloc); 1857 __weak_reference(__calloc, calloc); 1858 __weak_reference(__posix_memalign, posix_memalign); 1859 __weak_reference(__realloc, realloc); 1860 __weak_reference(__free, free); 1861