/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com> and by
 * Venkatesh Srinivas <me@endeavour.zapto.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * are nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them on free.  One- and two-page allocations use the
 * zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that power-of-2 sized allocations are page-aligned.  Non
 *	power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
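 *
 * As a worked example of the table above (illustrative numbers, assuming
 * the default 64KB zone size): a 200-byte malloc() falls in the 128-255
 * band, is rounded up to the 208-byte chunk size (16-byte chunking) and is
 * served from zone index 20, which carves a 64KB zone into roughly 314
 * chunks.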
 *
 * Multithreaded enhancements for small allocations introduced August 2010.
 * These are in the spirit of 'libumem'.  See:
 *	Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
 *	slab allocator to many CPUs and arbitrary resources". In Proc. 2001
 *	USENIX Technical Conference. USENIX Association.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.
 *
 * 'U' / ['u']	Generate / do not generate utrace entries for ktrace(1).
 *		This will generate utrace events for all malloc,
 *		realloc, and free calls.  There are tools (mtrplay) to
 *		replay an allocation pattern or to graph heap structure
 *		(mtrgraph) which can interpret these logs.
 * 'Z' / ['z']	Zero out / do not zero all allocations.
 *		Each new byte of memory allocated by malloc, realloc, or
 *		reallocf will be initialized to 0.  This is intended for
 *		debugging and will affect performance negatively.
 * 'H' / ['h']	Pass a hint to the kernel about pages unused by the
 *		allocation functions.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */

#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>

#include "spinlock.h"
#include "un-namespace.h"

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	bytes;		/* bytes allocated */
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
	int32_t		z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone	*z_Next;	/* ZoneAry[] link if z_NFree non-zero */
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
#if defined(INVARIANTS)
	__uint32_t	z_Bitmap[];	/* bitmap of free chunks / sanity */
#endif
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

#define FASTSLABREALLOC		0x02

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)
#define ZERO_LENGTH_PTR	((void *)&malloc_dummy_pointer)

#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

#define SAFLAG_ZERO	0x0001
#define SAFLAG_PASSIVE	0x0002

/*
 * Thread control
 */

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			    } while (0)

/*
 * Magazines
 */

#define M_MAX_ROUNDS	64
#define M_ZONE_ROUNDS	64
#define M_LOW_ROUNDS	32
#define M_INIT_ROUNDS	8
#define M_BURST_FACTOR	8
#define M_BURST_NSCALE	2

#define M_BURST		0x0001
#define M_BURST_EARLY	0x0002

struct magazine {
	SLIST_ENTRY(magazine) nextmagazine;

	int		flags;
	int		capacity;	/* Max rounds in this magazine */
	int		rounds;		/* Current number of free rounds */
	int		burst_factor;	/* Number of blocks to prefill with */
	int		low_factor;	/* Free till low_factor from full mag */
	void		*objects[M_MAX_ROUNDS];
};

SLIST_HEAD(magazinelist, magazine);

static spinlock_t zone_mag_lock;
static struct magazine zone_magazine = {
	.flags = M_BURST | M_BURST_EARLY,
	.capacity = M_ZONE_ROUNDS,
	.rounds = 0,
	.burst_factor = M_BURST_FACTOR,
	.low_factor = M_LOW_ROUNDS
};

#define MAGAZINE_FULL(mp)	(mp->rounds == mp->capacity)
#define MAGAZINE_NOTFULL(mp)	(mp->rounds < mp->capacity)
#define MAGAZINE_EMPTY(mp)	(mp->rounds == 0)
#define MAGAZINE_NOTEMPTY(mp)	(mp->rounds != 0)
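
/*
 * Informational sketch (not part of the allocator proper): a magazine is a
 * small LIFO stack of free objects.  magazine_free() pushes into
 * objects[rounds++] until rounds reaches capacity, and magazine_alloc()
 * pops objects[--rounds] until the magazine is empty.  For example, with a
 * capacity of 4:
 *
 *	rounds 0 --free(a)--> 1 --free(b)--> 2 --alloc()==b--> 1
 */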
/*
 * Each thread will have a pair of magazines per size-class (NZONES).
 * The loaded magazine will support immediate allocations, the previous
 * magazine will either be full or empty and can be swapped at need.
 */
typedef struct magazine_pair {
	struct magazine	*loaded;
	struct magazine	*prev;
} magazine_pair;

/* A depot is a collection of magazines for a single zone. */
typedef struct magazine_depot {
	struct magazinelist full;
	struct magazinelist empty;
	spinlock_t	lock;
} magazine_depot;

typedef struct thr_mags {
	magazine_pair	mags[NZONES];
	struct magazine	*newmag;
	int		init;
} thr_mags;

/*
 * With this attribute set, do not require a function call for accessing
 * this variable when the code is compiled -fPIC.  Empty for libc_rtld
 * (like __thread).
 */
#ifdef __LIBC_RTLD
#define TLS_ATTRIBUTE
#else
#define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec")))
#endif

static int mtmagazine_free_live;
static __thread thr_mags thread_mags TLS_ATTRIBUTE;
static pthread_key_t thread_mags_key;
static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT;
static magazine_depot depots[NZONES];

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static int opt_madvise = 0;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static struct slglobaldata SLGlobalData;
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static int malloc_panic;
static int malloc_dummy_pointer;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr, int, bigalloc_t *);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void *magazine_alloc(struct magazine *, int *);
static int magazine_free(struct magazine *, void *);
static void *mtmagazine_alloc(int zi);
static int mtmagazine_free(int zi, void *);
static void mtmagazine_init(void);
static void mtmagazine_destructor(void *);
static slzone_t zone_alloc(int flags);
static void zone_free(void *z);
static void _mpanic(const char *ctl, ...) __printflike(1, 2);
static void malloc_init(void) __constructor(101);
#if defined(INVARIANTS)
static void chunk_mark_allocated(slzone_t z, void *chunk);
static void chunk_mark_free(slzone_t z, void *chunk);
#endif

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}
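
/*
 * Informational summary of how the UTRACE() records are filled in by the
 * entry points further down (derived from the calls themselves):
 *
 *	malloc(size)		-> { p = NULL, s = size,   r = result }
 *	calloc(n, size)		-> { p = NULL, s = n*size, r = result }
 *	realloc(ptr, size)	-> { p = ptr,  s = size,   r = result }
 *	free(ptr)		-> { p = ptr,  s = 0,      r = NULL }
 *	malloc_init()		-> { p = -1,   s = 0,      r = NULL }
 */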
#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

static void
malloc_init(void)
{
	const char *p = NULL;

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':	opt_utrace = 0; break;
		case 'U':	opt_utrace = 1; break;
		case 'h':	opt_madvise = 0; break;
		case 'H':	opt_madvise = 1; break;
		case 'z':	g_malloc_flags = 0; break;
		case 'Z':	g_malloc_flags = SAFLAG_ZERO; break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
void
_nmalloc_thr_init(void)
{
	thr_mags *tp;

	/*
	 * Disallow mtmagazine operations until the mtmagazine is
	 * initialized.
	 */
	tp = &thread_mags;
	tp->init = -1;

	if (mtmagazine_free_live == 0) {
		mtmagazine_free_live = 1;
		pthread_once(&thread_mags_once, mtmagazine_init);
	}
	pthread_setspecific(thread_mags_key, tp);
	tp->init = 1;
}

/*
 * Thread locks.
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINLOCK(&slgd->Spinlock);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINUNLOCK(&slgd->Spinlock);
}

static __inline void
depot_lock(magazine_depot *dp)
{
	if (__isthreaded)
		_SPINLOCK(&dp->lock);
}

static __inline void
depot_unlock(magazine_depot *dp)
{
	if (__isthreaded)
		_SPINUNLOCK(&dp->lock);
}

static __inline void
zone_magazine_lock(void)
{
	if (__isthreaded)
		_SPINLOCK(&zone_mag_lock);
}

static __inline void
zone_magazine_unlock(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&zone_mag_lock);
}

static __inline void
swap_mags(magazine_pair *mp)
{
	struct magazine *tmp;
	tmp = mp->loaded;
	mp->loaded = mp->prev;
	mp->prev = tmp;
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
	int hv;

	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
	      ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

	return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (__isthreaded)
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	return(bigp);
}
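
/*
 * Illustrative example of the hashing scheme above (assuming PAGE_SHIFT is
 * 12): a big allocation based at 0x1234567000 hashes to
 *
 *	hv = (0x1234567000 >> 12) ^ (0x1234567000 >> 22)
 *
 * and lives on chain bigalloc_array[hv & BIGHMASK] (1024 buckets), protected
 * by spinlock bigspin_array[hv & BIGXMASK] (64 locks shared across buckets).
 */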
/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (*bigp == NULL)
		return(NULL);
	if (__isthreaded) {
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	}
	return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
	int hv;

	if (__isthreaded) {
		hv = _bigalloc_hash(ptr);
		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */
	if (n < 128) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8 - 1);		/* 8 byte chunks, 16 zones */
	}
	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 7);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / 32 + 15);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / 64 + 23);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / 128 + 31);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / 256 + 39);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / 512 + 47);
	}
#if ZALLOC_ZONE_LIMIT > 8192
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / 1024 + 55);
	}
#endif
#if ZALLOC_ZONE_LIMIT > 16384
	if (n < 32768) {
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / 2048 + 63);
	}
#endif
	_mpanic("Unexpected byte count %zu", n);
	return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
	void *ptr;

	ptr = _slaballoc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

#define MUL_NO_OVERFLOW	(1UL << (sizeof(size_t) * 4))

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
	void *ptr;

	if ((number >= MUL_NO_OVERFLOW || size >= MUL_NO_OVERFLOW) &&
	     number > 0 && SIZE_MAX / number < size) {
		errno = ENOMEM;
		return(NULL);
	}

	ptr = _slaballoc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}
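
/*
 * Note on the overflow check in calloc() above (informational): on LP64
 * MUL_NO_OVERFLOW is 1UL << 32, so when both operands are below it the
 * 64-bit product number * size cannot overflow and the division test is
 * short-circuited; only when at least one operand is large does the exact
 * SIZE_MAX / number < size test run.
 */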
/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
	void *ret;
	ret = _slabrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	bigalloc_t *bigp;
	bigalloc_t big;
	size_t chunking;
	int zi __unused;

	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * Our zone mechanism guarantees same-sized alignment for any
	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
	 * we can just call _slaballoc() and be done.  We round size up
	 * to the nearest alignment boundary to improve our odds of
	 * it becoming a power-of-2 if it wasn't before.
	 */
	if (size <= alignment)
		size = alignment;
	else
		size = (size + alignment - 1) & ~(size_t)(alignment - 1);
	if (size < PAGE_SIZE && (size | (size - 1)) + 1 == (size << 1)) {
		*memptr = _slaballoc(size, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * Otherwise locate a zone with a chunking that matches
	 * the requested alignment, within reason.  Consider two cases:
	 *
	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
	 *     we find will be the best fit because the chunking will be
	 *     greater or equal to the alignment.
	 *
	 * (2) A 513 allocation on a 256-byte alignment.  In this case
	 *     the first zoneindex we find will be for 576 byte allocations
	 *     with a chunking of 64, which is not sufficient.  To fix this
	 *     we simply find the nearest power-of-2 >= size and use the
	 *     same side-effect of _slaballoc() which guarantees
	 *     same-alignment on a power-of-2 allocation.
	 */
	if (size < PAGE_SIZE) {
		zi = zoneindex(&size, &chunking);
		if (chunking >= alignment) {
			*memptr = _slaballoc(size, 0);
			return(*memptr ? 0 : ENOMEM);
		}
		if (size >= 1024)
			alignment = 1024;
		if (size >= 16384)
			alignment = 16384;
		while (alignment < size)
			alignment <<= 1;
		*memptr = _slaballoc(alignment, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * If the slab allocator cannot handle it use vmem_alloc().
	 *
	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (size < alignment)
		size = alignment;
	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
	*memptr = _vmem_alloc(size, alignment, 0);
	if (*memptr == NULL)
		return(ENOMEM);

	big = _slaballoc(sizeof(struct bigalloc), 0);
	if (big == NULL) {
		_vmem_free(*memptr, size);
		*memptr = NULL;
		return(ENOMEM);
	}
	bigp = bigalloc_lock(*memptr);
	big->base = *memptr;
	big->bytes = size;
	big->next = *bigp;
	*bigp = big;
	bigalloc_unlock(*memptr);

	return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
	UTRACE(ptr, 0, 0);
	_slabfree(ptr, 0, NULL);
}
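
/*
 * Minimal usage sketch (illustrative) of the malloc(0) contract implemented
 * by the internal routines below: a zero-byte request returns the non-NULL
 * ZERO_LENGTH_PTR sentinel, realloc() of that sentinel behaves like a fresh
 * allocation, and free() of it is a no-op.
 *
 *	void *p = malloc(0);		// non-NULL sentinel
 *	p = realloc(p, 100);		// equivalent to malloc(100)
 *	free(p);			// always safe
 */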
/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.  If the request is too large,
 * or if it is page-aligned beyond a certain size, we fall back to the
 * KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
	slzone_t z;
	slchunk_t chunk;
	slglobaldata_t slgd;
	size_t chunking;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int off;
	void *obj;

	/*
	 * Handle the degenerate size == 0 case.  Yes, this does happen.
	 * Return a special pointer.  This is to maintain compatibility with
	 * the original malloc implementation.  Certain devices, such as the
	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
	 * also realloc() later on.  Joy.
	 */
	if (size == 0)
		return(ZERO_LENGTH_PTR);

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Handle large allocations directly.  There should not be very many
	 * of these so performance is not a big issue.
	 *
	 * The backend allocator is pretty nasty on a SMP system.  Use the
	 * slab allocator for one and two page-sized chunks even though we
	 * lose some efficiency.
	 */
	if (size >= ZoneLimit ||
	    ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
		bigalloc_t big;
		bigalloc_t *bigp;

		/*
		 * Page-align and cache-color in case of virtually indexed
		 * physically tagged L1 caches (aka SandyBridge).  No sweat
		 * otherwise, so just do it.
		 */
		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
		if ((size & 8191) == 0)
			size += 4096;

		chunk = _vmem_alloc(size, PAGE_SIZE, flags);
		if (chunk == NULL)
			return(NULL);

		big = _slaballoc(sizeof(struct bigalloc), 0);
		if (big == NULL) {
			_vmem_free(chunk, size);
			return(NULL);
		}
		bigp = bigalloc_lock(chunk);
		big->base = chunk;
		big->bytes = size;
		big->next = *bigp;
		*bigp = big;
		bigalloc_unlock(chunk);

		return(chunk);
	}

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);

	obj = mtmagazine_alloc(zi);
	if (obj != NULL) {
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	slgd = &SLGlobalData;
	slgd_lock(slgd);

	/*
	 * Attempt to allocate out of an existing zone.  If all zones are
	 * exhausted pull one off the free list or allocate a new one.
	 */
	if ((z = slgd->ZoneAry[zi]) == NULL) {
		z = zone_alloc(flags);
		if (z == NULL)
			goto fail;

		/*
		 * How big is the base structure?
		 */
#if defined(INVARIANTS)
		/*
		 * Make room for z_Bitmap.  An exact calculation is
		 * somewhat more complicated so don't make an exact
		 * calculation.
		 */
		off = offsetof(struct slzone,
				z_Bitmap[(ZoneSize / size + 31) / 32]);
		bzero(z->z_Bitmap, (ZoneSize / size + 31) / 8);
#else
		off = sizeof(struct slzone);
#endif
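
		/*
		 * Illustrative numbers (assuming the default 64KB ZoneSize
		 * and INVARIANTS disabled): for the 1024-byte size class the
		 * header offset below rounds up to 1024 because power-of-2
		 * sizes are self-aligned, giving z_NMax = (65536 - 1024) /
		 * 1024 = 63 chunks; for the 208-byte class the offset only
		 * rounds up to the 16-byte chunking.
		 */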
		/*
		 * Align the storage in the zone based on the chunking.
		 *
		 * Guarantee power-of-2 alignment for power-of-2-sized
		 * chunks.  Otherwise align based on the chunking size
		 * (typically 8 or 16 bytes for small allocations).
		 *
		 * NOTE: Allocations >= ZoneLimit are governed by the
		 * bigalloc code and typically only guarantee page-alignment.
		 *
		 * Set initial conditions for UIndex near the zone header
		 * to reduce unnecessary page faults, vs semi-randomization
		 * to improve L1 cache saturation.
		 */
		if ((size | (size - 1)) + 1 == (size << 1))
			off = roundup2(off, size);
		else
			off = roundup2(off, chunking);
		z->z_Magic = ZALLOC_SLAB_MAGIC;
		z->z_ZoneIndex = zi;
		z->z_NMax = (ZoneSize - off) / size;
		z->z_NFree = z->z_NMax;
		z->z_BasePtr = (char *)z + off;
		z->z_UIndex = z->z_UEndIndex = 0;
		z->z_ChunkSize = size;
		z->z_FirstFreePg = ZonePageCount;
		z->z_Next = slgd->ZoneAry[zi];
		slgd->ZoneAry[zi] = z;
		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
			flags &= ~SAFLAG_ZERO;	/* already zero'd */
			flags |= SAFLAG_PASSIVE;
		}

		/*
		 * Slide the base index for initial allocations out of the
		 * next zone we create so we do not over-weight the lower
		 * part of the cpu memory caches.
		 */
		slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
					& (ZALLOC_MAX_ZONE_SIZE - 1);
	}

	/*
	 * Ok, we have a zone from which at least one chunk is available.
	 *
	 * Remove us from the ZoneAry[] when we become empty
	 */
	MASSERT(z->z_NFree > 0);

	if (--z->z_NFree == 0) {
		slgd->ZoneAry[zi] = z->z_Next;
		z->z_Next = NULL;
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < ZonePageCount) {
		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
			/*
			 * Diagnostic: c_Next is not total garbage.
			 */
			MASSERT(chunk->c_Next == NULL ||
				((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
				((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
#ifdef INVARIANTS
			chunk_mark_allocated(z, chunk);
#endif
			MASSERT((uintptr_t)chunk & ZoneMask);
			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
			goto done;
		}
		++z->z_FirstFreePg;
	}

	/*
	 * No chunks are available but NFree said we had some memory,
	 * so it must be available in the never-before-used-memory
	 * area governed by UIndex.  The consequences are very
	 * serious if our zone got corrupted so we use an explicit
	 * panic rather than a KASSERT.
	 */
	chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

	if (++z->z_UIndex == z->z_NMax)
		z->z_UIndex = 0;
	if (z->z_UIndex == z->z_UEndIndex) {
		if (z->z_NFree != 0)
			_mpanic("slaballoc: corrupted zone");
	}

	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
		flags &= ~SAFLAG_ZERO;
		flags |= SAFLAG_PASSIVE;
	}
#if defined(INVARIANTS)
	chunk_mark_allocated(z, chunk);
#endif

done:
	slgd_unlock(slgd);
	if (flags & SAFLAG_ZERO) {
		bzero(chunk, size);
#ifdef INVARIANTS
	} else if ((flags & (SAFLAG_ZERO|SAFLAG_PASSIVE)) == 0) {
		if (use_malloc_pattern) {
			for (i = 0; i < size; i += sizeof(int)) {
				*(int *)((char *)chunk + i) = -1;
			}
		}
		/* avoid accidental double-free check */
		chunk->c_Next = (void *)-1;
#endif
	}
	return(chunk);
fail:
	slgd_unlock(slgd);
	return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
	bigalloc_t *bigp;
	void *nptr;
	slzone_t z;
	size_t chunking;

	if (ptr == NULL || ptr == ZERO_LENGTH_PTR) {
		return(_slaballoc(size, 0));
	}

	if (size == 0) {
		free(ptr);
		return(ZERO_LENGTH_PTR);
	}

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		bigalloc_t big;
		size_t bigbytes;

		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
				bigbytes = big->bytes;

				/*
				 * If it already fits determine if it makes
				 * sense to shrink/reallocate.  Try to optimize
				 * programs which stupidly make incremental
				 * reallocations larger or smaller by scaling
				 * the allocation.  Also deal with potential
				 * coloring.
				 */
				if (size <= bigbytes &&
				    (size + 4096 == bigbytes ||
				     size >= bigbytes - (size >> 2))) {
					bigalloc_unlock(ptr);
					return(ptr);
				}

				/*
				 * For large allocations, allocate more space
				 * than we need to try to avoid excessive
				 * reallocations later on.
				 */
				if (size > PAGE_SIZE * 16) {
					size += size >> 3;
					size = (size + PAGE_MASK) &
					       ~(size_t)PAGE_MASK;
				}

				*bigp = big->next;
				bigalloc_unlock(ptr);
				if ((nptr = _slaballoc(size, 0)) == NULL) {
					/* Relink block */
					bigp = bigalloc_lock(ptr);
					big->next = *bigp;
					*bigp = big;
					bigalloc_unlock(ptr);
					return(NULL);
				}
				if (size > bigbytes)
					size = bigbytes;
				bcopy(ptr, nptr, size);
				_slabfree(ptr, FASTSLABREALLOC, &big);
				return(nptr);
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}
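
	/*
	 * Worked example of the in-place test above (illustrative, assuming
	 * 4KB pages): for a 64KB big allocation (bigbytes == 65536), any
	 * page-rounded request of 53248 bytes or more is left in place;
	 * smaller requests are copied into a freshly sized allocation.  The
	 * separate size + 4096 == bigbytes test keeps blocks whose original
	 * request was cache-colored up by one page.
	 */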
	/*
	 * Get the original allocation's zone.  If the new request winds
	 * up using the same chunk size we do not have to do anything.
	 *
	 * NOTE: We don't have to lock the globaldata here, the fields we
	 * access here will not change at least as long as we have control
	 * over the allocation.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	/*
	 * Use zoneindex() to chunk-align the new size, as long as the
	 * new size is not too large.
	 */
	if (size < ZoneLimit) {
		zoneindex(&size, &chunking);
		if (z->z_ChunkSize == size) {
			return(ptr);
		}
	}

	/*
	 * Allocate memory for the new request size and copy as appropriate.
	 */
	if ((nptr = _slaballoc(size, 0)) != NULL) {
		if (size > z->z_ChunkSize)
			size = z->z_ChunkSize;
		bcopy(ptr, nptr, size);
		_slabfree(ptr, 0, NULL);
	}

	return(nptr);
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * flags:
 *	FASTSLABREALLOC		Fast call from realloc, *rbigp already
 *				unlinked.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr, int flags, bigalloc_t *rbigp)
{
	slzone_t z;
	slchunk_t chunk;
	bigalloc_t big;
	bigalloc_t *bigp;
	slglobaldata_t slgd;
	size_t size;
	int zi;
	int pgno;

	/* Fast realloc path for big allocations */
	if (flags & FASTSLABREALLOC) {
		big = *rbigp;
		goto fastslabrealloc;
	}

	/*
	 * Handle NULL frees and special 0-byte allocations
	 */
	if (ptr == NULL)
		return;
	if (ptr == ZERO_LENGTH_PTR)
		return;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				*bigp = big->next;
				bigalloc_unlock(ptr);
fastslabrealloc:
				size = big->bytes;
				_slabfree(big, 0, NULL);
#ifdef INVARIANTS
				MASSERT(sizeof(weirdary) <= size);
				bcopy(weirdary, ptr, sizeof(weirdary));
#endif
				_vmem_free(ptr, size);
				return;
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Zone case.  Figure out the zone based on the fact that it is
	 * ZoneSize aligned.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	size = z->z_ChunkSize;
	zi = z->z_ZoneIndex;

	if (g_malloc_flags & SAFLAG_ZERO)
		bzero(ptr, size);

	if (mtmagazine_free(zi, ptr) == 0)
		return;

	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
	chunk = ptr;
	slgd = &SLGlobalData;
	slgd_lock(slgd);

#ifdef INVARIANTS
	/*
	 * Attempt to detect a double-free.  To reduce overhead we only check
	 * if there appears to be a link pointer at the base of the data.
	 */
	if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
		slchunk_t scan;

		for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
			if (scan == chunk)
				_mpanic("Double free at %p", chunk);
		}
	}
	chunk_mark_free(z, chunk);
#endif

	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
#ifdef INVARIANTS
	if (z->z_ChunkSize < sizeof(weirdary))
		bcopy(weirdary, chunk, z->z_ChunkSize);
	else
		bcopy(weirdary, chunk, sizeof(weirdary));
#endif

	/*
	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
	 * z_FirstFreePg.
	 */
	chunk->c_Next = z->z_PageAry[pgno];
	z->z_PageAry[pgno] = chunk;
	if (z->z_FirstFreePg > pgno)
		z->z_FirstFreePg = pgno;

	/*
	 * Bump the number of free chunks.  If it becomes non-zero the zone
	 * must be added back onto the appropriate list.
	 */
	if (z->z_NFree++ == 0) {
		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
		slgd->ZoneAry[z->z_ZoneIndex] = z;
	}

	/*
	 * If the zone becomes totally free then release it.
	 */
	if (z->z_NFree == z->z_NMax) {
		slzone_t *pz;

		pz = &slgd->ZoneAry[z->z_ZoneIndex];
		while (z != *pz)
			pz = &(*pz)->z_Next;
		*pz = z->z_Next;
		z->z_Magic = -1;
		z->z_Next = NULL;
		zone_free(z);
		/* slgd lock released */
		return;
	}
	slgd_unlock(slgd);
}

#if defined(INVARIANTS)
/*
 * Helper routines for sanity checks
 */
static
void
chunk_mark_allocated(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) == 0);
	*bitptr |= 1 << bitdex;
}

static
void
chunk_mark_free(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) != 0);
	*bitptr &= ~(1 << bitdex);
}

#endif

/*
 * Allocate and return a magazine.  NULL is returned and *burst is adjusted
 * if the magazine is empty.
 */
static __inline void *
magazine_alloc(struct magazine *mp, int *burst)
{
	void *obj;

	if (mp == NULL)
		return(NULL);
	if (MAGAZINE_NOTEMPTY(mp)) {
		obj = mp->objects[--mp->rounds];
		return(obj);
	}

	/*
	 * Return burst factor to caller along with NULL
	 */
	if ((mp->flags & M_BURST) && (burst != NULL)) {
		*burst = mp->burst_factor;
	}
	/* Reduce burst factor by NSCALE; if it hits 1, disable BURST */
	if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) &&
	    (burst != NULL)) {
		mp->burst_factor -= M_BURST_NSCALE;
		if (mp->burst_factor <= 1) {
			mp->burst_factor = 1;
			mp->flags &= ~(M_BURST);
			mp->flags &= ~(M_BURST_EARLY);
		}
	}
	return (NULL);
}

static __inline int
magazine_free(struct magazine *mp, void *p)
{
	if (mp != NULL && MAGAZINE_NOTFULL(mp)) {
		mp->objects[mp->rounds++] = p;
		return 0;
	}

	return -1;
}
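
/*
 * Informational sketch of the per-thread cache flow implemented by
 * mtmagazine_alloc() and mtmagazine_free() below, in the style of Bonwick's
 * magazine layer:
 *
 *	alloc:	pop from the loaded magazine; else if prev is full, swap and
 *		retry; else load a full magazine from the depot and retry;
 *		else fall through to the slab layer.
 *	free:	push into the loaded magazine; else if prev is empty, swap and
 *		retry; else load an empty magazine from the depot (or the
 *		staged newmag) and retry.
 */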
static void *
mtmagazine_alloc(int zi)
{
	thr_mags *tp;
	struct magazine *mp, *emptymag;
	magazine_depot *d;
	void *obj;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(NULL);

	/*
	 * Primary per-thread allocation loop
	 */
	for (;;) {
		/*
		 * If the loaded magazine has rounds, allocate and return
		 */
		mp = tp->mags[zi].loaded;
		obj = magazine_alloc(mp, NULL);
		if (obj)
			break;

		/*
		 * If the prev magazine is full, swap with the loaded
		 * magazine and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_FULL(mp)) {
			MASSERT(mp->rounds != 0);
			swap_mags(&tp->mags[zi]);	/* prev now empty */
			continue;
		}

		/*
		 * Try to get a full magazine from the depot.  Cycle
		 * through depot(full)->loaded->prev->depot(empty).
		 * Retry if a full magazine was available from the depot.
		 *
		 * Return NULL (caller will fall through) if no magazines
		 * can be found anywhere.
		 */
		d = &depots[zi];
		depot_lock(d);
		emptymag = tp->mags[zi].prev;
		if (emptymag)
			SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine);
		tp->mags[zi].prev = tp->mags[zi].loaded;
		mp = SLIST_FIRST(&d->full);	/* loaded magazine */
		tp->mags[zi].loaded = mp;
		if (mp) {
			SLIST_REMOVE_HEAD(&d->full, nextmagazine);
			MASSERT(MAGAZINE_NOTEMPTY(mp));
			depot_unlock(d);
			continue;
		}
		depot_unlock(d);
		break;
	}

	return (obj);
}

static int
mtmagazine_free(int zi, void *ptr)
{
	thr_mags *tp;
	struct magazine *mp, *loadedmag;
	magazine_depot *d;
	int rc = -1;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(-1);

	/*
	 * Primary per-thread freeing loop
	 */
	for (;;) {
		/*
		 * Make sure a new magazine is available in case we have
		 * to use it.  Staging the newmag allows us to avoid
		 * some locking/reentrancy complexity.
		 *
		 * Temporarily disable the per-thread caches for this
		 * allocation to avoid reentrancy and/or to avoid a
		 * stack overflow if the [zi] happens to be the same that
		 * would be used to allocate the new magazine.
		 */
		if (tp->newmag == NULL) {
			tp->init = -1;
			tp->newmag = _slaballoc(sizeof(struct magazine),
						SAFLAG_ZERO);
			tp->init = 1;
			if (tp->newmag == NULL) {
				rc = -1;
				break;
			}
		}

		/*
		 * If the loaded magazine has space, free directly to it
		 */
		rc = magazine_free(tp->mags[zi].loaded, ptr);
		if (rc == 0)
			break;

		/*
		 * If the prev magazine is empty, swap with the loaded
		 * magazine and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_EMPTY(mp)) {
			MASSERT(mp->rounds == 0);
			swap_mags(&tp->mags[zi]);	/* prev now full */
			continue;
		}

		/*
		 * Try to get an empty magazine from the depot.  Cycle
		 * through depot(empty)->loaded->prev->depot(full).
		 * Retry if an empty magazine was available from the depot.
		 */
		d = &depots[zi];
		depot_lock(d);

		if ((loadedmag = tp->mags[zi].prev) != NULL)
			SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine);
		tp->mags[zi].prev = tp->mags[zi].loaded;
		mp = SLIST_FIRST(&d->empty);
		if (mp) {
			tp->mags[zi].loaded = mp;
			SLIST_REMOVE_HEAD(&d->empty, nextmagazine);
			MASSERT(MAGAZINE_NOTFULL(mp));
		} else {
			mp = tp->newmag;
			tp->newmag = NULL;
			mp->capacity = M_MAX_ROUNDS;
			mp->rounds = 0;
			mp->flags = 0;
			tp->mags[zi].loaded = mp;
		}
		depot_unlock(d);
	}

	return rc;
}

static void
mtmagazine_init(void)
{
	int error;

	error = pthread_key_create(&thread_mags_key, mtmagazine_destructor);
	if (error)
		abort();
}

/*
 * This function is only used by the thread exit destructor
 */
static void
mtmagazine_drain(struct magazine *mp)
{
	void *obj;

	while (MAGAZINE_NOTEMPTY(mp)) {
		obj = magazine_alloc(mp, NULL);
		_slabfree(obj, 0, NULL);
	}
}

/*
 * mtmagazine_destructor()
 *
 * When a thread exits, we reclaim all its resources; all its magazines are
 * drained and the structures are freed.
 *
 * WARNING!  The destructor can be called multiple times if the larger user
 *	     program has its own destructors which run after ours which
 *	     allocate or free memory.
 */
static void
mtmagazine_destructor(void *thrp)
{
	thr_mags *tp = thrp;
	struct magazine *mp;
	int i;

	/*
	 * Prevent further use of mtmagazines while we are destructing
	 * them, as well as for any destructors which are run after us
	 * prior to the thread actually being destroyed.
	 */
	tp->init = -1;

	for (i = 0; i < NZONES; i++) {
		mp = tp->mags[i].loaded;
		tp->mags[i].loaded = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}

		mp = tp->mags[i].prev;
		tp->mags[i].prev = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}
	}

	if (tp->newmag) {
		mp = tp->newmag;
		tp->newmag = NULL;
		_slabfree(mp, 0, NULL);
	}
}

/*
 * zone_alloc()
 *
 * Attempt to allocate a zone from the zone magazine; the zone magazine has
 * M_BURST_EARLY enabled, so honor the burst request from the magazine.
 */
static slzone_t
zone_alloc(int flags)
{
	slglobaldata_t slgd = &SLGlobalData;
	int burst = 1;
	int i, j;
	slzone_t z;

	zone_magazine_lock();
	slgd_unlock(slgd);

	z = magazine_alloc(&zone_magazine, &burst);
	if (z == NULL && burst == 1) {
		zone_magazine_unlock();
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
	} else if (z == NULL) {
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
		if (z) {
			for (i = 1; i < burst; i++) {
				j = magazine_free(&zone_magazine,
						  (char *) z + (ZoneSize * i));
				MASSERT(j == 0);
			}
		}
		zone_magazine_unlock();
	} else {
		z->z_Flags |= SLZF_UNOTZEROD;
		zone_magazine_unlock();
	}
	slgd_lock(slgd);
	return z;
}
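
/*
 * Illustrative burst behaviour of zone_alloc() above (assuming the default
 * 64KB ZoneSize): the first miss on the zone magazine sees burst_factor == 8,
 * maps 8 * ZoneSize (512KB) in a single _vmem_alloc(), returns one zone and
 * parks the remaining seven in the zone magazine.  Because M_BURST_EARLY is
 * set, each miss also decays burst_factor by M_BURST_NSCALE, so later misses
 * burst less until bursting is disabled.
 */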
/*
 * zone_free()
 *
 * Release a zone and unlock the slgd lock.
 */
static void
zone_free(void *z)
{
	slglobaldata_t slgd = &SLGlobalData;
	void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {};
	int i, j;

	zone_magazine_lock();
	slgd_unlock(slgd);

	bzero(z, sizeof(struct slzone));

	if (opt_madvise)
		madvise(z, ZoneSize, MADV_FREE);

	i = magazine_free(&zone_magazine, z);

	/*
	 * If we failed to free, collect excess magazines; release the zone
	 * magazine lock, and then free to the system via _vmem_free.  Re-enable
	 * BURST mode for the magazine.
	 */
	if (i == -1) {
		j = zone_magazine.rounds - zone_magazine.low_factor;
		for (i = 0; i < j; i++) {
			excess[i] = magazine_alloc(&zone_magazine, NULL);
			MASSERT(excess[i] != NULL);
		}

		zone_magazine_unlock();

		for (i = 0; i < j; i++)
			_vmem_free(excess[i], ZoneSize);

		_vmem_free(z, ZoneSize);
	} else {
		zone_magazine_unlock();
	}
}

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
	char *addr;
	char *save;
	size_t excess;

	/*
	 * Map anonymous private memory.
	 */
	addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (addr == MAP_FAILED)
		return(NULL);

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (align - 1);

	if (excess) {
		excess = align - excess;
		save = addr;

		munmap(save + excess, size - excess);
		addr = _vmem_alloc(size, align, flags);
		munmap(save, excess);
	}
	return((void *)addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}