/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009,2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com> and by
 * Venkatesh Srinivas <me@endeavour.zapto.org>.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $Id: nmalloc.c,v 1.37 2010/07/23 08:20:35 vsrinivas Exp $
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * are nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them on free().  One- and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 *	API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that, power-of-2 sized allocations are page-aligned.  Non
 *	power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
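 *
 *	An illustrative sketch of the rules above (the exact chunk sizes
 *	simply follow from the chunking table and are not additional
 *	guarantees):
 *
 *	    void *a = malloc(100);	// rounded up to 104, 8-byte aligned
 *	    void *b = malloc(256);	// power-of-2 size, 256-byte aligned
 *	    void *c = malloc(0);	// special non-NULL zero-length pointer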
 *
 * Multithreaded enhancements for small allocations introduced August 2010.
 * These are in the spirit of 'libumem'.  See:
 *	Bonwick, J.; Adams, J. (2001). "Magazines and Vmem: Extending the
 *	slab allocator to many CPUs and arbitrary resources".  In Proc. 2001
 *	USENIX Technical Conference.  USENIX Association.
 *
 * TUNING
 *
 * The value of the environment variable MALLOC_OPTIONS is a character string
 * containing various flags to tune nmalloc.
 *
 * 'U' / ['u']	Generate / do not generate utrace entries for ktrace(1).
 *		This will generate utrace events for all malloc,
 *		realloc, and free calls.  There are tools (mtrplay) to
 *		replay an allocation pattern or to graph heap structure
 *		(mtrgraph) which can interpret these logs.
 * 'Z' / ['z']	Zero out / do not zero all allocations.
 *		Each new byte of memory allocated by malloc, realloc, or
 *		reallocf will be initialized to 0.  This is intended for
 *		debugging and will affect performance negatively.
 * 'H' / ['h']	Pass a hint to the kernel about pages unused by the
 *		allocation functions.
 */

/* cc -shared -fPIC -g -O -I/usr/src/lib/libc/include -o nmalloc.so nmalloc.c */

#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/queue.h>
#include <sys/uio.h>
#include <sys/ktrace.h>
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>
#include <pthread.h>

#include "spinlock.h"
#include "un-namespace.h"

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	bytes;		/* bytes allocated */
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;
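/*
 * Illustrative sketch (not part of the allocator): a freed chunk's own
 * first word is reused as the c_Next link, so the per-page free lists
 * cost no memory beyond the free chunks themselves.  Assuming a
 * hypothetical list head, pushing a chunk looks like:
 *
 *	static void
 *	push_free_chunk(slchunk_t *head, void *mem)
 *	{
 *		slchunk_t c = mem;
 *
 *		c->c_Next = *head;		// link stored inside the chunk
 *		*head = c;
 *	}
 */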
/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
	int32_t		z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone	*z_Next;	/* ZoneAry[] link if z_NFree non-zero */
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
#if defined(INVARIANTS)
	__uint32_t	z_Bitmap[];	/* bitmap of free chunks / sanity */
#endif
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

#define FASTSLABREALLOC		0x02

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)
#define ZERO_LENGTH_PTR	((void *)&malloc_dummy_pointer)

#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

#define SAFLAG_ZERO	0x0001
#define SAFLAG_PASSIVE	0x0002

/*
 * Thread control
 */

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			} while (0)

/*
 * Magazines
 */

#define M_MAX_ROUNDS	64
#define M_ZONE_ROUNDS	64
#define M_LOW_ROUNDS	32
#define M_INIT_ROUNDS	8
#define M_BURST_FACTOR	8
#define M_BURST_NSCALE	2

#define M_BURST		0x0001
#define M_BURST_EARLY	0x0002

struct magazine {
	SLIST_ENTRY(magazine) nextmagazine;

	int		flags;
	int		capacity;	/* Max rounds in this magazine */
	int		rounds;		/* Current number of free rounds */
	int		burst_factor;	/* Number of blocks to prefill with */
	int		low_factor;	/* Free till low_factor from full mag */
	void		*objects[M_MAX_ROUNDS];
};

SLIST_HEAD(magazinelist, magazine);

static spinlock_t zone_mag_lock;
static struct magazine zone_magazine = {
	.flags = M_BURST | M_BURST_EARLY,
	.capacity = M_ZONE_ROUNDS,
	.rounds = 0,
	.burst_factor = M_BURST_FACTOR,
	.low_factor = M_LOW_ROUNDS
};

#define MAGAZINE_FULL(mp)	(mp->rounds == mp->capacity)
#define MAGAZINE_NOTFULL(mp)	(mp->rounds < mp->capacity)
#define MAGAZINE_EMPTY(mp)	(mp->rounds == 0)
#define MAGAZINE_NOTEMPTY(mp)	(mp->rounds != 0)
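/*
 * Illustrative sketch (not part of the build): a magazine is a simple
 * LIFO array of object pointers, so pushing and popping against it are
 * O(1) array operations guarded only by the MAGAZINE_* predicates.  A
 * stand-alone model of the behavior, with hypothetical names:
 *
 *	void *obj;
 *	struct magazine m = { .capacity = M_MAX_ROUNDS, .rounds = 0 };
 *
 *	if (MAGAZINE_NOTFULL(&m))
 *		m.objects[m.rounds++] = some_chunk;	// what magazine_free() does
 *	if (MAGAZINE_NOTEMPTY(&m))
 *		obj = m.objects[--m.rounds];		// what magazine_alloc() does
 */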
/*
 * Each thread will have a pair of magazines per size-class (NZONES).
 * The loaded magazine will support immediate allocations; the previous
 * magazine will either be full or empty and can be swapped in at need.
 */
typedef struct magazine_pair {
	struct magazine	*loaded;
	struct magazine	*prev;
} magazine_pair;

/* A depot is a collection of magazines for a single zone. */
typedef struct magazine_depot {
	struct magazinelist full;
	struct magazinelist empty;
	spinlock_t	lock;
} magazine_depot;

typedef struct thr_mags {
	magazine_pair	mags[NZONES];
	struct magazine	*newmag;
	int		init;
} thr_mags;

/*
 * With this attribute set, no function call is required to access this
 * thread-local variable when the code is compiled -fPIC.
 */
#define TLS_ATTRIBUTE __attribute__ ((tls_model ("initial-exec")));

static int mtmagazine_free_live;
static __thread thr_mags thread_mags TLS_ATTRIBUTE;
static pthread_key_t thread_mags_key;
static pthread_once_t thread_mags_once = PTHREAD_ONCE_INIT;
static magazine_depot depots[NZONES];

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static int opt_madvise = 0;
static int opt_utrace = 0;
static int g_malloc_flags = 0;
static struct slglobaldata SLGlobalData;
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static int malloc_panic;
static int malloc_dummy_pointer;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr, int, bigalloc_t *);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void *magazine_alloc(struct magazine *, int *);
static int magazine_free(struct magazine *, void *);
static void *mtmagazine_alloc(int zi);
static int mtmagazine_free(int zi, void *);
static void mtmagazine_init(void);
static void mtmagazine_destructor(void *);
static slzone_t zone_alloc(int flags);
static void zone_free(void *z);
static void _mpanic(const char *ctl, ...);
static void malloc_init(void) __constructor(0);
#if defined(INVARIANTS)
static void chunk_mark_allocated(slzone_t z, void *chunk);
static void chunk_mark_free(slzone_t z, void *chunk);
#endif

struct nmalloc_utrace {
	void *p;
	size_t s;
	void *r;
};

#define UTRACE(a, b, c)						\
	if (opt_utrace) {					\
		struct nmalloc_utrace ut = {			\
			.p = (a),				\
			.s = (b),				\
			.r = (c)				\
		};						\
		utrace(&ut, sizeof(ut));			\
	}
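/*
 * Illustrative sketch (not part of the build): the utrace records emitted
 * by the UTRACE() invocations below encode the call type purely by which
 * fields are zero, matching malloc() [UTRACE(0, size, ptr)], free()
 * [UTRACE(ptr, 0, 0)], realloc() [UTRACE(ptr, size, ret)] and the
 * allocator-init marker [UTRACE((void *)-1, 0, NULL)].  A hypothetical
 * decoder for a captured record could classify them as follows:
 *
 *	static const char *
 *	nmalloc_utrace_kind(const struct nmalloc_utrace *ut)
 *	{
 *		if (ut->p == (void *)-1)
 *			return ("init");
 *		if (ut->p == NULL && ut->r != NULL)
 *			return ("malloc");
 *		if (ut->p != NULL && ut->s == 0 && ut->r == NULL)
 *			return ("free");
 *		return ("realloc");
 *	}
 */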
#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int  use_malloc_pattern;
#endif

static void
malloc_init(void)
{
	const char *p = NULL;

	if (issetugid() == 0)
		p = getenv("MALLOC_OPTIONS");

	for (; p != NULL && *p != '\0'; p++) {
		switch(*p) {
		case 'u':	opt_utrace = 0; break;
		case 'U':	opt_utrace = 1; break;
		case 'h':	opt_madvise = 0; break;
		case 'H':	opt_madvise = 1; break;
		case 'z':	g_malloc_flags = 0; break;
		case 'Z':	g_malloc_flags = SAFLAG_ZERO; break;
		default:
			break;
		}
	}

	UTRACE((void *) -1, 0, NULL);
}

/*
 * We have to install a handler for nmalloc thread teardowns when
 * the thread is created.  We cannot delay this because destructors in
 * sophisticated userland programs can call malloc() for the first time
 * during their thread exit.
 *
 * This routine is called directly from pthreads.
 */
void
_nmalloc_thr_init(void)
{
	thr_mags *tp;

	/*
	 * Disallow mtmagazine operations until the mtmagazine is
	 * initialized.
	 */
	tp = &thread_mags;
	tp->init = -1;

	pthread_setspecific(thread_mags_key, tp);
	if (mtmagazine_free_live == 0) {
		mtmagazine_free_live = 1;
		pthread_once(&thread_mags_once, mtmagazine_init);
	}
	tp->init = 1;
}

/*
 * Thread locks.
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINLOCK(&slgd->Spinlock);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINUNLOCK(&slgd->Spinlock);
}

static __inline void
depot_lock(magazine_depot *dp)
{
	if (__isthreaded)
		_SPINLOCK(&dp->lock);
}

static __inline void
depot_unlock(magazine_depot *dp)
{
	if (__isthreaded)
		_SPINUNLOCK(&dp->lock);
}

static __inline void
zone_magazine_lock(void)
{
	if (__isthreaded)
		_SPINLOCK(&zone_mag_lock);
}

static __inline void
zone_magazine_unlock(void)
{
	if (__isthreaded)
		_SPINUNLOCK(&zone_mag_lock);
}

static __inline void
swap_mags(magazine_pair *mp)
{
	struct magazine *tmp;
	tmp = mp->loaded;
	mp->loaded = mp->prev;
	mp->prev = tmp;
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
	int hv;

	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
	      ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

	return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (__isthreaded)
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	return(bigp);
}
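/*
 * Illustrative sketch (not part of the build): the same unmasked hash
 * value selects both the chain and the lock, so each of the BIGXSIZE
 * spinlocks covers BIGHSIZE / BIGXSIZE (i.e. 16) hash chains.  For a
 * hypothetical pointer value:
 *
 *	void *p = (void *)0x7f1234567000;
 *	int hv = _bigalloc_hash(p);
 *	bigalloc_t *chain = &bigalloc_array[hv & BIGHMASK];	// 1 of 1024 chains
 *	spinlock_t *lock  = &bigspin_array[hv & BIGXMASK];	// 1 of 64 locks
 */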
/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (*bigp == NULL)
		return(NULL);
	if (__isthreaded) {
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	}
	return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
	int hv;

	if (__isthreaded) {
		hv = _bigalloc_hash(ptr);
		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */
	if (n < 128) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8 - 1);		/* 8 byte chunks, 16 zones */
	}
	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 7);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / 32 + 15);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / 64 + 23);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / 128 + 31);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / 256 + 39);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / 512 + 47);
	}
#if ZALLOC_ZONE_LIMIT > 8192
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / 1024 + 55);
	}
#endif
#if ZALLOC_ZONE_LIMIT > 16384
	if (n < 32768) {
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / 2048 + 63);
	}
#endif
	_mpanic("Unexpected byte count %d", n);
	return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
	void *ptr;

	ptr = _slaballoc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, size, ptr);
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
	void *ptr;

	ptr = _slaballoc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	else
		UTRACE(0, number * size, ptr);
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
	void *ret;
	ret = _slabrealloc(ptr, size);
	if (ret == NULL)
		errno = ENOMEM;
	else
		UTRACE(ptr, size, ret);
	return(ret);
}
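/*
 * Illustrative sketch (not part of the build): zoneindex() both rounds
 * the request up to the zone's chunk size and reports the zone number.
 * Working a few requests through the code above:
 *
 *	size_t sz, chunking;
 *	int zi;
 *
 *	sz = 100;  zi = zoneindex(&sz, &chunking); // sz=104,  chunking=8,   zi=12
 *	sz = 513;  zi = zoneindex(&sz, &chunking); // sz=576,  chunking=64,  zi=32
 *	sz = 1500; zi = zoneindex(&sz, &chunking); // sz=1536, chunking=128, zi=43
 */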
/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	bigalloc_t *bigp;
	bigalloc_t big;
	size_t chunking;
	int zi;

	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * Our zone mechanism guarantees same-sized alignment for any
	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
	 * we can just call _slaballoc() and be done.  We round size up
	 * to the nearest alignment boundary to improve our odds of
	 * it becoming a power-of-2 if it wasn't before.
	 */
	if (size <= alignment)
		size = alignment;
	else
		size = (size + alignment - 1) & ~(size_t)(alignment - 1);
	if (size < PAGE_SIZE && (size | (size - 1)) + 1 == (size << 1)) {
		*memptr = _slaballoc(size, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * Otherwise locate a zone with a chunking that matches
	 * the requested alignment, within reason.  Consider two cases:
	 *
	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
	 *     we find will be the best fit because the chunking will be
	 *     greater or equal to the alignment.
	 *
	 * (2) A 513-byte allocation on a 256-byte alignment.  In this case
	 *     the first zoneindex we find will be for 576 byte allocations
	 *     with a chunking of 64, which is not sufficient.  To fix this
	 *     we simply find the nearest power-of-2 >= size and use the
	 *     same side-effect of _slaballoc() which guarantees
	 *     same-alignment on a power-of-2 allocation.
	 */
	if (size < PAGE_SIZE) {
		zi = zoneindex(&size, &chunking);
		if (chunking >= alignment) {
			*memptr = _slaballoc(size, 0);
			return(*memptr ? 0 : ENOMEM);
		}
		if (size >= 1024)
			alignment = 1024;
		if (size >= 16384)
			alignment = 16384;
		while (alignment < size)
			alignment <<= 1;
		*memptr = _slaballoc(alignment, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * If the slab allocator cannot handle it use vmem_alloc().
	 *
	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (size < alignment)
		size = alignment;
	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
	*memptr = _vmem_alloc(size, alignment, 0);
	if (*memptr == NULL)
		return(ENOMEM);

	big = _slaballoc(sizeof(struct bigalloc), 0);
	if (big == NULL) {
		_vmem_free(*memptr, size);
		*memptr = NULL;
		return(ENOMEM);
	}
	bigp = bigalloc_lock(*memptr);
	big->base = *memptr;
	big->bytes = size;
	big->next = *bigp;
	*bigp = big;
	bigalloc_unlock(*memptr);

	return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
	UTRACE(ptr, 0, 0);
	_slabfree(ptr, 0, NULL);
}
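/*
 * Illustrative sketch (not part of the build): caller-side use of the
 * posix_memalign() implemented above.  Note that on failure it returns
 * the error number directly rather than setting errno.
 *
 *	void *buf;
 *	int error;
 *
 *	error = posix_memalign(&buf, 64, 1000); // 64-byte aligned, >= 1000 bytes
 *	if (error == 0) {
 *		// ... use buf ...
 *		free(buf);
 *	}
 */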
/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 *	Allocate memory via the slab allocator.  If the request is too large,
 *	or if it is page-aligned beyond a certain size, we fall back to the
 *	KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
	slzone_t z;
	slchunk_t chunk;
	slglobaldata_t slgd;
	size_t chunking;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int off;
	void *obj;

	/*
	 * Handle the degenerate size == 0 case.  Yes, this does happen.
	 * Return a special pointer.  This is to maintain compatibility with
	 * the original malloc implementation.  Certain devices, such as the
	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
	 * also realloc() later on.  Joy.
	 */
	if (size == 0)
		return(ZERO_LENGTH_PTR);

	/* Capture global flags */
	flags |= g_malloc_flags;

	/*
	 * Handle large allocations directly.  There should not be very many
	 * of these so performance is not a big issue.
	 *
	 * The backend allocator is pretty nasty on a SMP system.  Use the
	 * slab allocator for one and two page-sized chunks even though we
	 * lose some efficiency.
	 */
	if (size >= ZoneLimit ||
	    ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
		bigalloc_t big;
		bigalloc_t *bigp;

		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
		chunk = _vmem_alloc(size, PAGE_SIZE, flags);
		if (chunk == NULL)
			return(NULL);

		big = _slaballoc(sizeof(struct bigalloc), 0);
		if (big == NULL) {
			_vmem_free(chunk, size);
			return(NULL);
		}
		bigp = bigalloc_lock(chunk);
		big->base = chunk;
		big->bytes = size;
		big->next = *bigp;
		*bigp = big;
		bigalloc_unlock(chunk);

		return(chunk);
	}

	/* Compute allocation zone; zoneindex will panic on excessive sizes */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);

	obj = mtmagazine_alloc(zi);
	if (obj != NULL) {
		if (flags & SAFLAG_ZERO)
			bzero(obj, size);
		return (obj);
	}

	slgd = &SLGlobalData;
	slgd_lock(slgd);

	/*
	 * Attempt to allocate out of an existing zone.  If all zones are
	 * exhausted pull one off the free list or allocate a new one.
	 */
	if ((z = slgd->ZoneAry[zi]) == NULL) {
		z = zone_alloc(flags);
		if (z == NULL)
			goto fail;

		/*
		 * How big is the base structure?
		 */
#if defined(INVARIANTS)
		/*
		 * Make room for z_Bitmap.  An exact calculation is
		 * somewhat more complicated so don't make an exact
		 * calculation.
		 */
		off = offsetof(struct slzone,
			       z_Bitmap[(ZoneSize / size + 31) / 32]);
		bzero(z->z_Bitmap, (ZoneSize / size + 31) / 8);
#else
		off = sizeof(struct slzone);
#endif

		/*
		 * Align the storage in the zone based on the chunking.
		 *
		 * Guarantee power-of-2 alignment for power-of-2-sized
		 * chunks.  Otherwise align based on the chunking size
		 * (typically 8 or 16 bytes for small allocations).
		 *
		 * NOTE: Allocations >= ZoneLimit are governed by the
		 * bigalloc code and typically only guarantee page-alignment.
		 *
		 * Set initial conditions for UIndex near the zone header
		 * to reduce unnecessary page faults, vs semi-randomization
		 * to improve L1 cache saturation.
		 */
		if ((size | (size - 1)) + 1 == (size << 1))
			off = (off + size - 1) & ~(size - 1);
		else
			off = (off + chunking - 1) & ~(chunking - 1);
		z->z_Magic = ZALLOC_SLAB_MAGIC;
		z->z_ZoneIndex = zi;
		z->z_NMax = (ZoneSize - off) / size;
		z->z_NFree = z->z_NMax;
		z->z_BasePtr = (char *)z + off;
		z->z_UIndex = z->z_UEndIndex = 0;
		z->z_ChunkSize = size;
		z->z_FirstFreePg = ZonePageCount;
		z->z_Next = slgd->ZoneAry[zi];
		slgd->ZoneAry[zi] = z;
		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
			flags &= ~SAFLAG_ZERO;	/* already zero'd */
			flags |= SAFLAG_PASSIVE;
		}

		/*
		 * Slide the base index for initial allocations out of the
		 * next zone we create so we do not over-weight the lower
		 * part of the cpu memory caches.
		 */
		slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
				& (ZALLOC_MAX_ZONE_SIZE - 1);
	}

	/*
	 * Ok, we have a zone from which at least one chunk is available.
	 *
	 * Remove us from the ZoneAry[] when we become empty
	 */
	MASSERT(z->z_NFree > 0);

	if (--z->z_NFree == 0) {
		slgd->ZoneAry[zi] = z->z_Next;
		z->z_Next = NULL;
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < ZonePageCount) {
		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
			/*
			 * Diagnostic: c_Next is not total garbage.
			 */
			MASSERT(chunk->c_Next == NULL ||
				((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
				((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
#ifdef INVARIANTS
			chunk_mark_allocated(z, chunk);
#endif
			MASSERT((uintptr_t)chunk & ZoneMask);
			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
			goto done;
		}
		++z->z_FirstFreePg;
	}

	/*
	 * No chunks are available but NFree said we had some memory,
	 * so it must be available in the never-before-used-memory
	 * area governed by UIndex.  The consequences are very
	 * serious if our zone got corrupted so we use an explicit
	 * panic rather than a KASSERT.
	 */
	chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

	if (++z->z_UIndex == z->z_NMax)
		z->z_UIndex = 0;
	if (z->z_UIndex == z->z_UEndIndex) {
		if (z->z_NFree != 0)
			_mpanic("slaballoc: corrupted zone");
	}

	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
		flags &= ~SAFLAG_ZERO;
		flags |= SAFLAG_PASSIVE;
	}
#if defined(INVARIANTS)
	chunk_mark_allocated(z, chunk);
#endif

done:
	slgd_unlock(slgd);
	if (flags & SAFLAG_ZERO) {
		bzero(chunk, size);
#ifdef INVARIANTS
	} else if ((flags & (SAFLAG_ZERO|SAFLAG_PASSIVE)) == 0) {
		if (use_malloc_pattern) {
			for (i = 0; i < size; i += sizeof(int)) {
				*(int *)((char *)chunk + i) = -1;
			}
		}
		/* avoid accidental double-free check */
		chunk->c_Next = (void *)-1;
#endif
	}
	return(chunk);
fail:
	slgd_unlock(slgd);
	return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
	bigalloc_t *bigp;
	void *nptr;
	slzone_t z;
	size_t chunking;

	if (ptr == NULL || ptr == ZERO_LENGTH_PTR) {
		return(_slaballoc(size, 0));
	}

	if (size == 0) {
		free(ptr);
		return(ZERO_LENGTH_PTR);
	}

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		bigalloc_t big;
		size_t bigbytes;

		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
				bigbytes = big->bytes;
				if (bigbytes == size) {
					bigalloc_unlock(ptr);
					return(ptr);
				}
				*bigp = big->next;
				bigalloc_unlock(ptr);
				if ((nptr = _slaballoc(size, 0)) == NULL) {
					/* Relink block */
					bigp = bigalloc_lock(ptr);
					big->next = *bigp;
					*bigp = big;
					bigalloc_unlock(ptr);
					return(NULL);
				}
				if (size > bigbytes)
					size = bigbytes;
				bcopy(ptr, nptr, size);
				_slabfree(ptr, FASTSLABREALLOC, &big);
				return(nptr);
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Get the original allocation's zone.  If the new request winds
	 * up using the same chunk size we do not have to do anything.
	 *
	 * NOTE: We don't have to lock the globaldata here, the fields we
	 * access here will not change at least as long as we have control
	 * over the allocation.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	/*
	 * Use zoneindex() to chunk-align the new size, as long as the
	 * new size is not too large.
	 */
	if (size < ZoneLimit) {
		zoneindex(&size, &chunking);
		if (z->z_ChunkSize == size) {
			return(ptr);
		}
	}

	/*
	 * Allocate memory for the new request size and copy as appropriate.
	 */
	if ((nptr = _slaballoc(size, 0)) != NULL) {
		if (size > z->z_ChunkSize)
			size = z->z_ChunkSize;
		bcopy(ptr, nptr, size);
		_slabfree(ptr, 0, NULL);
	}

	return(nptr);
}
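/*
 * Illustrative sketch (not part of the build): consequences of the
 * chunk-size reuse rule in _slabrealloc() for callers of realloc():
 *
 *	char *p = malloc(100);		// served from a 104-byte chunk
 *	char *q = realloc(p, 104);	// same chunk size: q == p
 *	char *r = realloc(q, 200);	// new 208-byte chunk, data copied
 *	char *s = realloc(r, 0);	// frees r, returns the zero-length pointer
 */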
/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * flags:
 *	FASTSLABREALLOC		Fast call from realloc, *rbigp already
 *				unlinked.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr, int flags, bigalloc_t *rbigp)
{
	slzone_t z;
	slchunk_t chunk;
	bigalloc_t big;
	bigalloc_t *bigp;
	slglobaldata_t slgd;
	size_t size;
	int zi;
	int pgno;

	/* Fast realloc path for big allocations */
	if (flags & FASTSLABREALLOC) {
		big = *rbigp;
		goto fastslabrealloc;
	}

	/*
	 * Handle NULL frees and special 0-byte allocations
	 */
	if (ptr == NULL)
		return;
	if (ptr == ZERO_LENGTH_PTR)
		return;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				*bigp = big->next;
				bigalloc_unlock(ptr);
fastslabrealloc:
				size = big->bytes;
				_slabfree(big, 0, NULL);
#ifdef INVARIANTS
				MASSERT(sizeof(weirdary) <= size);
				bcopy(weirdary, ptr, sizeof(weirdary));
#endif
				_vmem_free(ptr, size);
				return;
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Zone case.  Figure out the zone based on the fact that it is
	 * ZoneSize aligned.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	size = z->z_ChunkSize;
	zi = z->z_ZoneIndex;

	if (g_malloc_flags & SAFLAG_ZERO)
		bzero(ptr, size);

	if (mtmagazine_free(zi, ptr) == 0)
		return;

	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
	chunk = ptr;
	slgd = &SLGlobalData;
	slgd_lock(slgd);

#ifdef INVARIANTS
	/*
	 * Attempt to detect a double-free.  To reduce overhead we only check
	 * if there appears to be a link pointer at the base of the data.
	 */
	if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
		slchunk_t scan;

		for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
			if (scan == chunk)
				_mpanic("Double free at %p", chunk);
		}
	}
	chunk_mark_free(z, chunk);
#endif

	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
#ifdef INVARIANTS
	if (z->z_ChunkSize < sizeof(weirdary))
		bcopy(weirdary, chunk, z->z_ChunkSize);
	else
		bcopy(weirdary, chunk, sizeof(weirdary));
#endif

	/*
	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
	 * z_FirstFreePg.
	 */
	chunk->c_Next = z->z_PageAry[pgno];
	z->z_PageAry[pgno] = chunk;
	if (z->z_FirstFreePg > pgno)
		z->z_FirstFreePg = pgno;

	/*
	 * Bump the number of free chunks.  If it becomes non-zero the zone
	 * must be added back onto the appropriate list.
	 */
	if (z->z_NFree++ == 0) {
		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
		slgd->ZoneAry[z->z_ZoneIndex] = z;
	}

	/*
	 * If the zone becomes totally free then release it.
	 */
	if (z->z_NFree == z->z_NMax) {
		slzone_t *pz;

		pz = &slgd->ZoneAry[z->z_ZoneIndex];
		while (z != *pz)
			pz = &(*pz)->z_Next;
		*pz = z->z_Next;
		z->z_Magic = -1;
		z->z_Next = NULL;
		zone_free(z);
		/* slgd lock released */
		return;
	}
	slgd_unlock(slgd);
}

#if defined(INVARIANTS)
/*
 * Helper routines for sanity checks
 */
static
void
chunk_mark_allocated(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) == 0);
	*bitptr |= 1 << bitdex;
}

static
void
chunk_mark_free(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) != 0);
	*bitptr &= ~(1 << bitdex);
}

#endif

/*
 * Allocate an object from the given magazine.  NULL is returned and *burst
 * is adjusted if the magazine is empty.
 */
static __inline void *
magazine_alloc(struct magazine *mp, int *burst)
{
	void *obj;

	if (mp == NULL)
		return(NULL);
	if (MAGAZINE_NOTEMPTY(mp)) {
		obj = mp->objects[--mp->rounds];
		return(obj);
	}

	/*
	 * Return burst factor to caller along with NULL
	 */
	if ((mp->flags & M_BURST) && (burst != NULL)) {
		*burst = mp->burst_factor;
	}
	/* Reduce burst factor by NSCALE; if it hits 1, disable BURST */
	if ((mp->flags & M_BURST) && (mp->flags & M_BURST_EARLY) &&
	    (burst != NULL)) {
		mp->burst_factor -= M_BURST_NSCALE;
		if (mp->burst_factor <= 1) {
			mp->burst_factor = 1;
			mp->flags &= ~(M_BURST);
			mp->flags &= ~(M_BURST_EARLY);
		}
	}
	return (NULL);
}

static __inline int
magazine_free(struct magazine *mp, void *p)
{
	if (mp != NULL && MAGAZINE_NOTFULL(mp)) {
		mp->objects[mp->rounds++] = p;
		return 0;
	}

	return -1;
}

static void *
mtmagazine_alloc(int zi)
{
	thr_mags *tp;
	struct magazine *mp, *emptymag;
	magazine_depot *d;
	void *obj;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(NULL);

	/*
	 * Primary per-thread allocation loop
	 */
	for (;;) {
		/*
		 * If the loaded magazine has rounds, allocate and return
		 */
		mp = tp->mags[zi].loaded;
		obj = magazine_alloc(mp, NULL);
		if (obj)
			break;

		/*
		 * If the prev magazine is full, swap with the loaded
		 * magazine and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_FULL(mp)) {
			MASSERT(mp->rounds != 0);
			swap_mags(&tp->mags[zi]);	/* prev now empty */
			continue;
		}

		/*
		 * Try to get a full magazine from the depot.  Cycle
		 * through depot(full)->loaded->prev->depot(empty).
		 * Retry if a full magazine was available from the depot.
		 *
		 * Return NULL (caller will fall through) if no magazines
		 * can be found anywhere.
		 */
		d = &depots[zi];
		depot_lock(d);
		emptymag = tp->mags[zi].prev;
		if (emptymag)
			SLIST_INSERT_HEAD(&d->empty, emptymag, nextmagazine);
		tp->mags[zi].prev = tp->mags[zi].loaded;
		mp = SLIST_FIRST(&d->full);	/* loaded magazine */
		tp->mags[zi].loaded = mp;
		if (mp) {
			SLIST_REMOVE_HEAD(&d->full, nextmagazine);
			MASSERT(MAGAZINE_NOTEMPTY(mp));
			depot_unlock(d);
			continue;
		}
		depot_unlock(d);
		break;
	}

	return (obj);
}

static int
mtmagazine_free(int zi, void *ptr)
{
	thr_mags *tp;
	struct magazine *mp, *loadedmag;
	magazine_depot *d;
	int rc = -1;

	/*
	 * Do not try to access per-thread magazines while the mtmagazine
	 * is being initialized or destroyed.
	 */
	tp = &thread_mags;
	if (tp->init < 0)
		return(-1);

	/*
	 * Primary per-thread freeing loop
	 */
	for (;;) {
		/*
		 * Make sure a new magazine is available in case we have
		 * to use it.  Staging the newmag allows us to avoid
		 * some locking/reentrancy complexity.
		 *
		 * Temporarily disable the per-thread caches for this
		 * allocation to avoid reentrancy and/or to avoid a
		 * stack overflow if [zi] happens to be the same zone
		 * that would be used to allocate the new magazine.
		 */
		if (tp->newmag == NULL) {
			tp->init = -1;
			tp->newmag = _slaballoc(sizeof(struct magazine),
						SAFLAG_ZERO);
			tp->init = 1;
			if (tp->newmag == NULL) {
				rc = -1;
				break;
			}
		}

		/*
		 * If the loaded magazine has space, free directly to it
		 */
		rc = magazine_free(tp->mags[zi].loaded, ptr);
		if (rc == 0)
			break;

		/*
		 * If the prev magazine is empty, swap with the loaded
		 * magazine and retry.
		 */
		mp = tp->mags[zi].prev;
		if (mp && MAGAZINE_EMPTY(mp)) {
			MASSERT(mp->rounds == 0);
			swap_mags(&tp->mags[zi]);	/* prev now full */
			continue;
		}

		/*
		 * Try to get an empty magazine from the depot.  Cycle
		 * through depot(empty)->loaded->prev->depot(full).
		 * Retry if an empty magazine was available from the depot.
		 */
		d = &depots[zi];
		depot_lock(d);

		if ((loadedmag = tp->mags[zi].prev) != NULL)
			SLIST_INSERT_HEAD(&d->full, loadedmag, nextmagazine);
		tp->mags[zi].prev = tp->mags[zi].loaded;
		mp = SLIST_FIRST(&d->empty);
		if (mp) {
			tp->mags[zi].loaded = mp;
			SLIST_REMOVE_HEAD(&d->empty, nextmagazine);
			MASSERT(MAGAZINE_NOTFULL(mp));
		} else {
			mp = tp->newmag;
			tp->newmag = NULL;
			mp->capacity = M_MAX_ROUNDS;
			mp->rounds = 0;
			mp->flags = 0;
			tp->mags[zi].loaded = mp;
		}
		depot_unlock(d);
	}

	return rc;
}

static void
mtmagazine_init(void)
{
	int error;

	error = pthread_key_create(&thread_mags_key, mtmagazine_destructor);
	if (error)
		abort();
}

/*
 * This function is only used by the thread exit destructor
 */
static void
mtmagazine_drain(struct magazine *mp)
{
	void *obj;

	while (MAGAZINE_NOTEMPTY(mp)) {
		obj = magazine_alloc(mp, NULL);
		_slabfree(obj, 0, NULL);
	}
}
/*
 * mtmagazine_destructor()
 *
 * When a thread exits, we reclaim all its resources; all its magazines are
 * drained and the structures are freed.
 *
 * WARNING!  The destructor can be called multiple times if the larger user
 *	     program has its own destructors which run after ours which
 *	     allocate or free memory.
 */
static void
mtmagazine_destructor(void *thrp)
{
	thr_mags *tp = thrp;
	struct magazine *mp;
	int i;

	/*
	 * Prevent further use of mtmagazines while we are destructing
	 * them, as well as for any destructors which are run after us
	 * prior to the thread actually being destroyed.
	 */
	tp->init = -1;

	for (i = 0; i < NZONES; i++) {
		mp = tp->mags[i].loaded;
		tp->mags[i].loaded = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}

		mp = tp->mags[i].prev;
		tp->mags[i].prev = NULL;
		if (mp) {
			if (MAGAZINE_NOTEMPTY(mp))
				mtmagazine_drain(mp);
			_slabfree(mp, 0, NULL);
		}
	}

	if (tp->newmag) {
		mp = tp->newmag;
		tp->newmag = NULL;
		_slabfree(mp, 0, NULL);
	}
}

/*
 * zone_alloc()
 *
 * Attempt to allocate a zone from the zone magazine; the zone magazine has
 * M_BURST_EARLY enabled, so honor the burst request from the magazine.
 */
static slzone_t
zone_alloc(int flags)
{
	slglobaldata_t slgd = &SLGlobalData;
	int burst = 1;
	int i, j;
	slzone_t z;

	zone_magazine_lock();
	slgd_unlock(slgd);

	z = magazine_alloc(&zone_magazine, &burst);
	if (z == NULL && burst == 1) {
		zone_magazine_unlock();
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
	} else if (z == NULL) {
		z = _vmem_alloc(ZoneSize * burst, ZoneSize, flags);
		if (z) {
			for (i = 1; i < burst; i++) {
				j = magazine_free(&zone_magazine,
						  (char *) z + (ZoneSize * i));
				MASSERT(j == 0);
			}
		}
		zone_magazine_unlock();
	} else {
		z->z_Flags |= SLZF_UNOTZEROD;
		zone_magazine_unlock();
	}
	slgd_lock(slgd);
	return z;
}

/*
 * zone_free()
 *
 * Release a zone and unlock the slgd lock.
 */
static void
zone_free(void *z)
{
	slglobaldata_t slgd = &SLGlobalData;
	void *excess[M_ZONE_ROUNDS - M_LOW_ROUNDS] = {};
	int i, j;

	zone_magazine_lock();
	slgd_unlock(slgd);

	bzero(z, sizeof(struct slzone));

	if (opt_madvise)
		madvise(z, ZoneSize, MADV_FREE);

	i = magazine_free(&zone_magazine, z);

	/*
	 * If we failed to free, collect excess magazines; release the zone
	 * magazine lock, and then free to the system via _vmem_free.  Re-enable
	 * BURST mode for the magazine.
	 */
	if (i == -1) {
		j = zone_magazine.rounds - zone_magazine.low_factor;
		for (i = 0; i < j; i++) {
			excess[i] = magazine_alloc(&zone_magazine, NULL);
			MASSERT(excess[i] != NULL);
		}

		zone_magazine_unlock();

		for (i = 0; i < j; i++)
			_vmem_free(excess[i], ZoneSize);

		_vmem_free(z, ZoneSize);
	} else {
		zone_magazine_unlock();
	}
}
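/*
 * Illustrative sketch (not part of the build): how the alignment fixup in
 * _vmem_alloc() below computes the excess.  With a hypothetical mapping
 * address and a 64KB (ZoneSize) alignment request:
 *
 *	char  *addr   = (char *)0x800007000;		// returned by mmap()
 *	size_t align  = 64 * 1024;
 *	size_t excess = (uintptr_t)addr & (align - 1);	// 0x7000 into the block
 *
 *	excess = align - excess;			// 0x9000 to the next boundary
 *	// unmap [addr+excess, addr+size), retry the mapping, then unmap
 *	// [addr, addr+excess)
 */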
/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
	char *addr;
	char *save;
	size_t excess;

	/*
	 * Map anonymous private memory.
	 */
	addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (addr == MAP_FAILED)
		return(NULL);

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (align - 1);

	if (excess) {
		excess = align - excess;
		save = addr;

		munmap(save + excess, size - excess);
		addr = _vmem_alloc(size, align, flags);
		munmap(save, excess);
	}
	return((void *)addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}
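#if 0
/*
 * Illustrative test sketch (never compiled): exercises the public entry
 * points from a separate program, typically with this file built into
 * nmalloc.so (see the cc line near the top) and loaded via LD_PRELOAD.
 * The sizes chosen are hypothetical; they simply pick one slab-zone
 * allocation, one bigalloc/mmap allocation, and the malloc(0) case.
 */
#include <stdlib.h>
#include <string.h>

int
main(void)
{
	void *small = malloc(100);		/* slab zone allocation */
	void *large = malloc(64 * 1024);	/* bigalloc / mmap path */
	void *zero  = malloc(0);		/* special non-NULL pointer */

	memset(small, 0, 100);
	small = realloc(small, 200);		/* moves to a larger chunk */

	free(small);
	free(large);
	free(zero);
	return (0);
}
#endif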