/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * are nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them for free().  One- and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that power-of-2 sized allocations are page-aligned.  Non
 *	power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
 */
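/*
 * Worked example of the chunking table above (illustrative request
 * sizes only).  zoneindex() rounds each request up to its zone's
 * chunk size:
 *
 *	malloc(100)  -> 104-byte chunk   (0-127 band, chunking 8)
 *	malloc(513)  -> 576-byte chunk   (512-1023 band, chunking 64)
 *	malloc(4097) -> 4608-byte chunk  (4096-8191 band, chunking 512)
 *
 * The rounded-up chunk size, not the requested size, determines both
 * the zone used and the alignment guarantee.
 */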
#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>

#include "spinlock.h"
#include "un-namespace.h"

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	bytes;		/* bytes allocated */
	u_long	unused01;
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
	__int32_t	z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone *z_Next;		/* ZoneAry[] link if z_NFree non-zero */
	struct slglobaldata *z_GlobalData;
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
#if defined(INVARIANTS)
	__uint32_t	z_Bitmap[];	/* bitmap of free chunks / sanity */
#endif
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	slzone_t	FreeZones;	/* whole zones that have become free */
	int		NFreeZones;	/* free zone count */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define ZONE_RELS_THRESH	4		/* threshold number of zones */
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)
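/*
 * Illustrative sketch (not compiled): because zones are allocated with
 * ZoneSize alignment, the in-band zone header for any slab-managed
 * pointer can be recovered by masking off the low bits, exactly as
 * _slabfree() and _slabrealloc() do below:
 *
 *	slzone_t z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
 *	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);
 *
 * This is why large (bigalloc) allocations must be checked first -- they
 * are only page-aligned and carry no in-band header.
 */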
/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)
#define ZERO_LENGTH_PTR	((void *)&malloc_dummy_pointer)

#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

#define SLGD_MAX	4			/* parallel allocations */

#define SAFLAG_ZERO	0x0001
#define SAFLAG_PASSIVE	0x0002

/*
 * Thread control
 */

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			    } while (0)

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static struct slglobaldata	SLGlobalData[SLGD_MAX];
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static int malloc_panic;
static int malloc_dummy_pointer;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static __thread slglobaldata_t LastSLGD = &SLGlobalData[0];

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void _mpanic(const char *ctl, ...);
#if defined(INVARIANTS)
static void chunk_mark_allocated(slzone_t z, void *chunk);
static void chunk_mark_free(slzone_t z, void *chunk);
#endif

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

/*
 * Thread locks.
 *
 * NOTE: slgd_trylock() returns 0 or EBUSY
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINLOCK(&slgd->Spinlock);
}

static __inline int
slgd_trylock(slglobaldata_t slgd)
{
	if (__isthreaded)
		return(_SPINTRYLOCK(&slgd->Spinlock));
	return(0);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINUNLOCK(&slgd->Spinlock);
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
	int hv;

	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
	      ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

	return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (__isthreaded)
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	return(bigp);
}
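/*
 * Sketch of the bigalloc bookkeeping used below (illustrative only; the
 * real code lives in _slaballoc(), _slabrealloc() and _slabfree()).
 * Insertion pushes onto the locked hash chain:
 *
 *	bigp = bigalloc_lock(chunk);
 *	big->base = chunk;
 *	big->bytes = size;
 *	big->next = *bigp;
 *	*bigp = big;
 *	bigalloc_unlock(chunk);
 *
 * Lookup walks the chain through an indirect pointer so that removal
 * is a single store:
 *
 *	while ((big = *bigp) != NULL) {
 *		if (big->base == ptr) {
 *			*bigp = big->next;	(unlink)
 *			break;
 *		}
 *		bigp = &big->next;
 *	}
 */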
/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (*bigp == NULL)
		return(NULL);
	if (__isthreaded) {
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	}
	return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
	int hv;

	if (__isthreaded) {
		hv = _bigalloc_hash(ptr);
		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */

	if (n < 128) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8 - 1);		/* 8 byte chunks, 16 zones */
	}
	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 7);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / 32 + 15);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / 64 + 23);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / 128 + 31);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / 256 + 39);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / 512 + 47);
	}
#if ZALLOC_ZONE_LIMIT > 8192
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / 1024 + 55);
	}
#endif
#if ZALLOC_ZONE_LIMIT > 16384
	if (n < 32768) {
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / 2048 + 63);
	}
#endif
	_mpanic("Unexpected byte count %zu", n);
	return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
	void *ptr;

	ptr = _slaballoc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
	void *ptr;

	/*
	 * Guard against number * size overflowing size_t; a wrapped
	 * product would silently return a too-small allocation.
	 */
	if (size != 0 && number > SIZE_MAX / size) {
		errno = ENOMEM;
		return(NULL);
	}
	ptr = _slaballoc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
	ptr = _slabrealloc(ptr, size);
	if (ptr == NULL)
		errno = ENOMEM;
	return(ptr);
}
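/*
 * Illustrative example (not compiled): because of the chunking rules in
 * zoneindex(), a realloc() that stays within the old chunk size returns
 * the original pointer without copying:
 *
 *	p = malloc(100);	100 is rounded up to a 104-byte chunk
 *	q = realloc(p, 104);	still a 104-byte chunk, so q == p
 *	r = realloc(q, 105);	crosses into a 112-byte chunk, so the
 *				data is copied and the old chunk freed
 */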
/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	bigalloc_t *bigp;
	bigalloc_t big;
	size_t chunking;
	int zi;

	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * Our zone mechanism guarantees same-sized alignment for any
	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
	 * we can just call _slaballoc() and be done.  We round size up
	 * to the nearest alignment boundary to improve our odds of
	 * it becoming a power-of-2 if it wasn't before.
	 */
	if (size <= alignment)
		size = alignment;
	else
		size = (size + alignment - 1) & ~(size_t)(alignment - 1);
	if (size < PAGE_SIZE && (size | (size - 1)) + 1 == (size << 1)) {
		*memptr = _slaballoc(size, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * Otherwise locate a zone with a chunking that matches
	 * the requested alignment, within reason.  Consider two cases:
	 *
	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
	 *     we find will be the best fit because the chunking will be
	 *     greater or equal to the alignment.
	 *
	 * (2) A 513 allocation on a 256-byte alignment.  In this case
	 *     the first zoneindex we find will be for 576 byte allocations
	 *     with a chunking of 64, which is not sufficient.  To fix this
	 *     we simply find the nearest power-of-2 >= size and use the
	 *     same side-effect of _slaballoc() which guarantees
	 *     same-alignment on a power-of-2 allocation.
	 */
	if (size < PAGE_SIZE) {
		zi = zoneindex(&size, &chunking);
		if (chunking >= alignment) {
			*memptr = _slaballoc(size, 0);
			return(*memptr ? 0 : ENOMEM);
		}
		if (size >= 1024)
			alignment = 1024;
		if (size >= 16384)
			alignment = 16384;
		while (alignment < size)
			alignment <<= 1;
		*memptr = _slaballoc(alignment, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * If the slab allocator cannot handle it use vmem_alloc().
	 *
	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (size < alignment)
		size = alignment;
	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
	*memptr = _vmem_alloc(size, alignment, 0);
	if (*memptr == NULL)
		return(ENOMEM);

	big = _slaballoc(sizeof(struct bigalloc), 0);
	if (big == NULL) {
		_vmem_free(*memptr, size);
		*memptr = NULL;
		return(ENOMEM);
	}
	bigp = bigalloc_lock(*memptr);
	big->base = *memptr;
	big->bytes = size;
	big->unused01 = 0;
	big->next = *bigp;
	*bigp = big;
	bigalloc_unlock(*memptr);

	return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
	_slabfree(ptr);
}
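/*
 * A note on the power-of-2 test used in posix_memalign() above and in
 * _slaballoc() below (worked example, illustrative only):
 *
 *	(x | (x - 1)) + 1 == (x << 1)	iff x is a power of 2
 *
 * For x = 8:  (8 | 7) + 1 = 16 = 8 << 1, so the test passes.
 * For x = 12: (12 | 11) + 1 = 16, but 12 << 1 = 24, so it fails.
 * (x | (x - 1)) fills in every bit below the highest set bit, so the
 * sum only doubles x when x has exactly one bit set.
 */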
/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.  If the request is too large,
 * or if it is page-aligned beyond a certain size, we fall back to the
 * KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
	slzone_t z;
	slchunk_t chunk;
	slglobaldata_t slgd;
	size_t chunking;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int off;

	/*
	 * Handle the degenerate size == 0 case.  Yes, this does happen.
	 * Return a special pointer.  This is to maintain compatibility with
	 * the original malloc implementation.  Certain devices, such as the
	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
	 * also realloc() later on.  Joy.
	 */
	if (size == 0)
		return(ZERO_LENGTH_PTR);

	/*
	 * Handle large allocations directly.  There should not be very many
	 * of these so performance is not a big issue.
	 *
	 * The backend allocator is pretty nasty on an SMP system.  Use the
	 * slab allocator for one and two page-sized chunks even though we
	 * lose some efficiency.
	 */
	if (size >= ZoneLimit ||
	    ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
		bigalloc_t big;
		bigalloc_t *bigp;

		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
		chunk = _vmem_alloc(size, PAGE_SIZE, flags);
		if (chunk == NULL)
			return(NULL);

		big = _slaballoc(sizeof(struct bigalloc), 0);
		if (big == NULL) {
			_vmem_free(chunk, size);
			return(NULL);
		}
		bigp = bigalloc_lock(chunk);
		big->base = chunk;
		big->bytes = size;
		big->unused01 = 0;
		big->next = *bigp;
		*bigp = big;
		bigalloc_unlock(chunk);

		return(chunk);
	}

	/*
	 * Multi-threading support.  This needs work XXX.
	 *
	 * Choose a globaldata structure to allocate from.  If we cannot
	 * immediately get the lock try a different one.
	 *
	 * LastSLGD is a per-thread global.
	 */
	slgd = LastSLGD;
	if (slgd_trylock(slgd) != 0) {
		if (++slgd == &SLGlobalData[SLGD_MAX])
			slgd = &SLGlobalData[0];
		LastSLGD = slgd;
		slgd_lock(slgd);
	}

	/*
	 * Attempt to allocate out of an existing zone.  If all zones are
	 * exhausted pull one off the free list or allocate a new one.
	 *
	 * Note: zoneindex() will panic if size is too large.
	 */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);

	if ((z = slgd->ZoneAry[zi]) == NULL) {
		/*
		 * Pull the zone off the free list.  If the zone on
		 * the free list happens to be correctly set up we
		 * do not have to reinitialize it.
		 */
		if ((z = slgd->FreeZones) != NULL) {
			slgd->FreeZones = z->z_Next;
			--slgd->NFreeZones;
			if (z->z_ChunkSize == size) {
				z->z_Magic = ZALLOC_SLAB_MAGIC;
				z->z_Next = slgd->ZoneAry[zi];
				slgd->ZoneAry[zi] = z;
				goto have_zone;
			}
			bzero(z, sizeof(struct slzone));
			z->z_Flags |= SLZF_UNOTZEROD;
		} else {
			z = _vmem_alloc(ZoneSize, ZoneSize, flags);
			if (z == NULL)
				goto fail;
		}

		/*
		 * How big is the base structure?
		 */
#if defined(INVARIANTS)
		/*
		 * Make room for z_Bitmap.  An exact calculation is
		 * somewhat more complicated, so settle for a slightly
		 * conservative estimate.
		 */
		off = offsetof(struct slzone,
				z_Bitmap[(ZoneSize / size + 31) / 32]);
		bzero(z->z_Bitmap, (ZoneSize / size + 31) / 8);
#else
		off = sizeof(struct slzone);
#endif

		/*
		 * Align the storage in the zone based on the chunking.
		 *
		 * Guarantee power-of-2 alignment for power-of-2-sized
		 * chunks.  Otherwise align based on the chunking size
		 * (typically 8 or 16 bytes for small allocations).
		 *
		 * NOTE: Allocations >= ZoneLimit are governed by the
		 * bigalloc code and typically only guarantee page-alignment.
		 *
		 * Set initial conditions for UIndex near the zone header
		 * to reduce unnecessary page faults, vs semi-randomization
		 * to improve L1 cache saturation.
		 */
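		/*
		 * Worked example of the alignment below (illustrative
		 * numbers): with size = 256 (a power of 2), off is
		 * rounded up to the next 256-byte boundary so every
		 * chunk in the zone is 256-byte aligned.  With
		 * size = 104 (not a power of 2, chunking = 8), off is
		 * only rounded up to the next 8-byte boundary, matching
		 * the chunking for that zone.
		 */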
		if ((size | (size - 1)) + 1 == (size << 1))
			off = (off + size - 1) & ~(size - 1);
		else
			off = (off + chunking - 1) & ~(chunking - 1);
		z->z_Magic = ZALLOC_SLAB_MAGIC;
		z->z_GlobalData = slgd;
		z->z_ZoneIndex = zi;
		z->z_NMax = (ZoneSize - off) / size;
		z->z_NFree = z->z_NMax;
		z->z_BasePtr = (char *)z + off;
		/*z->z_UIndex = z->z_UEndIndex = slgd->JunkIndex % z->z_NMax;*/
		z->z_UIndex = z->z_UEndIndex = 0;
		z->z_ChunkSize = size;
		z->z_FirstFreePg = ZonePageCount;
		z->z_Next = slgd->ZoneAry[zi];
		slgd->ZoneAry[zi] = z;
		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
			flags &= ~SAFLAG_ZERO;	/* already zero'd */
			flags |= SAFLAG_PASSIVE;
		}

		/*
		 * Slide the base index for initial allocations out of the
		 * next zone we create so we do not over-weight the lower
		 * part of the cpu memory caches.
		 */
		slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
					& (ZALLOC_MAX_ZONE_SIZE - 1);
	}

	/*
	 * Ok, we have a zone from which at least one chunk is available.
	 *
	 * Remove us from the ZoneAry[] when we become empty
	 */
have_zone:
	MASSERT(z->z_NFree > 0);

	if (--z->z_NFree == 0) {
		slgd->ZoneAry[zi] = z->z_Next;
		z->z_Next = NULL;
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < ZonePageCount) {
		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
			/*
			 * Diagnostic: c_Next is not total garbage.
			 */
			MASSERT(chunk->c_Next == NULL ||
				((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
				((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
#ifdef INVARIANTS
			chunk_mark_allocated(z, chunk);
#endif
			MASSERT((uintptr_t)chunk & ZoneMask);
			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
			goto done;
		}
		++z->z_FirstFreePg;
	}
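	/*
	 * Worked example for the page scan above (illustrative numbers,
	 * assuming 4K pages): a free chunk starting at byte offset 0x3048
	 * within its zone lives on zone page 3 (0x3048 >> PAGE_SHIFT), so
	 * _slabfree() pushes it onto z_PageAry[3] and the scan finds it
	 * there again.  z_FirstFreePg merely remembers the lowest page
	 * index that may still hold free chunks.
	 */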
	/*
	 * No chunks are available but NFree said we had some memory,
	 * so it must be available in the never-before-used-memory
	 * area governed by UIndex.  The consequences are very
	 * serious if our zone got corrupted so we use an explicit
	 * panic rather than a KASSERT.
	 */
	chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

	if (++z->z_UIndex == z->z_NMax)
		z->z_UIndex = 0;
	if (z->z_UIndex == z->z_UEndIndex) {
		if (z->z_NFree != 0)
			_mpanic("slaballoc: corrupted zone");
	}

	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
		flags &= ~SAFLAG_ZERO;
		flags |= SAFLAG_PASSIVE;
	}
#if defined(INVARIANTS)
	chunk_mark_allocated(z, chunk);
#endif

done:
	slgd_unlock(slgd);
	if (flags & SAFLAG_ZERO) {
		bzero(chunk, size);
#ifdef INVARIANTS
	} else if ((flags & (SAFLAG_ZERO|SAFLAG_PASSIVE)) == 0) {
		if (use_malloc_pattern) {
			for (i = 0; i < size; i += sizeof(int)) {
				*(int *)((char *)chunk + i) = -1;
			}
		}
		/* avoid accidental double-free check */
		chunk->c_Next = (void *)-1;
#endif
	}
	return(chunk);
fail:
	slgd_unlock(slgd);
	return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
	bigalloc_t *bigp;
	void *nptr;
	slzone_t z;
	size_t chunking;

	if (ptr == NULL || ptr == ZERO_LENGTH_PTR)
		return(_slaballoc(size, 0));

	if (size == 0) {
		free(ptr);
		return(ZERO_LENGTH_PTR);
	}

	/*
	 * Handle oversized allocations.  XXX we really should require
	 * that a size be passed to free() instead of this nonsense.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		bigalloc_t big;
		size_t bigbytes;

		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
				bigbytes = big->bytes;
				bigalloc_unlock(ptr);
				if (bigbytes == size)
					return(ptr);
				if ((nptr = _slaballoc(size, 0)) == NULL)
					return(NULL);
				if (size > bigbytes)
					size = bigbytes;
				bcopy(ptr, nptr, size);
				_slabfree(ptr);
				return(nptr);
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Get the original allocation's zone.  If the new request winds
	 * up using the same chunk size we do not have to do anything.
	 *
	 * NOTE: We don't have to lock the globaldata here, the fields we
	 * access here will not change at least as long as we have control
	 * over the allocation.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	/*
	 * Use zoneindex() to chunk-align the new size, as long as the
	 * new size is not too large.
	 */
	if (size < ZoneLimit) {
		zoneindex(&size, &chunking);
		if (z->z_ChunkSize == size)
			return(ptr);
	}

	/*
	 * Allocate memory for the new request size and copy as appropriate.
	 */
	if ((nptr = _slaballoc(size, 0)) != NULL) {
		if (size > z->z_ChunkSize)
			size = z->z_ChunkSize;
		bcopy(ptr, nptr, size);
		_slabfree(ptr);
	}

	return(nptr);
}
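/*
 * Illustrative example of the oversized-realloc path above (numbers
 * assume a 4K PAGE_SIZE): malloc(20000) is a bigalloc rounded up to
 * 20480 bytes, so realloc(p, 17000) also rounds to 20480 and simply
 * returns p, while realloc(p, 21000) rounds to 24576 and forces an
 * allocate-copy-free cycle.
 */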
/*
 * free() (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr)
{
	slzone_t z;
	slchunk_t chunk;
	bigalloc_t big;
	bigalloc_t *bigp;
	slglobaldata_t slgd;
	size_t size;
	int pgno;

	/*
	 * Handle NULL frees and special 0-byte allocations
	 */
	if (ptr == NULL)
		return;
	if (ptr == ZERO_LENGTH_PTR)
		return;

	/*
	 * Handle oversized allocations.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				*bigp = big->next;
				bigalloc_unlock(ptr);
				size = big->bytes;
				_slabfree(big);
#ifdef INVARIANTS
				MASSERT(sizeof(weirdary) <= size);
				bcopy(weirdary, ptr, sizeof(weirdary));
#endif
				_vmem_free(ptr, size);
				return;
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Zone case.  Figure out the zone based on the fact that it is
	 * ZoneSize aligned.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
	chunk = ptr;
	slgd = z->z_GlobalData;
	slgd_lock(slgd);

#ifdef INVARIANTS
	/*
	 * Attempt to detect a double-free.  To reduce overhead we only check
	 * if there appears to be a link pointer at the base of the data.
	 */
	if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
		slchunk_t scan;

		for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
			if (scan == chunk)
				_mpanic("Double free at %p", chunk);
		}
	}
	chunk_mark_free(z, chunk);
#endif

	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
#ifdef INVARIANTS
	if (z->z_ChunkSize < sizeof(weirdary))
		bcopy(weirdary, chunk, z->z_ChunkSize);
	else
		bcopy(weirdary, chunk, sizeof(weirdary));
#endif

	/*
	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
	 * z_FirstFreePg.
	 */
	chunk->c_Next = z->z_PageAry[pgno];
	z->z_PageAry[pgno] = chunk;
	if (z->z_FirstFreePg > pgno)
		z->z_FirstFreePg = pgno;

	/*
	 * Bump the number of free chunks.  If it becomes non-zero the zone
	 * must be added back onto the appropriate list.
	 */
	if (z->z_NFree++ == 0) {
		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
		slgd->ZoneAry[z->z_ZoneIndex] = z;
	}

	/*
	 * If the zone becomes totally free then move this zone to
	 * the FreeZones list.
	 *
	 * Do not madvise here, avoiding the edge case where a malloc/free
	 * loop is sitting on the edge of a new zone.
	 *
	 * We could leave at least one zone in the ZoneAry for the index,
	 * using something like the below, but while this might be fine
	 * for the kernel (who cares about ~10MB of wasted memory), it
	 * probably isn't such a good idea for a user program.
	 *
	 *	&& (z->z_Next || slgd->ZoneAry[z->z_ZoneIndex] != z)
	 */
	if (z->z_NFree == z->z_NMax) {
		slzone_t *pz;

		pz = &slgd->ZoneAry[z->z_ZoneIndex];
		while (z != *pz)
			pz = &(*pz)->z_Next;
		*pz = z->z_Next;
		z->z_Magic = -1;
		z->z_Next = slgd->FreeZones;
		slgd->FreeZones = z;
		++slgd->NFreeZones;
	}
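	/*
	 * Illustrative consequence of ZONE_RELS_THRESH (4): up to four
	 * completely-free 64K zones stay cached per slglobaldata for
	 * cheap reuse; freeing a fifth triggers the release loop below,
	 * which munmap()s cached zones back to the OS.
	 */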
	/*
	 * Limit the number of zones we keep cached.
	 */
	while (slgd->NFreeZones > ZONE_RELS_THRESH) {
		z = slgd->FreeZones;
		slgd->FreeZones = z->z_Next;
		--slgd->NFreeZones;
		slgd_unlock(slgd);
		_vmem_free(z, ZoneSize);
		slgd_lock(slgd);
	}
	slgd_unlock(slgd);
}

#if defined(INVARIANTS)
/*
 * Helper routines for sanity checks
 */
static
void
chunk_mark_allocated(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) == 0);
	*bitptr |= 1 << bitdex;
}

static
void
chunk_mark_free(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) != 0);
	*bitptr &= ~(1 << bitdex);
}

#endif

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
	char *addr;
	char *save;
	size_t excess;

	/*
	 * Map anonymous private memory.
	 */
	addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (addr == MAP_FAILED)
		return(NULL);

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (align - 1);

	if (excess) {
		excess = align - excess;
		save = addr;

		munmap(save + excess, size - excess);
		addr = _vmem_alloc(size, align, flags);
		munmap(save, excess);
	}
	return((void *)addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}
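#if 0
/*
 * Minimal standalone exercise of the public entry points (a sketch,
 * not part of the library build; enable by hand if desired).  The
 * sizes chosen hit the zone path, the power-of-2 alignment fallback,
 * and the bigalloc path respectively.
 */
int
main(void)
{
	void *p, *q;

	p = malloc(100);		/* zone path, 104-byte chunk */
	p = realloc(p, 104);		/* should reuse the same chunk */
	free(p);

	if (posix_memalign(&q, 256, 513) == 0)	/* power-of-2 fallback */
		free(q);

	p = malloc(20000);		/* bigalloc path, page-rounded */
	free(p);

	p = malloc(0);			/* special non-NULL pointer */
	free(p);
	return(0);
}
#endif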