/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * are nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking are done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them for free().  One and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that power-of-2 sized allocations are page-aligned.  Non
 *	power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
 */
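
/*
 * Example (from the chunking table above): a malloc(100) request is
 * rounded up to a 104-byte chunk in an 8-byte-chunked zone, and a
 * malloc(3000) request is rounded up to a 3072-byte chunk in a
 * 256-byte-chunked zone.  See zoneindex() below.
 */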

#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>

#include "spinlock.h"
#include "un-namespace.h"

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
	struct bigalloc *next;	/* hash link */
	void	*base;		/* base pointer */
	u_long	bytes;		/* bytes allocated */
	u_long	unused01;
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT	(16 * 1024)	/* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE	(32 * 1024)	/* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE	(128 * 1024)	/* maximum zone size */
#define ZALLOC_ZONE_SIZE	(64 * 1024)
#define ZALLOC_SLAB_MAGIC	0x736c6162	/* magic sanity */
#define ZALLOC_SLAB_SLIDE	20		/* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES			72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES			80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
	struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
	__int32_t	z_Magic;	/* magic number for sanity check */
	int		z_NFree;	/* total free chunks / ualloc space */
	struct slzone	*z_Next;	/* ZoneAry[] link if z_NFree non-zero */
	struct slglobaldata *z_GlobalData;
	int		z_NMax;		/* maximum free chunks */
	char		*z_BasePtr;	/* pointer to start of chunk array */
	int		z_UIndex;	/* current initial allocation index */
	int		z_UEndIndex;	/* last (first) allocation index */
	int		z_ChunkSize;	/* chunk size for validation */
	int		z_FirstFreePg;	/* chunk list on a page-by-page basis */
	int		z_ZoneIndex;
	int		z_Flags;
	struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
#if defined(INVARIANTS)
	__uint32_t	z_Bitmap[];	/* bitmap of free chunks / sanity */
#endif
} *slzone_t;

typedef struct slglobaldata {
	spinlock_t	Spinlock;
	slzone_t	ZoneAry[NZONES];/* linked list of zones NFree > 0 */
	slzone_t	FreeZones;	/* whole zones that have become free */
	int		NFreeZones;	/* free zone count */
	int		JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD		0x0001

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE		8		/* in bytes */
#define MIN_CHUNK_MASK		(MIN_CHUNK_SIZE - 1)
#define ZONE_RELS_THRESH	4		/* threshold number of zones */
#define IN_SAME_PAGE_MASK	(~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *	    not be larger than 64.
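 *
 *	    With BIGHSHIFT at 10 the hash table has 1024 chains (BIGHSIZE)
 *	    and the lock table has 64 spinlocks (BIGXSIZE), so each
 *	    spinlock protects 16 hash chains.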
 */
#define WEIRD_ADDR	0xdeadc0de
#define MAX_COPY	sizeof(weirdary)
#define ZERO_LENGTH_PTR	((void *)-8)

#define BIGHSHIFT	10			/* bigalloc hash table */
#define BIGHSIZE	(1 << BIGHSHIFT)
#define BIGHMASK	(BIGHSIZE - 1)
#define BIGXSIZE	(BIGHSIZE / 16)		/* bigalloc lock table */
#define BIGXMASK	(BIGXSIZE - 1)

#define SLGD_MAX	4			/* parallel allocations */

#define SAFLAG_ZERO	0x0001
#define SAFLAG_PASSIVE	0x0002

/*
 * Thread control
 */

#define arysize(ary)	(sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)	do { if (__predict_false(!(exp)))	\
				_mpanic("assertion: %s in %s",	\
					#exp, __func__);	\
			} while (0)

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static struct slglobaldata	SLGlobalData[SLGD_MAX];
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static int malloc_panic;

static const int32_t weirdary[16] = {
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
	WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static __thread slglobaldata_t LastSLGD = &SLGlobalData[0];

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void _mpanic(const char *ctl, ...);
#if defined(INVARIANTS)
static void chunk_mark_allocated(slzone_t z, void *chunk);
static void chunk_mark_free(slzone_t z, void *chunk);
#endif

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

/*
 * Thread locks.
 *
 * NOTE: slgd_trylock() returns 0 or EBUSY
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINLOCK(&slgd->Spinlock);
}

static __inline int
slgd_trylock(slglobaldata_t slgd)
{
	if (__isthreaded)
		return(_SPINTRYLOCK(&slgd->Spinlock));
	return(0);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
	if (__isthreaded)
		_SPINUNLOCK(&slgd->Spinlock);
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
	int hv;

	hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
	     ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

	return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (__isthreaded)
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	return(bigp);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
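 *
 * Skipping the lock for an empty chain keeps free() and realloc() of
 * ordinary slab-managed pointers cheap when few big allocations hash to
 * the same chain.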
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
	int hv = _bigalloc_hash(ptr);
	bigalloc_t *bigp;

	bigp = &bigalloc_array[hv & BIGHMASK];
	if (*bigp == NULL)
		return(NULL);
	if (__isthreaded) {
		_SPINLOCK(&bigspin_array[hv & BIGXMASK]);
	}
	return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
	int hv;

	if (__isthreaded) {
		hv = _bigalloc_hash(ptr);
		_SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
	}
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
	size_t n = (unsigned int)*bytes;	/* unsigned for shift opt */

	if (n < 128) {
		*bytes = n = (n + 7) & ~7;
		*chunking = 8;
		return(n / 8 - 1);		/* 8 byte chunks, 16 zones */
	}
	if (n < 256) {
		*bytes = n = (n + 15) & ~15;
		*chunking = 16;
		return(n / 16 + 7);
	}
	if (n < 8192) {
		if (n < 512) {
			*bytes = n = (n + 31) & ~31;
			*chunking = 32;
			return(n / 32 + 15);
		}
		if (n < 1024) {
			*bytes = n = (n + 63) & ~63;
			*chunking = 64;
			return(n / 64 + 23);
		}
		if (n < 2048) {
			*bytes = n = (n + 127) & ~127;
			*chunking = 128;
			return(n / 128 + 31);
		}
		if (n < 4096) {
			*bytes = n = (n + 255) & ~255;
			*chunking = 256;
			return(n / 256 + 39);
		}
		*bytes = n = (n + 511) & ~511;
		*chunking = 512;
		return(n / 512 + 47);
	}
#if ZALLOC_ZONE_LIMIT > 8192
	if (n < 16384) {
		*bytes = n = (n + 1023) & ~1023;
		*chunking = 1024;
		return(n / 1024 + 55);
	}
#endif
#if ZALLOC_ZONE_LIMIT > 16384
	if (n < 32768) {
		*bytes = n = (n + 2047) & ~2047;
		*chunking = 2048;
		return(n / 2048 + 63);
	}
#endif
	_mpanic("Unexpected byte count %zu", n);
	return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
	void *ptr;

	ptr = _slaballoc(size, 0);
	if (ptr == NULL)
		errno = ENOMEM;
	return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
	void *ptr;

	/*
	 * Guard against number * size overflowing size_t.
	 */
	if (size != 0 && number > SIZE_MAX / size) {
		errno = ENOMEM;
		return(NULL);
	}
	ptr = _slaballoc(number * size, SAFLAG_ZERO);
	if (ptr == NULL)
		errno = ENOMEM;
	return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
	ptr = _slabrealloc(ptr, size);
	if (ptr == NULL)
		errno = ENOMEM;
	return(ptr);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
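 *
 * Worked example: posix_memalign(&p, 256, 513) first rounds the size up
 * to the alignment boundary, giving 768.  768 is not a power of 2 and its
 * zone only chunks on 64-byte boundaries, so the request is bumped to the
 * next power of 2 >= 768 (1024), whose chunks are naturally 1024-aligned.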
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
	bigalloc_t *bigp;
	bigalloc_t big;
	size_t chunking;
	int zi;

	/*
	 * OpenGroup spec issue 6 checks
	 */
	if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
		*memptr = NULL;
		return(EINVAL);
	}
	if (alignment < sizeof(void *)) {
		*memptr = NULL;
		return(EINVAL);
	}

	/*
	 * Our zone mechanism guarantees same-sized alignment for any
	 * power-of-2 allocation.  If size is a power-of-2 and reasonable
	 * we can just call _slaballoc() and be done.  We round size up
	 * to the nearest alignment boundary to improve our odds of
	 * it becoming a power-of-2 if it wasn't before.
	 */
	if (size <= alignment)
		size = alignment;
	else
		size = (size + alignment - 1) & ~(size_t)(alignment - 1);
	if (size < PAGE_SIZE && (size | (size - 1)) + 1 == (size << 1)) {
		*memptr = _slaballoc(size, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * Otherwise locate a zone with a chunking that matches
	 * the requested alignment, within reason.  Consider two cases:
	 *
	 * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
	 *     we find will be the best fit because the chunking will be
	 *     greater or equal to the alignment.
	 *
	 * (2) A 513 allocation on a 256-byte alignment.  After the round-up
	 *     above the size is 768 and the zone we find chunks on 64-byte
	 *     boundaries, which is not sufficient.  To fix this we simply
	 *     find the nearest power-of-2 >= size and use the same
	 *     side-effect of _slaballoc() which guarantees
	 *     same-alignment on a power-of-2 allocation.
	 */
	if (size < PAGE_SIZE) {
		zi = zoneindex(&size, &chunking);
		if (chunking >= alignment) {
			*memptr = _slaballoc(size, 0);
			return(*memptr ? 0 : ENOMEM);
		}
		if (size >= 1024)
			alignment = 1024;
		if (size >= 16384)
			alignment = 16384;
		while (alignment < size)
			alignment <<= 1;
		*memptr = _slaballoc(alignment, 0);
		return(*memptr ? 0 : ENOMEM);
	}

	/*
	 * If the slab allocator cannot handle it use vmem_alloc().
	 *
	 * Alignment must be adjusted up to at least PAGE_SIZE in this case.
	 */
	if (alignment < PAGE_SIZE)
		alignment = PAGE_SIZE;
	if (size < alignment)
		size = alignment;
	size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
	*memptr = _vmem_alloc(size, alignment, 0);
	if (*memptr == NULL)
		return(ENOMEM);

	big = _slaballoc(sizeof(struct bigalloc), 0);
	if (big == NULL) {
		_vmem_free(*memptr, size);
		*memptr = NULL;
		return(ENOMEM);
	}
	bigp = bigalloc_lock(*memptr);
	big->base = *memptr;
	big->bytes = size;
	big->unused01 = 0;
	big->next = *bigp;
	*bigp = big;
	bigalloc_unlock(*memptr);

	return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
	_slabfree(ptr);
}

/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.  If the request is too large,
 * or if it is page-aligned beyond a certain size, we fall back to the
 * KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
	slzone_t z;
	slchunk_t chunk;
	slglobaldata_t slgd;
	size_t chunking;
	int zi;
#ifdef INVARIANTS
	int i;
#endif
	int off;

	/*
	 * Handle the degenerate size == 0 case.  Yes, this does happen.
	 * Return a special pointer.
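	 * ZERO_LENGTH_PTR ((void *)-8) is recognized and short-circuited
	 * again by _slabrealloc() and _slabfree(), and must never be
	 * dereferenced.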
	 * This is to maintain compatibility with
	 * the original malloc implementation.  Certain devices, such as the
	 * adaptec driver, not only allocate 0 bytes, they check for NULL and
	 * also realloc() later on.  Joy.
	 */
	if (size == 0)
		return(ZERO_LENGTH_PTR);

	/*
	 * Handle large allocations directly.  There should not be very many
	 * of these so performance is not a big issue.
	 *
	 * The backend allocator is pretty nasty on an SMP system.  Use the
	 * slab allocator for one and two page-sized chunks even though we
	 * lose some efficiency.
	 */
	if (size >= ZoneLimit ||
	    ((size & PAGE_MASK) == 0 && size > PAGE_SIZE*2)) {
		bigalloc_t big;
		bigalloc_t *bigp;

		size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
		chunk = _vmem_alloc(size, PAGE_SIZE, flags);
		if (chunk == NULL)
			return(NULL);

		big = _slaballoc(sizeof(struct bigalloc), 0);
		if (big == NULL) {
			_vmem_free(chunk, size);
			return(NULL);
		}
		bigp = bigalloc_lock(chunk);
		big->base = chunk;
		big->bytes = size;
		big->unused01 = 0;
		big->next = *bigp;
		*bigp = big;
		bigalloc_unlock(chunk);

		return(chunk);
	}

	/*
	 * Multi-threading support.  This needs work XXX.
	 *
	 * Choose a globaldata structure to allocate from.  If we cannot
	 * immediately get the lock try a different one.
	 *
	 * LastSLGD is a per-thread global.
	 */
	slgd = LastSLGD;
	if (slgd_trylock(slgd) != 0) {
		if (++slgd == &SLGlobalData[SLGD_MAX])
			slgd = &SLGlobalData[0];
		LastSLGD = slgd;
		slgd_lock(slgd);
	}

	/*
	 * Attempt to allocate out of an existing zone.  If all zones are
	 * exhausted pull one off the free list or allocate a new one.
	 *
	 * Note: zoneindex() will panic if size is too large.
	 */
	zi = zoneindex(&size, &chunking);
	MASSERT(zi < NZONES);

	if ((z = slgd->ZoneAry[zi]) == NULL) {
		/*
		 * Pull the zone off the free list.  If the zone on
		 * the free list happens to be correctly set up we
		 * do not have to reinitialize it.
		 */
		if ((z = slgd->FreeZones) != NULL) {
			slgd->FreeZones = z->z_Next;
			--slgd->NFreeZones;
			if (z->z_ChunkSize == size) {
				z->z_Magic = ZALLOC_SLAB_MAGIC;
				z->z_Next = slgd->ZoneAry[zi];
				slgd->ZoneAry[zi] = z;
				goto have_zone;
			}
			bzero(z, sizeof(struct slzone));
			z->z_Flags |= SLZF_UNOTZEROD;
		} else {
			z = _vmem_alloc(ZoneSize, ZoneSize, flags);
			if (z == NULL)
				goto fail;
		}

		/*
		 * How big is the base structure?
		 */
#if defined(INVARIANTS)
		/*
		 * Make room for z_Bitmap.  An exact calculation is
		 * somewhat more complicated so just over-estimate
		 * using ZoneSize / size.
		 */
		off = offsetof(struct slzone,
				z_Bitmap[(ZoneSize / size + 31) / 32]);
		bzero(z->z_Bitmap, (ZoneSize / size + 31) / 8);
#else
		off = sizeof(struct slzone);
#endif

		/*
		 * Align the storage in the zone based on the chunking.
		 *
		 * Guarantee power-of-2 alignment for power-of-2-sized
		 * chunks.  Otherwise align based on the chunking size
		 * (typically 8 or 16 bytes for small allocations).
		 *
		 * NOTE: Allocations >= ZoneLimit are governed by the
		 * bigalloc code and typically only guarantee page-alignment.
		 *
		 * Set initial conditions for UIndex near the zone header
		 * to reduce unnecessary page faults, vs semi-randomization
		 * to improve L1 cache saturation.
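		 *
		 * For example, with 256-byte chunks the test below sees
		 * (256 | 255) + 1 == 512 == (256 << 1), so the chunk array
		 * is started on a 256-byte boundary; a 24-byte chunk size
		 * fails the power-of-2 test and is merely aligned to its
		 * 8-byte chunking.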
		 */
		if ((size | (size - 1)) + 1 == (size << 1))
			off = (off + size - 1) & ~(size - 1);
		else
			off = (off + chunking - 1) & ~(chunking - 1);
		z->z_Magic = ZALLOC_SLAB_MAGIC;
		z->z_GlobalData = slgd;
		z->z_ZoneIndex = zi;
		z->z_NMax = (ZoneSize - off) / size;
		z->z_NFree = z->z_NMax;
		z->z_BasePtr = (char *)z + off;
		/*z->z_UIndex = z->z_UEndIndex = slgd->JunkIndex % z->z_NMax;*/
		z->z_UIndex = z->z_UEndIndex = 0;
		z->z_ChunkSize = size;
		z->z_FirstFreePg = ZonePageCount;
		z->z_Next = slgd->ZoneAry[zi];
		slgd->ZoneAry[zi] = z;
		if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
			flags &= ~SAFLAG_ZERO;	/* already zero'd */
			flags |= SAFLAG_PASSIVE;
		}

		/*
		 * Slide the base index for initial allocations out of the
		 * next zone we create so we do not over-weight the lower
		 * part of the cpu memory caches.
		 */
		slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
					& (ZALLOC_MAX_ZONE_SIZE - 1);
	}

	/*
	 * Ok, we have a zone from which at least one chunk is available.
	 *
	 * Remove us from the ZoneAry[] when we become empty
	 */
have_zone:
	MASSERT(z->z_NFree > 0);

	if (--z->z_NFree == 0) {
		slgd->ZoneAry[zi] = z->z_Next;
		z->z_Next = NULL;
	}

	/*
	 * Locate a chunk in a free page.  This attempts to localize
	 * reallocations into earlier pages without us having to sort
	 * the chunk list.  A chunk may still overlap a page boundary.
	 */
	while (z->z_FirstFreePg < ZonePageCount) {
		if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
			/*
			 * Diagnostic: c_Next is not total garbage.
			 */
			MASSERT(chunk->c_Next == NULL ||
				((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
				((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
#ifdef INVARIANTS
			chunk_mark_allocated(z, chunk);
#endif
			MASSERT((uintptr_t)chunk & ZoneMask);
			z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
			goto done;
		}
		++z->z_FirstFreePg;
	}

	/*
	 * No chunks are available but NFree said we had some memory,
	 * so it must be available in the never-before-used-memory
	 * area governed by UIndex.  The consequences are very
	 * serious if our zone got corrupted so we use an explicit
	 * panic rather than a KASSERT.
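	 *
	 * Chunks taken from the UIndex area of a freshly mmap()'d zone
	 * have never been handed out and are therefore still zero-filled,
	 * which is why SAFLAG_ZERO can be cleared below when SLZF_UNOTZEROD
	 * is not set.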
	 */
	chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

	if (++z->z_UIndex == z->z_NMax)
		z->z_UIndex = 0;
	if (z->z_UIndex == z->z_UEndIndex) {
		if (z->z_NFree != 0)
			_mpanic("slaballoc: corrupted zone");
	}

	if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
		flags &= ~SAFLAG_ZERO;
		flags |= SAFLAG_PASSIVE;
	}
#if defined(INVARIANTS)
	chunk_mark_allocated(z, chunk);
#endif

done:
	slgd_unlock(slgd);
	if (flags & SAFLAG_ZERO) {
		bzero(chunk, size);
#ifdef INVARIANTS
	} else if ((flags & (SAFLAG_ZERO|SAFLAG_PASSIVE)) == 0) {
		if (use_malloc_pattern) {
			for (i = 0; i < size; i += sizeof(int)) {
				*(int *)((char *)chunk + i) = -1;
			}
		}
		/* avoid accidental double-free check */
		chunk->c_Next = (void *)-1;
#endif
	}
	return(chunk);
fail:
	slgd_unlock(slgd);
	return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
	bigalloc_t *bigp;
	void *nptr;
	slzone_t z;
	size_t chunking;

	if (ptr == NULL || ptr == ZERO_LENGTH_PTR)
		return(_slaballoc(size, 0));

	if (size == 0) {
		free(ptr);
		return(ZERO_LENGTH_PTR);
	}

	/*
	 * Handle oversized allocations.  XXX we really should require
	 * that a size be passed to free() instead of this nonsense.
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		bigalloc_t big;
		size_t bigbytes;

		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
				bigbytes = big->bytes;
				bigalloc_unlock(ptr);
				if (bigbytes == size)
					return(ptr);
				if ((nptr = _slaballoc(size, 0)) == NULL)
					return(NULL);
				if (size > bigbytes)
					size = bigbytes;
				bcopy(ptr, nptr, size);
				_slabfree(ptr);
				return(nptr);
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Get the original allocation's zone.  If the new request winds
	 * up using the same chunk size we do not have to do anything.
	 *
	 * NOTE: We don't have to lock the globaldata here, the fields we
	 * access here will not change at least as long as we have control
	 * over the allocation.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	/*
	 * Use zoneindex() to chunk-align the new size, as long as the
	 * new size is not too large.
	 */
	if (size < ZoneLimit) {
		zoneindex(&size, &chunking);
		if (z->z_ChunkSize == size)
			return(ptr);
	}

	/*
	 * Allocate memory for the new request size and copy as appropriate.
	 */
	if ((nptr = _slaballoc(size, 0)) != NULL) {
		if (size > z->z_ChunkSize)
			size = z->z_ChunkSize;
		bcopy(ptr, nptr, size);
		_slabfree(ptr);
	}

	return(nptr);
}

/*
 * free (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr)
{
	slzone_t z;
	slchunk_t chunk;
	bigalloc_t big;
	bigalloc_t *bigp;
	slglobaldata_t slgd;
	size_t size;
	int pgno;

	/*
	 * Handle NULL frees and special 0-byte allocations
	 */
	if (ptr == NULL)
		return;
	if (ptr == ZERO_LENGTH_PTR)
		return;

	/*
	 * Handle oversized allocations.
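	 *
	 * The hash chain is only locked when it is non-empty.  If the
	 * pointer matches a bigalloc record the record is unhashed, the
	 * tracking structure is released, and the backing pages are
	 * returned with _vmem_free().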
	 */
	if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
		while ((big = *bigp) != NULL) {
			if (big->base == ptr) {
				*bigp = big->next;
				bigalloc_unlock(ptr);
				size = big->bytes;
				_slabfree(big);
#ifdef INVARIANTS
				MASSERT(sizeof(weirdary) <= size);
				bcopy(weirdary, ptr, sizeof(weirdary));
#endif
				_vmem_free(ptr, size);
				return;
			}
			bigp = &big->next;
		}
		bigalloc_unlock(ptr);
	}

	/*
	 * Zone case.  Figure out the zone based on the fact that it is
	 * ZoneSize aligned.
	 */
	z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
	MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

	pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
	chunk = ptr;
	slgd = z->z_GlobalData;
	slgd_lock(slgd);

#ifdef INVARIANTS
	/*
	 * Attempt to detect a double-free.  To reduce overhead we only check
	 * if there appears to be a link pointer at the base of the data.
	 */
	if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
		slchunk_t scan;

		for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
			if (scan == chunk)
				_mpanic("Double free at %p", chunk);
		}
	}
	chunk_mark_free(z, chunk);
#endif

	/*
	 * Put weird data into the memory to detect modifications after
	 * freeing, illegal pointer use after freeing (we should fault on
	 * the odd address), and so forth.
	 */
#ifdef INVARIANTS
	if (z->z_ChunkSize < sizeof(weirdary))
		bcopy(weirdary, chunk, z->z_ChunkSize);
	else
		bcopy(weirdary, chunk, sizeof(weirdary));
#endif

	/*
	 * Add this free non-zero'd chunk to a linked list for reuse, adjust
	 * z_FirstFreePg.
	 */
	chunk->c_Next = z->z_PageAry[pgno];
	z->z_PageAry[pgno] = chunk;
	if (z->z_FirstFreePg > pgno)
		z->z_FirstFreePg = pgno;

	/*
	 * Bump the number of free chunks.  If it becomes non-zero the zone
	 * must be added back onto the appropriate list.
	 */
	if (z->z_NFree++ == 0) {
		z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
		slgd->ZoneAry[z->z_ZoneIndex] = z;
	}

	/*
	 * If the zone becomes totally free then move this zone to
	 * the FreeZones list.
	 *
	 * Do not madvise here, avoiding the edge case where a malloc/free
	 * loop is sitting on the edge of a new zone.
	 *
	 * We could leave at least one zone in the ZoneAry for the index,
	 * using something like the below, but while this might be fine
	 * for the kernel (who cares about ~10MB of wasted memory), it
	 * probably isn't such a good idea for a user program.
	 *
	 *	 && (z->z_Next || slgd->ZoneAry[z->z_ZoneIndex] != z)
	 */
	if (z->z_NFree == z->z_NMax) {
		slzone_t *pz;

		pz = &slgd->ZoneAry[z->z_ZoneIndex];
		while (z != *pz)
			pz = &(*pz)->z_Next;
		*pz = z->z_Next;
		z->z_Magic = -1;
		z->z_Next = slgd->FreeZones;
		slgd->FreeZones = z;
		++slgd->NFreeZones;
	}

	/*
	 * Limit the number of zones we keep cached.
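	 *
	 * With ZONE_RELS_THRESH at 4 and 64KB zones, at most ~256KB of
	 * fully-free zones are retained per slglobaldata.  The spinlock is
	 * dropped around _vmem_free() so other threads are not held up
	 * across the munmap() call.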
	 */
	while (slgd->NFreeZones > ZONE_RELS_THRESH) {
		z = slgd->FreeZones;
		slgd->FreeZones = z->z_Next;
		--slgd->NFreeZones;
		slgd_unlock(slgd);
		_vmem_free(z, ZoneSize);
		slgd_lock(slgd);
	}
	slgd_unlock(slgd);
}

#if defined(INVARIANTS)
/*
 * Helper routines for sanity checks
 */
static
void
chunk_mark_allocated(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) == 0);
	*bitptr |= 1 << bitdex;
}

static
void
chunk_mark_free(slzone_t z, void *chunk)
{
	int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
	__uint32_t *bitptr;

	MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
	bitptr = &z->z_Bitmap[bitdex >> 5];
	bitdex &= 31;
	MASSERT((*bitptr & (1 << bitdex)) != 0);
	*bitptr &= ~(1 << bitdex);
}

#endif

/*
 * _vmem_alloc()
 *
 *	Directly map memory in PAGE_SIZE'd chunks with the specified
 *	alignment.
 *
 *	Alignment must be a multiple of PAGE_SIZE.
 *
 *	Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
	char *addr;
	char *save;
	size_t excess;

	/*
	 * Map anonymous private memory.
	 */
	addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
		    MAP_PRIVATE|MAP_ANON, -1, 0);
	if (addr == MAP_FAILED)
		return(NULL);

	/*
	 * Check alignment.  The misaligned offset is also the excess
	 * amount.  If misaligned unmap the excess so we have a chance of
	 * mapping at the next alignment point and recursively try again.
	 *
	 * BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB	block alignment
	 *   aaaaaaaaa aaaaaaaaaaa aa		mis-aligned allocation
	 *   xxxxxxxxx				final excess calculation
	 *   ^ returned address
	 */
	excess = (uintptr_t)addr & (align - 1);

	if (excess) {
		excess = align - excess;
		save = addr;

		munmap(save + excess, size - excess);
		addr = _vmem_alloc(size, align, flags);
		munmap(save, excess);
	}
	return((void *)addr);
}

/*
 * _vmem_free()
 *
 *	Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
	munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
	va_list va;

	if (malloc_panic == 0) {
		malloc_panic = 1;
		va_start(va, ctl);
		vfprintf(stderr, ctl, va);
		fprintf(stderr, "\n");
		fflush(stderr);
		va_end(va);
	}
	abort();
}