/*
 * NMALLOC.C	- New Malloc (ported from kernel slab allocator)
 *
 * Copyright (c) 2003,2004,2009 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This module implements a slab allocator drop-in replacement for the
 * libc malloc().
 *
 * A slab allocator reserves a ZONE for each chunk size, then lays the
 * chunks out in an array within the zone.  Allocation and deallocation
 * is nearly instantaneous, and overhead losses are limited to a fixed
 * worst-case amount.
 *
 * The slab allocator does not have to pre-initialize the list of
 * free chunks for each zone, and the underlying VM will not be
 * touched at all beyond the zone header until an actual allocation
 * needs it.
 *
 * Slab management and locking is done on a per-zone basis.
 *
 *	Alloc Size	Chunking	Number of zones
 *	0-127		8		16
 *	128-255		16		8
 *	256-511		32		8
 *	512-1023	64		8
 *	1024-2047	128		8
 *	2048-4095	256		8
 *	4096-8191	512		8
 *	8192-16383	1024		8
 *	16384-32767	2048		8
 *
 * Allocations >= ZoneLimit (16K) go directly to mmap and a hash table
 * is used to locate them for free().  One- and two-page allocations use
 * the zone mechanic to avoid excessive mmap()/munmap() calls.
 *
 * API FEATURES AND SIDE EFFECTS
 *
 *    + power-of-2 sized allocations up to a page will be power-of-2 aligned.
 *	Above that, power-of-2 sized allocations are page-aligned.  Non-
 *	power-of-2 sized allocations are aligned the same as the chunk
 *	size for their zone.
 *    + malloc(0) returns a special non-NULL value
 *    + ability to allocate arbitrarily large chunks of memory
 *    + realloc will reuse the passed pointer if possible, within the
 *	limitations of the zone chunking.
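 *
 *    For example (illustrative): a 100-byte request falls in the 0-127
 *    band and is rounded up to the next multiple of 8 (104 bytes), while
 *    a 3000-byte request is rounded up to the next multiple of 256
 *    (3072 bytes); each rounded-up size is served by its own zone.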
 */

#include "libc_private.h"

#include <sys/param.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <stddef.h>
#include <unistd.h>
#include <string.h>
#include <fcntl.h>
#include <errno.h>

#include "spinlock.h"
#include "un-namespace.h"

/*
 * Linked list of large allocations
 */
typedef struct bigalloc {
        struct bigalloc *next;  /* hash link */
        void    *base;          /* base pointer */
        u_long  bytes;          /* bytes allocated */
        u_long  unused01;
} *bigalloc_t;

/*
 * Note that any allocations which are exact multiples of PAGE_SIZE, or
 * which are >= ZALLOC_ZONE_LIMIT, will fall through to the kmem subsystem.
 */
#define ZALLOC_ZONE_LIMIT       (16 * 1024)     /* max slab-managed alloc */
#define ZALLOC_MIN_ZONE_SIZE    (32 * 1024)     /* minimum zone size */
#define ZALLOC_MAX_ZONE_SIZE    (128 * 1024)    /* maximum zone size */
#define ZALLOC_ZONE_SIZE        (64 * 1024)
#define ZALLOC_SLAB_MAGIC       0x736c6162      /* magic sanity */
#define ZALLOC_SLAB_SLIDE       20              /* L1-cache skip */

#if ZALLOC_ZONE_LIMIT == 16384
#define NZONES                  72
#elif ZALLOC_ZONE_LIMIT == 32768
#define NZONES                  80
#else
#error "I couldn't figure out NZONES"
#endif

/*
 * Chunk structure for free elements
 */
typedef struct slchunk {
        struct slchunk *c_Next;
} *slchunk_t;

/*
 * The IN-BAND zone header is placed at the beginning of each zone.
 */
struct slglobaldata;

typedef struct slzone {
        __int32_t       z_Magic;        /* magic number for sanity check */
        int             z_NFree;        /* total free chunks / ualloc space */
        struct slzone   *z_Next;        /* ZoneAry[] link if z_NFree non-zero */
        struct slglobaldata *z_GlobalData;
        int             z_NMax;         /* maximum free chunks */
        char            *z_BasePtr;     /* pointer to start of chunk array */
        int             z_UIndex;       /* current initial allocation index */
        int             z_UEndIndex;    /* last (first) allocation index */
        int             z_ChunkSize;    /* chunk size for validation */
        int             z_FirstFreePg;  /* chunk list on a page-by-page basis */
        int             z_ZoneIndex;
        int             z_Flags;
        struct slchunk *z_PageAry[ZALLOC_ZONE_SIZE / PAGE_SIZE];
#if defined(INVARIANTS)
        __uint32_t      z_Bitmap[];     /* bitmap of free chunks / sanity */
#endif
} *slzone_t;

typedef struct slglobaldata {
        spinlock_t      Spinlock;
        slzone_t        ZoneAry[NZONES];/* linked list of zones NFree > 0 */
        slzone_t        FreeZones;      /* whole zones that have become free */
        int             NFreeZones;     /* free zone count */
        int             JunkIndex;
} *slglobaldata_t;

#define SLZF_UNOTZEROD          0x0001

/*
 * Misc constants.  Note that allocations that are exact multiples of
 * PAGE_SIZE, or exceed the zone limit, fall through to the kmem module.
 * IN_SAME_PAGE_MASK is used to sanity-check the per-page free lists.
 */
#define MIN_CHUNK_SIZE          8               /* in bytes */
#define MIN_CHUNK_MASK          (MIN_CHUNK_SIZE - 1)
#define ZONE_RELS_THRESH        4               /* threshold number of zones */
#define IN_SAME_PAGE_MASK       (~(intptr_t)PAGE_MASK | MIN_CHUNK_MASK)

/*
 * The WEIRD_ADDR is used as known text to copy into free objects to
 * try to create deterministic failure cases if the data is accessed after
 * free.
 *
 * WARNING: A limited number of spinlocks are available, BIGXSIZE should
 *          not be larger than 64.
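 *
 *          With the defaults below the bigalloc hash has BIGHSIZE (1024)
 *          chains protected by BIGXSIZE (64) spinlocks, so 16 hash chains
 *          share each spinlock.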
 */
#define WEIRD_ADDR      0xdeadc0de
#define MAX_COPY        sizeof(weirdary)
#define ZERO_LENGTH_PTR ((void *)-8)

#define BIGHSHIFT       10                      /* bigalloc hash table */
#define BIGHSIZE        (1 << BIGHSHIFT)
#define BIGHMASK        (BIGHSIZE - 1)
#define BIGXSIZE        (BIGHSIZE / 16)         /* bigalloc lock table */
#define BIGXMASK        (BIGXSIZE - 1)

#define SLGD_MAX        4                       /* parallel allocations */

#define SAFLAG_ZERO     0x0001
#define SAFLAG_PASSIVE  0x0002

/*
 * Thread control
 */

#define arysize(ary)    (sizeof(ary)/sizeof((ary)[0]))

#define MASSERT(exp)    do { if (__predict_false(!(exp)))       \
                                _mpanic("assertion: %s in %s",  \
                                        #exp, __func__);        \
                        } while (0)

/*
 * Fixed globals (not per-cpu)
 */
static const int ZoneSize = ZALLOC_ZONE_SIZE;
static const int ZoneLimit = ZALLOC_ZONE_LIMIT;
static const int ZonePageCount = ZALLOC_ZONE_SIZE / PAGE_SIZE;
static const int ZoneMask = ZALLOC_ZONE_SIZE - 1;

static struct slglobaldata      SLGlobalData[SLGD_MAX];
static bigalloc_t bigalloc_array[BIGHSIZE];
static spinlock_t bigspin_array[BIGXSIZE];
static int malloc_panic;

static const int32_t weirdary[16] = {
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR,
        WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR, WEIRD_ADDR
};

static __thread slglobaldata_t LastSLGD = &SLGlobalData[0];

static void *_slaballoc(size_t size, int flags);
static void *_slabrealloc(void *ptr, size_t size);
static void _slabfree(void *ptr);
static void *_vmem_alloc(size_t bytes, size_t align, int flags);
static void _vmem_free(void *ptr, size_t bytes);
static void _mpanic(const char *ctl, ...);
#if defined(INVARIANTS)
static void chunk_mark_allocated(slzone_t z, void *chunk);
static void chunk_mark_free(slzone_t z, void *chunk);
#endif

#ifdef INVARIANTS
/*
 * If enabled any memory allocated without M_ZERO is initialized to -1.
 */
static int use_malloc_pattern;
#endif

/*
 * Thread locks.
 *
 * NOTE: slgd_trylock() returns 0 or EBUSY
 */
static __inline void
slgd_lock(slglobaldata_t slgd)
{
        if (__isthreaded)
                _SPINLOCK(&slgd->Spinlock);
}

static __inline int
slgd_trylock(slglobaldata_t slgd)
{
        if (__isthreaded)
                return(_SPINTRYLOCK(&slgd->Spinlock));
        return(0);
}

static __inline void
slgd_unlock(slglobaldata_t slgd)
{
        if (__isthreaded)
                _SPINUNLOCK(&slgd->Spinlock);
}

/*
 * bigalloc hashing and locking support.
 *
 * Return an unmasked hash code for the passed pointer.  The cast goes
 * through intptr_t to avoid pointer-truncation warnings on LP64.
 */
static __inline int
_bigalloc_hash(void *ptr)
{
        int hv;

        hv = ((int)(intptr_t)ptr >> PAGE_SHIFT) ^
             ((int)(intptr_t)ptr >> (PAGE_SHIFT + BIGHSHIFT));

        return(hv);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 */
static __inline bigalloc_t *
bigalloc_lock(void *ptr)
{
        int hv = _bigalloc_hash(ptr);
        bigalloc_t *bigp;

        bigp = &bigalloc_array[hv & BIGHMASK];
        if (__isthreaded)
                _SPINLOCK(&bigspin_array[hv & BIGXMASK]);
        return(bigp);
}

/*
 * Lock the hash chain and return a pointer to its base for the specified
 * address.
 *
 * BUT, if the hash chain is empty, just return NULL and do not bother
 * to lock anything.
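 *
 * (In the common case, e.g. a free() of a small slab-managed pointer,
 * the chain is usually empty and the spinlock is never taken.)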
 */
static __inline bigalloc_t *
bigalloc_check_and_lock(void *ptr)
{
        int hv = _bigalloc_hash(ptr);
        bigalloc_t *bigp;

        bigp = &bigalloc_array[hv & BIGHMASK];
        if (*bigp == NULL)
                return(NULL);
        if (__isthreaded) {
                _SPINLOCK(&bigspin_array[hv & BIGXMASK]);
        }
        return(bigp);
}

static __inline void
bigalloc_unlock(void *ptr)
{
        int hv;

        if (__isthreaded) {
                hv = _bigalloc_hash(ptr);
                _SPINUNLOCK(&bigspin_array[hv & BIGXMASK]);
        }
}

/*
 * Calculate the zone index for the allocation request size and set the
 * allocation request size to that particular zone's chunk size.
 */
static __inline int
zoneindex(size_t *bytes, size_t *chunking)
{
        size_t n = (unsigned int)*bytes;        /* unsigned for shift opt */

        if (n < 128) {
                *bytes = n = (n + 7) & ~7;
                *chunking = 8;
                return(n / 8 - 1);              /* 8 byte chunks, 16 zones */
        }
        if (n < 256) {
                *bytes = n = (n + 15) & ~15;
                *chunking = 16;
                return(n / 16 + 7);
        }
        if (n < 8192) {
                if (n < 512) {
                        *bytes = n = (n + 31) & ~31;
                        *chunking = 32;
                        return(n / 32 + 15);
                }
                if (n < 1024) {
                        *bytes = n = (n + 63) & ~63;
                        *chunking = 64;
                        return(n / 64 + 23);
                }
                if (n < 2048) {
                        *bytes = n = (n + 127) & ~127;
                        *chunking = 128;
                        return(n / 128 + 31);
                }
                if (n < 4096) {
                        *bytes = n = (n + 255) & ~255;
                        *chunking = 256;
                        return(n / 256 + 39);
                }
                *bytes = n = (n + 511) & ~511;
                *chunking = 512;
                return(n / 512 + 47);
        }
#if ZALLOC_ZONE_LIMIT > 8192
        if (n < 16384) {
                *bytes = n = (n + 1023) & ~1023;
                *chunking = 1024;
                return(n / 1024 + 55);
        }
#endif
#if ZALLOC_ZONE_LIMIT > 16384
        if (n < 32768) {
                *bytes = n = (n + 2047) & ~2047;
                *chunking = 2048;
                return(n / 2048 + 63);
        }
#endif
        _mpanic("Unexpected byte count %zu", n);
        return(0);
}

/*
 * malloc() - call internal slab allocator
 */
void *
malloc(size_t size)
{
        void *ptr;

        ptr = _slaballoc(size, 0);
        if (ptr == NULL)
                errno = ENOMEM;
        return(ptr);
}

/*
 * calloc() - call internal slab allocator
 */
void *
calloc(size_t number, size_t size)
{
        void *ptr;

        /*
         * Guard against multiplication overflow; number * size would
         * otherwise wrap and silently under-allocate.
         */
        if (size != 0 && number > (size_t)-1 / size) {
                errno = ENOMEM;
                return(NULL);
        }

        ptr = _slaballoc(number * size, SAFLAG_ZERO);
        if (ptr == NULL)
                errno = ENOMEM;
        return(ptr);
}

/*
 * realloc() (SLAB ALLOCATOR)
 *
 * We do not attempt to optimize this routine beyond reusing the same
 * pointer if the new size fits within the chunking of the old pointer's
 * zone.
 */
void *
realloc(void *ptr, size_t size)
{
        ptr = _slabrealloc(ptr, size);
        if (ptr == NULL)
                errno = ENOMEM;
        return(ptr);
}

/*
 * posix_memalign()
 *
 * Allocate (size) bytes with an alignment of (alignment), where (alignment)
 * is a power of 2 >= sizeof(void *).
 *
 * The slab allocator will allocate on power-of-2 boundaries up to
 * at least PAGE_SIZE.  We use the zoneindex mechanic to find a
 * zone matching the requirements, and _vmem_alloc() otherwise.
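 *
 * The power-of-2 tests below rely on the identity that, for a power of
 * 2 N, (N | (N - 1)) + 1 == N << 1.  For example (8 | 7) + 1 == 16 ==
 * 8 << 1, whereas for a non-power-of-2 such as 12, (12 | 11) + 1 == 16,
 * which is not 12 << 1 == 24.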
 */
int
posix_memalign(void **memptr, size_t alignment, size_t size)
{
        bigalloc_t *bigp;
        bigalloc_t big;
        size_t chunking;
        int zi;

        /*
         * OpenGroup spec issue 6 checks
         */
        if ((alignment | (alignment - 1)) + 1 != (alignment << 1)) {
                *memptr = NULL;
                return(EINVAL);
        }
        if (alignment < sizeof(void *)) {
                *memptr = NULL;
                return(EINVAL);
        }

        /*
         * Our zone mechanism guarantees same-sized alignment for any
         * power-of-2 allocation.  If size is a power-of-2 and reasonable
         * we can just call _slaballoc() and be done.  We round size up
         * to the nearest alignment boundary to improve our odds of
         * it becoming a power-of-2 if it wasn't before.
         */
        if (size <= alignment)
                size = alignment;
        else
                size = (size + alignment - 1) & ~(size_t)(alignment - 1);
        if (size < PAGE_SIZE && (size | (size - 1)) + 1 == (size << 1)) {
                *memptr = _slaballoc(size, 0);
                return(*memptr ? 0 : ENOMEM);
        }

        /*
         * Otherwise locate a zone with a chunking that matches
         * the requested alignment, within reason.  Consider two cases:
         *
         * (1) A 1K allocation on a 32-byte alignment.  The first zoneindex
         *     we find will be the best fit because the chunking will be
         *     greater than or equal to the alignment.
         *
         * (2) A 513 allocation on a 256-byte alignment.  In this case
         *     the first zoneindex we find will be for 576 byte allocations
         *     with a chunking of 64, which is not sufficient.  To fix this
         *     we simply find the nearest power-of-2 >= size and use the
         *     same side-effect of _slaballoc() which guarantees
         *     same-alignment on a power-of-2 allocation.
         */
        if (size < PAGE_SIZE) {
                zi = zoneindex(&size, &chunking);
                if (chunking >= alignment) {
                        *memptr = _slaballoc(size, 0);
                        return(*memptr ? 0 : ENOMEM);
                }
                if (size >= 1024)
                        alignment = 1024;
                if (size >= 16384)
                        alignment = 16384;
                while (alignment < size)
                        alignment <<= 1;
                *memptr = _slaballoc(alignment, 0);
                return(*memptr ? 0 : ENOMEM);
        }

        /*
         * If the slab allocator cannot handle it use _vmem_alloc().
         *
         * Alignment must be adjusted up to at least PAGE_SIZE in this case.
         */
        if (alignment < PAGE_SIZE)
                alignment = PAGE_SIZE;
        if (size < alignment)
                size = alignment;
        size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
        *memptr = _vmem_alloc(size, alignment, 0);
        if (*memptr == NULL)
                return(ENOMEM);

        big = _slaballoc(sizeof(struct bigalloc), 0);
        if (big == NULL) {
                _vmem_free(*memptr, size);
                *memptr = NULL;
                return(ENOMEM);
        }
        bigp = bigalloc_lock(*memptr);
        big->base = *memptr;
        big->bytes = size;
        big->unused01 = 0;
        big->next = *bigp;
        *bigp = big;
        bigalloc_unlock(*memptr);

        return(0);
}

/*
 * free() (SLAB ALLOCATOR) - do the obvious
 */
void
free(void *ptr)
{
        _slabfree(ptr);
}
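
/*
 * Illustrative note on the malloc(0) semantics implemented below:
 *
 *      void *p = malloc(0);    -- returns ZERO_LENGTH_PTR, not NULL
 *      p = realloc(p, 100);    -- treated like malloc(100)
 *      free(p);                -- frees the new allocation
 *
 * Both _slabrealloc() and _slabfree() recognize ZERO_LENGTH_PTR
 * explicitly, so no real memory is consumed by zero-length allocations.
 */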

/*
 * _slaballoc() (SLAB ALLOCATOR)
 *
 * Allocate memory via the slab allocator.  If the request is too large,
 * or if it is page-aligned beyond a certain size, we fall back to the
 * KMEM subsystem.
 */
static void *
_slaballoc(size_t size, int flags)
{
        slzone_t z;
        slchunk_t chunk;
        slglobaldata_t slgd;
        size_t chunking;
        int zi;
#ifdef INVARIANTS
        int i;
#endif
        int off;

        /*
         * Handle the degenerate size == 0 case.  Yes, this does happen.
         * Return a special pointer.  This is to maintain compatibility with
         * the original malloc implementation.  Certain devices, such as the
         * adaptec driver, not only allocate 0 bytes, they check for NULL
         * and also realloc() later on.  Joy.
         */
        if (size == 0)
                return(ZERO_LENGTH_PTR);

        /*
         * Handle large allocations directly.  There should not be very many
         * of these so performance is not a big issue.
         *
         * The backend allocator is pretty nasty on an SMP system.  Use the
         * slab allocator for one- and two-page-sized chunks even though we
         * lose some efficiency.
         */
        if (size >= ZoneLimit ||
            ((size & PAGE_MASK) == 0 && size > PAGE_SIZE * 2)) {
                bigalloc_t big;
                bigalloc_t *bigp;

                size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
                chunk = _vmem_alloc(size, PAGE_SIZE, flags);
                if (chunk == NULL)
                        return(NULL);

                big = _slaballoc(sizeof(struct bigalloc), 0);
                if (big == NULL) {
                        _vmem_free(chunk, size);
                        return(NULL);
                }
                bigp = bigalloc_lock(chunk);
                big->base = chunk;
                big->bytes = size;
                big->unused01 = 0;
                big->next = *bigp;
                *bigp = big;
                bigalloc_unlock(chunk);

                return(chunk);
        }

        /*
         * Multi-threading support.  This needs work XXX.
         *
         * Choose a globaldata structure to allocate from.  If we cannot
         * immediately get the lock try a different one.
         *
         * LastSLGD is a per-thread global.
         */
        slgd = LastSLGD;
        if (slgd_trylock(slgd) != 0) {
                if (++slgd == &SLGlobalData[SLGD_MAX])
                        slgd = &SLGlobalData[0];
                LastSLGD = slgd;
                slgd_lock(slgd);
        }

        /*
         * Attempt to allocate out of an existing zone.  If all zones are
         * exhausted pull one off the free list or allocate a new one.
         *
         * Note: zoneindex() will panic if size is too large.
         */
        zi = zoneindex(&size, &chunking);
        MASSERT(zi < NZONES);

        if ((z = slgd->ZoneAry[zi]) == NULL) {
                /*
                 * Pull the zone off the free list.  If the zone on
                 * the free list happens to be correctly set up we
                 * do not have to reinitialize it.
                 */
                if ((z = slgd->FreeZones) != NULL) {
                        slgd->FreeZones = z->z_Next;
                        --slgd->NFreeZones;
                        if (z->z_ChunkSize == size) {
                                z->z_Magic = ZALLOC_SLAB_MAGIC;
                                z->z_Next = slgd->ZoneAry[zi];
                                slgd->ZoneAry[zi] = z;
                                goto have_zone;
                        }
                        bzero(z, sizeof(struct slzone));
                        z->z_Flags |= SLZF_UNOTZEROD;
                } else {
                        z = _vmem_alloc(ZoneSize, ZoneSize, flags);
                        if (z == NULL)
                                goto fail;
                }

                /*
                 * How big is the base structure?
                 */
#if defined(INVARIANTS)
                /*
                 * Make room for z_Bitmap.  An exact calculation is
                 * somewhat more complicated so don't make an exact
                 * calculation.
                 */
                off = offsetof(struct slzone,
                               z_Bitmap[(ZoneSize / size + 31) / 32]);
                bzero(z->z_Bitmap, (ZoneSize / size + 31) / 8);
#else
                off = sizeof(struct slzone);
#endif

                /*
                 * Align the storage in the zone based on the chunking.
                 *
                 * Guarantee power-of-2 alignment for power-of-2-sized
                 * chunks.  Otherwise align based on the chunking size
                 * (typically 8 or 16 bytes for small allocations).
                 *
                 * NOTE: Allocations >= ZoneLimit are governed by the
                 * bigalloc code and typically only guarantee page-alignment.
                 *
                 * Set initial conditions for UIndex near the zone header
                 * to reduce unnecessary page faults, vs semi-randomization
                 * to improve L1 cache saturation.
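                 *
                 * For example (illustrative): in the 104-byte zone
                 * (chunking 8) the chunk array starts on the next 8-byte
                 * boundary past the header, so every chunk is at least
                 * 8-byte aligned; in the 1024-byte zone it starts on a
                 * 1024-byte boundary within the 64KB-aligned zone, so
                 * every chunk is 1024-byte aligned.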
                 */
                if ((size | (size - 1)) + 1 == (size << 1))
                        off = (off + size - 1) & ~(size - 1);
                else
                        off = (off + chunking - 1) & ~(chunking - 1);
                z->z_Magic = ZALLOC_SLAB_MAGIC;
                z->z_GlobalData = slgd;
                z->z_ZoneIndex = zi;
                z->z_NMax = (ZoneSize - off) / size;
                z->z_NFree = z->z_NMax;
                z->z_BasePtr = (char *)z + off;
                /*z->z_UIndex = z->z_UEndIndex = slgd->JunkIndex % z->z_NMax;*/
                z->z_UIndex = z->z_UEndIndex = 0;
                z->z_ChunkSize = size;
                z->z_FirstFreePg = ZonePageCount;
                z->z_Next = slgd->ZoneAry[zi];
                slgd->ZoneAry[zi] = z;
                if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
                        flags &= ~SAFLAG_ZERO;  /* already zero'd */
                        flags |= SAFLAG_PASSIVE;
                }

                /*
                 * Slide the base index for initial allocations out of the
                 * next zone we create so we do not over-weight the lower
                 * part of the cpu memory caches.
                 */
                slgd->JunkIndex = (slgd->JunkIndex + ZALLOC_SLAB_SLIDE)
                                  & (ZALLOC_MAX_ZONE_SIZE - 1);
        }

        /*
         * Ok, we have a zone from which at least one chunk is available.
         *
         * Remove us from the ZoneAry[] when we become empty.
         */
have_zone:
        MASSERT(z->z_NFree > 0);

        if (--z->z_NFree == 0) {
                slgd->ZoneAry[zi] = z->z_Next;
                z->z_Next = NULL;
        }

        /*
         * Locate a chunk in a free page.  This attempts to localize
         * reallocations into earlier pages without us having to sort
         * the chunk list.  A chunk may still overlap a page boundary.
         */
        while (z->z_FirstFreePg < ZonePageCount) {
                if ((chunk = z->z_PageAry[z->z_FirstFreePg]) != NULL) {
#ifdef DIAGNOSTIC
                        /*
                         * Diagnostic: c_Next is not total garbage.
                         */
                        MASSERT(chunk->c_Next == NULL ||
                            ((intptr_t)chunk->c_Next & IN_SAME_PAGE_MASK) ==
                            ((intptr_t)chunk & IN_SAME_PAGE_MASK));
#endif
#ifdef INVARIANTS
                        chunk_mark_allocated(z, chunk);
#endif
                        MASSERT((uintptr_t)chunk & ZoneMask);
                        z->z_PageAry[z->z_FirstFreePg] = chunk->c_Next;
                        goto done;
                }
                ++z->z_FirstFreePg;
        }

        /*
         * No chunks are available but NFree said we had some memory,
         * so it must be available in the never-before-used-memory
         * area governed by UIndex.  The consequences are very
         * serious if our zone got corrupted so we use an explicit
         * panic rather than a KASSERT.
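         *
         * (z_UIndex walks the never-before-used region and wraps at
         * z_NMax; if it comes all the way back around to z_UEndIndex
         * while z_NFree is still non-zero, free chunks exist that were
         * not found on the per-page lists, i.e. the zone is corrupt.)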
         */
        chunk = (slchunk_t)(z->z_BasePtr + z->z_UIndex * size);

        if (++z->z_UIndex == z->z_NMax)
                z->z_UIndex = 0;
        if (z->z_UIndex == z->z_UEndIndex) {
                if (z->z_NFree != 0)
                        _mpanic("slaballoc: corrupted zone");
        }

        if ((z->z_Flags & SLZF_UNOTZEROD) == 0) {
                flags &= ~SAFLAG_ZERO;
                flags |= SAFLAG_PASSIVE;
        }
#if defined(INVARIANTS)
        chunk_mark_allocated(z, chunk);
#endif

done:
        slgd_unlock(slgd);
        if (flags & SAFLAG_ZERO) {
                bzero(chunk, size);
#ifdef INVARIANTS
        } else if ((flags & (SAFLAG_ZERO|SAFLAG_PASSIVE)) == 0) {
                if (use_malloc_pattern) {
                        for (i = 0; i < size; i += sizeof(int)) {
                                *(int *)((char *)chunk + i) = -1;
                        }
                }
                /* avoid accidental double-free check */
                chunk->c_Next = (void *)-1;
#endif
        }
        return(chunk);
fail:
        slgd_unlock(slgd);
        return(NULL);
}

/*
 * Reallocate memory within the chunk
 */
static void *
_slabrealloc(void *ptr, size_t size)
{
        bigalloc_t *bigp;
        void *nptr;
        slzone_t z;
        size_t chunking;

        if (ptr == NULL || ptr == ZERO_LENGTH_PTR)
                return(_slaballoc(size, 0));

        if (size == 0) {
                free(ptr);
                return(ZERO_LENGTH_PTR);
        }

        /*
         * Handle oversized allocations.  XXX we really should require
         * that a size be passed to free() instead of this nonsense.
         */
        if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
                bigalloc_t big;
                size_t bigbytes;

                while ((big = *bigp) != NULL) {
                        if (big->base == ptr) {
                                size = (size + PAGE_MASK) & ~(size_t)PAGE_MASK;
                                bigbytes = big->bytes;
                                bigalloc_unlock(ptr);
                                if (bigbytes == size)
                                        return(ptr);
                                if ((nptr = _slaballoc(size, 0)) == NULL)
                                        return(NULL);
                                if (size > bigbytes)
                                        size = bigbytes;
                                bcopy(ptr, nptr, size);
                                _slabfree(ptr);
                                return(nptr);
                        }
                        bigp = &big->next;
                }
                bigalloc_unlock(ptr);
        }

        /*
         * Get the original allocation's zone.  If the new request winds
         * up using the same chunk size we do not have to do anything.
         *
         * NOTE: We don't have to lock the globaldata here, the fields we
         * access here will not change at least as long as we have control
         * over the allocation.
         */
        z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
        MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

        /*
         * Use zoneindex() to chunk-align the new size, as long as the
         * new size is not too large.
         */
        if (size < ZoneLimit) {
                zoneindex(&size, &chunking);
                if (z->z_ChunkSize == size)
                        return(ptr);
        }

        /*
         * Allocate memory for the new request size and copy as appropriate.
         */
        if ((nptr = _slaballoc(size, 0)) != NULL) {
                if (size > z->z_ChunkSize)
                        size = z->z_ChunkSize;
                bcopy(ptr, nptr, size);
                _slabfree(ptr);
        }

        return(nptr);
}

/*
 * free() (SLAB ALLOCATOR)
 *
 * Free a memory block previously allocated by malloc.  Note that we do not
 * attempt to update ks_loosememuse as MP races could prevent us from
 * checking memory limits in malloc.
 *
 * MPSAFE
 */
static void
_slabfree(void *ptr)
{
        slzone_t z;
        slchunk_t chunk;
        bigalloc_t big;
        bigalloc_t *bigp;
        slglobaldata_t slgd;
        size_t size;
        int pgno;

        /*
         * Handle NULL frees and special 0-byte allocations
         */
        if (ptr == NULL)
                return;
        if (ptr == ZERO_LENGTH_PTR)
                return;

        /*
         * Handle oversized allocations.
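         *
         * (The bigalloc record is unhashed while the chain spinlock is
         * held; the record itself is freed and the pages are munmap()ed
         * only after the lock has been released.)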
         */
        if ((bigp = bigalloc_check_and_lock(ptr)) != NULL) {
                while ((big = *bigp) != NULL) {
                        if (big->base == ptr) {
                                *bigp = big->next;
                                bigalloc_unlock(ptr);
                                size = big->bytes;
                                _slabfree(big);
#ifdef INVARIANTS
                                MASSERT(sizeof(weirdary) <= size);
                                bcopy(weirdary, ptr, sizeof(weirdary));
#endif
                                _vmem_free(ptr, size);
                                return;
                        }
                        bigp = &big->next;
                }
                bigalloc_unlock(ptr);
        }

        /*
         * Zone case.  Figure out the zone based on the fact that it is
         * ZoneSize aligned.
         */
        z = (slzone_t)((uintptr_t)ptr & ~(uintptr_t)ZoneMask);
        MASSERT(z->z_Magic == ZALLOC_SLAB_MAGIC);

        pgno = ((char *)ptr - (char *)z) >> PAGE_SHIFT;
        chunk = ptr;
        slgd = z->z_GlobalData;
        slgd_lock(slgd);

#ifdef INVARIANTS
        /*
         * Attempt to detect a double-free.  To reduce overhead we only check
         * if there appears to be a link pointer at the base of the data.
         */
        if (((intptr_t)chunk->c_Next - (intptr_t)z) >> PAGE_SHIFT == pgno) {
                slchunk_t scan;

                for (scan = z->z_PageAry[pgno]; scan; scan = scan->c_Next) {
                        if (scan == chunk)
                                _mpanic("Double free at %p", chunk);
                }
        }
        chunk_mark_free(z, chunk);
#endif

        /*
         * Put weird data into the memory to detect modifications after
         * freeing, illegal pointer use after freeing (we should fault on
         * the odd address), and so forth.
         */
#ifdef INVARIANTS
        if (z->z_ChunkSize < sizeof(weirdary))
                bcopy(weirdary, chunk, z->z_ChunkSize);
        else
                bcopy(weirdary, chunk, sizeof(weirdary));
#endif

        /*
         * Add this free non-zero'd chunk to a linked list for reuse, adjust
         * z_FirstFreePg.
         */
        chunk->c_Next = z->z_PageAry[pgno];
        z->z_PageAry[pgno] = chunk;
        if (z->z_FirstFreePg > pgno)
                z->z_FirstFreePg = pgno;

        /*
         * Bump the number of free chunks.  If it becomes non-zero the zone
         * must be added back onto the appropriate list.
         */
        if (z->z_NFree++ == 0) {
                z->z_Next = slgd->ZoneAry[z->z_ZoneIndex];
                slgd->ZoneAry[z->z_ZoneIndex] = z;
        }

        /*
         * If the zone becomes totally free then move this zone to
         * the FreeZones list.
         *
         * Do not madvise here, avoiding the edge case where a malloc/free
         * loop is sitting on the edge of a new zone.
         *
         * We could leave at least one zone in the ZoneAry for the index,
         * using something like the below, but while this might be fine
         * for the kernel (who cares about ~10MB of wasted memory), it
         * probably isn't such a good idea for a user program.
         *
         *      && (z->z_Next || slgd->ZoneAry[z->z_ZoneIndex] != z)
         */
        if (z->z_NFree == z->z_NMax) {
                slzone_t *pz;

                pz = &slgd->ZoneAry[z->z_ZoneIndex];
                while (z != *pz)
                        pz = &(*pz)->z_Next;
                *pz = z->z_Next;
                z->z_Magic = -1;
                z->z_Next = slgd->FreeZones;
                slgd->FreeZones = z;
                ++slgd->NFreeZones;
        }

        /*
         * Limit the number of zones we keep cached.
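         *
         * (With ZONE_RELS_THRESH set to 4 and 64KB zones this caches at
         * most 256KB of fully-free zones per slglobaldata; the spinlock
         * is dropped around each _vmem_free()/munmap() call.)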
         */
        while (slgd->NFreeZones > ZONE_RELS_THRESH) {
                z = slgd->FreeZones;
                slgd->FreeZones = z->z_Next;
                --slgd->NFreeZones;
                slgd_unlock(slgd);
                _vmem_free(z, ZoneSize);
                slgd_lock(slgd);
        }
        slgd_unlock(slgd);
}

#if defined(INVARIANTS)
/*
 * Helper routines for sanity checks
 */
static
void
chunk_mark_allocated(slzone_t z, void *chunk)
{
        int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
        __uint32_t *bitptr;

        MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
        bitptr = &z->z_Bitmap[bitdex >> 5];
        bitdex &= 31;
        MASSERT((*bitptr & (1 << bitdex)) == 0);
        *bitptr |= 1 << bitdex;
}

static
void
chunk_mark_free(slzone_t z, void *chunk)
{
        int bitdex = ((char *)chunk - (char *)z->z_BasePtr) / z->z_ChunkSize;
        __uint32_t *bitptr;

        MASSERT(bitdex >= 0 && bitdex < z->z_NMax);
        bitptr = &z->z_Bitmap[bitdex >> 5];
        bitdex &= 31;
        MASSERT((*bitptr & (1 << bitdex)) != 0);
        *bitptr &= ~(1 << bitdex);
}

#endif

/*
 * _vmem_alloc()
 *
 * Directly map memory in PAGE_SIZE'd chunks with the specified
 * alignment.
 *
 * Alignment must be a multiple of PAGE_SIZE.
 *
 * Size must be >= alignment.
 */
static void *
_vmem_alloc(size_t size, size_t align, int flags)
{
        char *addr;
        char *save;
        size_t excess;

        /*
         * Map anonymous private memory.
         */
        addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
                    MAP_PRIVATE|MAP_ANON, -1, 0);
        if (addr == MAP_FAILED)
                return(NULL);

        /*
         * Check alignment.  The misaligned offset is also the excess
         * amount.  If misaligned unmap the excess so we have a chance of
         * mapping at the next alignment point and recursively try again.
         *
         *   BBBBBBBBBBB BBBBBBBBBBB BBBBBBBBBBB   block alignment
         *     aaaaaaaaa aaaaaaaaaaa aa            mis-aligned allocation
         *     xxxxxxxxx                           final excess calculation
         *     ^ returned address
         */
        excess = (uintptr_t)addr & (align - 1);

        if (excess) {
                excess = align - excess;
                save = addr;

                munmap(save + excess, size - excess);
                addr = _vmem_alloc(size, align, flags);
                munmap(save, excess);
        }
        return((void *)addr);
}

/*
 * _vmem_free()
 *
 * Free a chunk of memory allocated with _vmem_alloc()
 */
static void
_vmem_free(void *ptr, size_t size)
{
        munmap(ptr, size);
}

/*
 * Panic on fatal conditions
 */
static void
_mpanic(const char *ctl, ...)
{
        va_list va;

        if (malloc_panic == 0) {
                malloc_panic = 1;
                va_start(va, ctl);
                vfprintf(stderr, ctl, va);
                fprintf(stderr, "\n");
                fflush(stderr);
                va_end(va);
        }
        abort();
}