1 /* rpmalloc.c - Memory allocator - Public Domain - 2016 Mattias Jansson
2
3 This library provides a cross-platform lock free thread caching malloc implementation in C11.
4 The latest source code is always available at
5
6 https://github.com/mjansson/rpmalloc
7
8 This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
9
10 */
11
12 #include "rpmalloc.h"
13
14 ////////////
15 ///
16 /// Build time configurable limits
17 ///
18 //////
19
20 #ifndef HEAP_ARRAY_SIZE
21 //! Size of heap hashmap
22 #define HEAP_ARRAY_SIZE 47
23 #endif
24 #ifndef ENABLE_THREAD_CACHE
25 //! Enable per-thread cache
26 #define ENABLE_THREAD_CACHE 1
27 #endif
28 #ifndef ENABLE_GLOBAL_CACHE
29 //! Enable global cache shared between all threads, requires thread cache
30 #define ENABLE_GLOBAL_CACHE 1
31 #endif
32 #ifndef ENABLE_VALIDATE_ARGS
33 //! Enable validation of args to public entry points
34 #define ENABLE_VALIDATE_ARGS 0
35 #endif
36 #ifndef ENABLE_STATISTICS
37 //! Enable statistics collection
38 #define ENABLE_STATISTICS 0
39 #endif
40 #ifndef ENABLE_ASSERTS
41 //! Enable asserts
42 #define ENABLE_ASSERTS 0
43 #endif
44 #ifndef ENABLE_OVERRIDE
45 //! Override standard library malloc/free and new/delete entry points
46 #define ENABLE_OVERRIDE 0
47 #endif
48 #ifndef ENABLE_PRELOAD
49 //! Support preloading
50 #define ENABLE_PRELOAD 0
51 #endif
52 #ifndef DISABLE_UNMAP
53 //! Disable unmapping memory pages
54 #define DISABLE_UNMAP 0
55 #endif
56 #ifndef DEFAULT_SPAN_MAP_COUNT
//! Default number of spans to map in each call to map more virtual memory (default values yield 4MiB here)
58 #define DEFAULT_SPAN_MAP_COUNT 64
59 #endif
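// Quick sanity check of the comment above, assuming the default 64KiB span size defined
// further down in this file: 64 spans * 64KiB = 4MiB mapped per call.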
60
61 #if ENABLE_THREAD_CACHE
62 #ifndef ENABLE_UNLIMITED_CACHE
63 //! Unlimited thread and global cache
64 #define ENABLE_UNLIMITED_CACHE 0
65 #endif
66 #ifndef ENABLE_UNLIMITED_THREAD_CACHE
67 //! Unlimited cache disables any thread cache limitations
68 #define ENABLE_UNLIMITED_THREAD_CACHE ENABLE_UNLIMITED_CACHE
69 #endif
70 #if !ENABLE_UNLIMITED_THREAD_CACHE
71 #ifndef THREAD_CACHE_MULTIPLIER
72 //! Multiplier for thread cache (cache limit will be span release count multiplied by this value)
73 #define THREAD_CACHE_MULTIPLIER 16
74 #endif
75 #ifndef ENABLE_ADAPTIVE_THREAD_CACHE
76 //! Enable adaptive size of per-thread cache (still bounded by THREAD_CACHE_MULTIPLIER hard limit)
77 #define ENABLE_ADAPTIVE_THREAD_CACHE 0
78 #endif
79 #endif
80 #endif
81
82 #if ENABLE_GLOBAL_CACHE && ENABLE_THREAD_CACHE
83 #if DISABLE_UNMAP
84 #undef ENABLE_UNLIMITED_GLOBAL_CACHE
85 #define ENABLE_UNLIMITED_GLOBAL_CACHE 1
86 #endif
87 #ifndef ENABLE_UNLIMITED_GLOBAL_CACHE
88 //! Unlimited cache disables any global cache limitations
89 #define ENABLE_UNLIMITED_GLOBAL_CACHE ENABLE_UNLIMITED_CACHE
90 #endif
91 #if !ENABLE_UNLIMITED_GLOBAL_CACHE
92 //! Multiplier for global cache (cache limit will be span release count multiplied by this value)
93 #define GLOBAL_CACHE_MULTIPLIER (THREAD_CACHE_MULTIPLIER * 6)
94 #endif
95 #else
96 # undef ENABLE_GLOBAL_CACHE
97 # define ENABLE_GLOBAL_CACHE 0
98 #endif
99
100 #if !ENABLE_THREAD_CACHE || ENABLE_UNLIMITED_THREAD_CACHE
101 # undef ENABLE_ADAPTIVE_THREAD_CACHE
102 # define ENABLE_ADAPTIVE_THREAD_CACHE 0
103 #endif
104
105 #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
106 # error Must use global cache if unmap is disabled
107 #endif
108
109 #if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 )
110 # define PLATFORM_WINDOWS 1
111 # define PLATFORM_POSIX 0
112 #else
113 # define PLATFORM_WINDOWS 0
114 # define PLATFORM_POSIX 1
115 #endif
116
117 /// Platform and arch specifics
118 #if defined(_MSC_VER) && !defined(__clang__)
119 # ifndef FORCEINLINE
120 # define FORCEINLINE inline __forceinline
121 # endif
122 # define _Static_assert static_assert
123 #else
124 # ifndef FORCEINLINE
125 # define FORCEINLINE inline __attribute__((__always_inline__))
126 # endif
127 #endif
128 #if PLATFORM_WINDOWS
129 # ifndef WIN32_LEAN_AND_MEAN
130 # define WIN32_LEAN_AND_MEAN
131 # endif
132 # include <Windows.h>
133 # if ENABLE_VALIDATE_ARGS
134 # include <Intsafe.h>
135 # endif
136 #else
137 # include <unistd.h>
138 # include <stdio.h>
139 # include <stdlib.h>
140 # if defined(__APPLE__)
141 # include <mach/mach_vm.h>
142 # include <mach/vm_statistics.h>
143 # include <pthread.h>
144 # endif
145 # if defined(__HAIKU__)
146 # include <OS.h>
147 # include <pthread.h>
148 # endif
149 #endif
150
151 #include <stdint.h>
152 #include <string.h>
153 #include <errno.h>
154
155 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
156 #include <fibersapi.h>
157 static DWORD fls_key;
158 static void NTAPI
_rpmalloc_thread_destructor(void *value) {
160 if (value)
161 rpmalloc_thread_finalize();
162 }
163 #endif
164
165 #if PLATFORM_POSIX
166 # include <sys/mman.h>
167 # include <sched.h>
168 # ifdef __FreeBSD__
169 # include <sys/sysctl.h>
170 # define MAP_HUGETLB MAP_ALIGNED_SUPER
171 # endif
172 # ifndef MAP_UNINITIALIZED
173 # define MAP_UNINITIALIZED 0
174 # endif
175 #endif
176 #include <errno.h>
177
178 #if ENABLE_ASSERTS
179 # undef NDEBUG
180 # if defined(_MSC_VER) && !defined(_DEBUG)
181 # define _DEBUG
182 # endif
183 # include <assert.h>
184 #else
185 # undef assert
186 # define assert(x) do {} while(0)
187 #endif
188 #if ENABLE_STATISTICS
189 # include <stdio.h>
190 #endif
191
192 //////
193 ///
194 /// Atomic access abstraction (since MSVC does not do C11 yet)
195 ///
196 //////
197
198 #if defined(_MSC_VER) && !defined(__clang__)
199
200 typedef volatile long atomic32_t;
201 typedef volatile long long atomic64_t;
202 typedef volatile void *atomicptr_t;
203
static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; }
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { *dst = val; }
static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { return (int32_t)InterlockedIncrement(val); }
static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { return (int32_t)InterlockedDecrement(val); }
#if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; }
static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; }
#endif
static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; }
static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { return (void *)*src; }
static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { *dst = val; }
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { *dst = val; }
static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) == ref) ? 1 : 0; }
static FORCEINLINE int atomic_cas_ptr_acquire(atomicptr_t *dst, void *val, void *ref) { return atomic_cas_ptr(dst, val, ref); }
218
219 #define EXPECTED(x) (x)
220 #define UNEXPECTED(x) (x)
221
222 #else
223
224 #include <stdatomic.h>
225
226 typedef volatile _Atomic(int32_t) atomic32_t;
227 typedef volatile _Atomic(int64_t) atomic64_t;
228 typedef volatile _Atomic(void *) atomicptr_t;
229
static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return atomic_load_explicit(src, memory_order_relaxed); }
static FORCEINLINE void atomic_store32(atomic32_t *dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; }
static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; }
#if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
static FORCEINLINE int64_t atomic_load64(atomic64_t *val) { return atomic_load_explicit(val, memory_order_relaxed); }
static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
#endif
static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
static FORCEINLINE void *atomic_load_ptr(atomicptr_t *src) { return atomic_load_explicit(src, memory_order_relaxed); }
static FORCEINLINE void atomic_store_ptr(atomicptr_t *dst, void *val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
static FORCEINLINE void atomic_store_ptr_release(atomicptr_t *dst, void *val) { atomic_store_explicit(dst, val, memory_order_release); }
static FORCEINLINE int atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); }
static FORCEINLINE int atomic_cas_ptr_acquire(atomicptr_t *dst, void *val, void *ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); }
244
245 #define EXPECTED(x) __builtin_expect((x), 1)
246 #define UNEXPECTED(x) __builtin_expect((x), 0)
247
248 #endif
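// Illustrative sketch (not part of the allocator): the intended usage pattern for the
// CAS wrapper above is a load/link/compare-and-swap retry loop, the same shape used
// later for the deferred free lists and the global cache. The names list_head and node
// here are hypothetical and exist only to show the pattern.
#if 0
static void
example_lockfree_push(atomicptr_t *list_head, void **node) {
	void *head;
	do {
		head = atomic_load_ptr(list_head);
		*node = head;  // link the new node to the current head
	} while (!atomic_cas_ptr(list_head, node, head));  // retry if another thread raced us
}
#endif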
249
250 ////////////
251 ///
252 /// Statistics related functions (evaluate to nothing when statistics not enabled)
253 ///
254 //////
255
256 #if ENABLE_STATISTICS
257 # define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
258 # define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
259 # define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value))
260 # define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value))
261 # define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0)
262 # define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value))
263 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \
264 int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
265 if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
266 heap->size_class_use[class_idx].alloc_peak = alloc_current; \
267 atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \
268 } while(0)
269 # define _rpmalloc_stat_inc_free(heap, class_idx) do { \
270 atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
271 atomic_incr32(&heap->size_class_use[class_idx].free_total); \
272 } while(0)
273 #else
274 # define _rpmalloc_stat_inc(counter) do {} while(0)
275 # define _rpmalloc_stat_dec(counter) do {} while(0)
276 # define _rpmalloc_stat_add(counter, value) do {} while(0)
277 # define _rpmalloc_stat_add64(counter, value) do {} while(0)
278 # define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0)
279 # define _rpmalloc_stat_sub(counter, value) do {} while(0)
280 # define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0)
281 # define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0)
282 #endif
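// Usage note: because these macros compile to empty statements when ENABLE_STATISTICS
// is 0, call sites such as _rpmalloc_stat_add_peak(&_mapped_pages, page_count,
// _mapped_pages_peak) in _rpmalloc_mmap below cost nothing in release configurations.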
283
284 ///
285 /// Preconfigured limits and sizes
286 ///
287
288 //! Granularity of a small allocation block (must be power of two)
289 #define SMALL_GRANULARITY 16
290 //! Small granularity shift count
291 #define SMALL_GRANULARITY_SHIFT 4
292 //! Number of small block size classes
293 #define SMALL_CLASS_COUNT 65
294 //! Maximum size of a small block
295 #define SMALL_SIZE_LIMIT (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
296 //! Granularity of a medium allocation block
297 #define MEDIUM_GRANULARITY 512
298 //! Medium granularity shift count
299 #define MEDIUM_GRANULARITY_SHIFT 9
300 //! Number of medium block size classes
301 #define MEDIUM_CLASS_COUNT 61
302 //! Total number of small + medium size classes
303 #define SIZE_CLASS_COUNT (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
304 //! Number of large block size classes
305 #define LARGE_CLASS_COUNT 32
306 //! Maximum size of a medium block
307 #define MEDIUM_SIZE_LIMIT (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
308 //! Maximum size of a large block
309 #define LARGE_SIZE_LIMIT ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
//! ABA protection size in orphan heap list (also becomes limit of smallest page size)
311 #define HEAP_ORPHAN_ABA_SIZE 512
312 //! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
313 #define SPAN_HEADER_SIZE 128
314
315 _Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two");
316 _Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two");
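// Worked example of the resulting limits (assuming the default 64KiB span size):
//   SMALL_SIZE_LIMIT  = 16 * (65 - 1)      = 1024 bytes
//   MEDIUM_SIZE_LIMIT = 1024 + (512 * 61)  = 32256 bytes
//   LARGE_SIZE_LIMIT  = (32 * 65536) - 128 = 2097024 bytes
// Requests above LARGE_SIZE_LIMIT fall outside the size classes and are treated as huge
// allocations (SIZE_CLASS_HUGE below).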
317
318 #if ENABLE_VALIDATE_ARGS
319 //! Maximum allocation size to avoid integer overflow
320 #undef MAX_ALLOC_SIZE
321 #define MAX_ALLOC_SIZE (((size_t)-1) - _memory_span_size)
322 #endif
323
324 #define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
325 #define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
326
327 #define INVALID_POINTER ((void*)((uintptr_t)-1))
328
329 #define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
330 #define SIZE_CLASS_HUGE ((uint32_t)-1)
331
332 ////////////
333 ///
334 /// Data types
335 ///
336 //////
337
338 //! A memory heap, per thread
339 typedef struct heap_t heap_t;
340 //! Span of memory pages
341 typedef struct span_t span_t;
342 //! Span list
343 typedef struct span_list_t span_list_t;
344 //! Span active data
345 typedef struct span_active_t span_active_t;
346 //! Size class definition
347 typedef struct size_class_t size_class_t;
348 //! Global cache
349 typedef struct global_cache_t global_cache_t;
350
351 //! Flag indicating span is the first (master) span of a split superspan
352 #define SPAN_FLAG_MASTER 1U
353 //! Flag indicating span is a secondary (sub) span of a split superspan
354 #define SPAN_FLAG_SUBSPAN 2U
355 //! Flag indicating span has blocks with increased alignment
356 #define SPAN_FLAG_ALIGNED_BLOCKS 4U
357
358 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
359 struct span_use_t {
360 //! Current number of spans used (actually used, not in cache)
361 atomic32_t current;
362 //! High water mark of spans used
363 atomic32_t high;
364 #if ENABLE_STATISTICS
365 //! Number of spans transitioned to global cache
366 atomic32_t spans_to_global;
367 //! Number of spans transitioned from global cache
368 atomic32_t spans_from_global;
369 //! Number of spans transitioned to thread cache
370 atomic32_t spans_to_cache;
371 //! Number of spans transitioned from thread cache
372 atomic32_t spans_from_cache;
373 //! Number of spans transitioned to reserved state
374 atomic32_t spans_to_reserved;
375 //! Number of spans transitioned from reserved state
376 atomic32_t spans_from_reserved;
377 //! Number of raw memory map calls
378 atomic32_t spans_map_calls;
379 #endif
380 };
381 typedef struct span_use_t span_use_t;
382 #endif
383
384 #if ENABLE_STATISTICS
385 struct size_class_use_t {
386 //! Current number of allocations
387 atomic32_t alloc_current;
388 //! Peak number of allocations
389 int32_t alloc_peak;
390 //! Total number of allocations
391 atomic32_t alloc_total;
392 //! Total number of frees
393 atomic32_t free_total;
394 //! Number of spans in use
395 atomic32_t spans_current;
//! Peak number of spans in use
397 int32_t spans_peak;
398 //! Number of spans transitioned to cache
399 atomic32_t spans_to_cache;
400 //! Number of spans transitioned from cache
401 atomic32_t spans_from_cache;
402 //! Number of spans transitioned from reserved state
403 atomic32_t spans_from_reserved;
404 //! Number of spans mapped
405 atomic32_t spans_map_calls;
406 };
407 typedef struct size_class_use_t size_class_use_t;
408 #endif
409
// A span can either represent a single span of memory pages with size declared by the span_map_count configuration variable,
// or a set of spans in a continuous region, a super span. Any reference to the term "span" usually refers to either a single
// span or a super span. A super span can further be divided into multiple spans (or smaller super spans), where the first
// (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans
// are still alive and mapped in virtual memory, and once all subspans and the master have been unmapped, the entire
// super span region is released and unmapped (on Windows, for example, the entire super span range has to be released
// in the same call that releases the virtual memory range, but individual subranges can be decommitted individually
// to reduce physical memory use).
418 struct span_t {
419 //! Free list
420 void *free_list;
421 //! Total block count of size class
422 uint32_t block_count;
423 //! Size class
424 uint32_t size_class;
425 //! Index of last block initialized in free list
426 uint32_t free_list_limit;
427 //! Number of used blocks remaining when in partial state
428 uint32_t used_count;
429 //! Deferred free list
430 atomicptr_t free_list_deferred;
431 //! Size of deferred free list, or list of spans when part of a cache list
432 uint32_t list_size;
433 //! Size of a block
434 uint32_t block_size;
435 //! Flags and counters
436 uint32_t flags;
437 //! Number of spans
438 uint32_t span_count;
439 //! Total span counter for master spans
440 uint32_t total_spans;
441 //! Offset from master span for subspans
442 uint32_t offset_from_master;
443 //! Remaining span counter, for master spans
444 atomic32_t remaining_spans;
445 //! Alignment offset
446 uint32_t align_offset;
447 //! Owning heap
448 heap_t *heap;
449 //! Next span
450 span_t *next;
451 //! Previous span
452 span_t *prev;
453 };
454 _Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
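// Illustrative sketch (mirrors the unmap logic further down in _rpmalloc_span_unmap):
// given any subspan, the owning master span is recovered by stepping back
// offset_from_master spans from the subspan address.
#if 0
static span_t *
example_get_master(span_t *span) {
	if (span->flags & SPAN_FLAG_MASTER)
		return span;
	return (span_t *)pointer_offset(span, -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size));
}
#endif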
455
456 // Control structure for a heap, either a thread heap or a first class heap if enabled
457 struct heap_t {
458 //! Owning thread ID
459 uintptr_t owner_thread;
460 //! Free list of active span
461 void *free_list[SIZE_CLASS_COUNT];
462 //! Double linked list of partially used spans with free blocks for each size class.
463 // Previous span pointer in head points to tail span of list.
464 span_t *partial_span[SIZE_CLASS_COUNT];
465 #if RPMALLOC_FIRST_CLASS_HEAPS
//! Double linked list of fully utilized spans for each size class.
467 // Previous span pointer in head points to tail span of list.
468 span_t *full_span[SIZE_CLASS_COUNT];
469 #endif
470 #if ENABLE_THREAD_CACHE
471 //! List of free spans (single linked list)
472 span_t *span_cache[LARGE_CLASS_COUNT];
473 #endif
474 //! List of deferred free spans (single linked list)
475 atomicptr_t span_free_deferred;
476 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
477 //! Current and high water mark of spans used per span count
478 span_use_t span_use[LARGE_CLASS_COUNT];
479 #endif
480 #if RPMALLOC_FIRST_CLASS_HEAPS
481 //! Double linked list of large and huge spans allocated by this heap
482 span_t *large_huge_span;
483 #endif
484 //! Number of full spans
485 size_t full_span_count;
486 //! Mapped but unused spans
487 span_t *span_reserve;
488 //! Master span for mapped but unused spans
489 span_t *span_reserve_master;
490 //! Number of mapped but unused spans
491 size_t spans_reserved;
492 //! Next heap in id list
493 heap_t *next_heap;
494 //! Next heap in orphan list
495 heap_t *next_orphan;
496 //! Memory pages alignment offset
497 size_t align_offset;
498 //! Heap ID
499 int32_t id;
500 //! Finalization state flag
501 int finalize;
502 //! Master heap owning the memory pages
503 heap_t *master_heap;
504 //! Child count
505 atomic32_t child_count;
506 #if ENABLE_STATISTICS
507 //! Number of bytes transitioned thread -> global
508 atomic64_t thread_to_global;
509 //! Number of bytes transitioned global -> thread
510 atomic64_t global_to_thread;
511 //! Allocation stats per size class
512 size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
513 #endif
514 };
515
516 // Size class for defining a block size bucket
517 struct size_class_t {
518 //! Size of blocks in this class
519 uint32_t block_size;
520 //! Number of blocks in each chunk
521 uint16_t block_count;
522 //! Class index this class is merged with
523 uint16_t class_idx;
524 };
525 _Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
526
527 struct global_cache_t {
528 //! Cache list pointer
529 atomicptr_t cache;
530 //! Cache size
531 atomic32_t size;
532 //! ABA counter
533 atomic32_t counter;
534 };
535
536 ////////////
537 ///
538 /// Global data
539 ///
540 //////
541
542 //! Initialized flag
543 static int _rpmalloc_initialized;
544 //! Configuration
545 static rpmalloc_config_t _memory_config;
546 //! Memory page size
547 static size_t _memory_page_size;
548 //! Shift to divide by page size
549 static size_t _memory_page_size_shift;
550 //! Granularity at which memory pages are mapped by OS
551 static size_t _memory_map_granularity;
552 #if RPMALLOC_CONFIGURABLE
553 //! Size of a span of memory pages
554 static size_t _memory_span_size;
555 //! Shift to divide by span size
556 static size_t _memory_span_size_shift;
557 //! Mask to get to start of a memory span
558 static uintptr_t _memory_span_mask;
559 #else
560 //! Hardwired span size (64KiB)
561 #define _memory_span_size (64 * 1024)
562 #define _memory_span_size_shift 16
563 #define _memory_span_mask (~((uintptr_t)(_memory_span_size - 1)))
564 #endif
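// This mask is what makes constant-time deallocation possible: spans are aligned to
// _memory_span_size, so the owning span of any block can be recovered with a single
// mask of the block pointer (the same pattern appears in _rpmalloc_span_finalize), e.g.
//   span_t *span = (span_t *)((uintptr_t)block & _memory_span_mask);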
565 //! Number of spans to map in each map call
566 static size_t _memory_span_map_count;
567 //! Number of spans to release from thread cache to global cache (single spans)
568 static size_t _memory_span_release_count;
569 //! Number of spans to release from thread cache to global cache (large multiple spans)
570 static size_t _memory_span_release_count_large;
571 //! Global size classes
572 static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
573 //! Run-time size limit of medium blocks
574 static size_t _memory_medium_size_limit;
575 //! Heap ID counter
576 static atomic32_t _memory_heap_id;
577 //! Huge page support
578 static int _memory_huge_pages;
579 #if ENABLE_GLOBAL_CACHE
580 //! Global span cache
581 static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
582 #endif
583 //! All heaps
584 static atomicptr_t _memory_heaps[HEAP_ARRAY_SIZE];
585 //! Orphaned heaps
586 static atomicptr_t _memory_orphan_heaps;
587 #if RPMALLOC_FIRST_CLASS_HEAPS
588 //! Orphaned heaps (first class heaps)
589 static atomicptr_t _memory_first_class_orphan_heaps;
590 #endif
591 //! Running orphan counter to avoid ABA issues in linked list
592 static atomic32_t _memory_orphan_counter;
593 #if ENABLE_STATISTICS
594 //! Active heap count
595 static atomic32_t _memory_active_heaps;
596 //! Number of currently mapped memory pages
597 static atomic32_t _mapped_pages;
598 //! Peak number of concurrently mapped memory pages
599 static int32_t _mapped_pages_peak;
600 //! Number of mapped master spans
601 static atomic32_t _master_spans;
602 //! Number of currently unused spans
603 static atomic32_t _reserved_spans;
604 //! Running counter of total number of mapped memory pages since start
605 static atomic32_t _mapped_total;
606 //! Running counter of total number of unmapped memory pages since start
607 static atomic32_t _unmapped_total;
608 //! Number of currently mapped memory pages in OS calls
609 static atomic32_t _mapped_pages_os;
610 //! Number of currently allocated pages in huge allocations
611 static atomic32_t _huge_pages_current;
612 //! Peak number of currently allocated pages in huge allocations
613 static int32_t _huge_pages_peak;
614 #endif
615
616 ////////////
617 ///
618 /// Thread local heap and ID
619 ///
620 //////
621
622 //! Current thread heap
623 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
624 static pthread_key_t _memory_thread_heap;
625 #else
626 # ifdef _MSC_VER
627 # define _Thread_local __declspec(thread)
628 # define TLS_MODEL
629 # else
630 # define TLS_MODEL __attribute__((tls_model("initial-exec")))
631 # if !defined(__clang__) && defined(__GNUC__)
632 # define _Thread_local __thread
633 # endif
634 # endif
635 static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
636 #endif
637
638 static inline heap_t *
get_thread_heap_raw(void) {
640 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
641 return pthread_getspecific(_memory_thread_heap);
642 #else
643 return _memory_thread_heap;
644 #endif
645 }
646
647 //! Get the current thread heap
648 static inline heap_t *
get_thread_heap(void) {
650 heap_t *heap = get_thread_heap_raw();
651 #if ENABLE_PRELOAD
652 if (EXPECTED(heap != 0))
653 return heap;
654 rpmalloc_initialize();
655 return get_thread_heap_raw();
656 #else
657 return heap;
658 #endif
659 }
660
661 //! Fast thread ID
662 static inline uintptr_t
get_thread_id(void) {
664 #if defined(_WIN32)
665 return (uintptr_t)((void *)NtCurrentTeb());
666 #elif defined(__GNUC__) || defined(__clang__)
667 uintptr_t tid;
668 # if defined(__i386__)
669 __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
670 # elif defined(__MACH__)
671 __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
672 # elif defined(__x86_64__)
673 __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
674 # elif defined(__arm__)
675 __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
676 # elif defined(__aarch64__)
677 __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
678 # else
679 tid = (uintptr_t)get_thread_heap_raw();
680 # endif
681 return tid;
682 #else
683 return (uintptr_t)get_thread_heap_raw();
684 #endif
685 }
686
687 //! Set the current thread heap
688 static void
set_thread_heap(heap_t *heap) {
690 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
691 pthread_setspecific(_memory_thread_heap, heap);
692 #else
693 _memory_thread_heap = heap;
694 #endif
695 if (heap)
696 heap->owner_thread = get_thread_id();
697 }
698
699 ////////////
700 ///
701 /// Low level memory map/unmap
702 ///
703 //////
704
705 //! Map more virtual memory
706 // size is number of bytes to map
707 // offset receives the offset in bytes from start of mapped region
708 // returns address to start of mapped region to use
709 static void *
_rpmalloc_mmap(size_t size, size_t *offset) {
711 assert(!(size % _memory_page_size));
712 assert(size >= _memory_page_size);
713 _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak);
714 _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
715 return _memory_config.memory_map(size, offset);
716 }
717
718 //! Unmap virtual memory
719 // address is the memory address to unmap, as returned from _memory_map
720 // size is the number of bytes to unmap, which might be less than full region for a partial unmap
721 // offset is the offset in bytes to the actual mapped region, as set by _memory_map
722 // release is set to 0 for partial unmap, or size of entire range for a full unmap
723 static void
_rpmalloc_unmap(void *address, size_t size, size_t offset, size_t release) {
725 assert(!release || (release >= size));
726 assert(!release || (release >= _memory_page_size));
727 if (release) {
728 assert(!(release % _memory_page_size));
729 _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
730 _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
731 }
732 _memory_config.memory_unmap(address, size, offset, release);
733 }
734
735 //! Default implementation to map new pages to virtual memory
736 static void *
_rpmalloc_mmap_os(size_t size, size_t *offset) {
//Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if the span size is larger than the map granularity
739 size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0;
740 assert(size >= _memory_page_size);
741 #if PLATFORM_WINDOWS
742 //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
743 void *ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT,
744 PAGE_READWRITE);
745 if (!ptr) {
746 assert(ptr && "Failed to map virtual memory block");
747 return 0;
748 }
749 #else
750 int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
751 # if defined(__APPLE__)
752 int fd = (int)VM_MAKE_TAG(240U);
753 if (_memory_huge_pages)
754 fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
755 void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
756 # elif defined(MAP_HUGETLB)
757 void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
758 # else
759 void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
760 # endif
761 if ((ptr == MAP_FAILED) || !ptr) {
762 assert("Failed to map virtual memory block" == 0);
763 return 0;
764 }
765 #endif
766 _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift));
767 if (padding) {
768 size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
769 assert(final_padding <= _memory_span_size);
770 assert(final_padding <= padding);
771 assert(!(final_padding % 8));
772 ptr = pointer_offset(ptr, final_padding);
773 *offset = final_padding >> 3;
774 }
775 assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask));
776 return ptr;
777 }
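// Worked example of the padding/offset handshake above (assuming 64KiB spans and 4KiB
// pages): a 64KiB request is over-mapped by one extra span (128KiB total). If the OS
// returns 0x10003000, then final_padding = 0x10000 - 0x3000 = 0xD000, the caller gets
// the span-aligned address 0x10010000, and *offset = 0xD000 >> 3 = 0x1A00.
// _rpmalloc_unmap_os below restores the original base with address - (offset << 3)
// before releasing the full range.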
778
779 //! Default implementation to unmap pages from virtual memory
780 static void
_rpmalloc_unmap_os(void *address, size_t size, size_t offset, size_t release) {
782 assert(release || (offset == 0));
783 assert(!release || (release >= _memory_page_size));
784 assert(size >= _memory_page_size);
785 if (release && offset) {
786 offset <<= 3;
787 address = pointer_offset(address, -(int32_t)offset);
788 #if PLATFORM_POSIX
789 //Padding is always one span size
790 release += _memory_span_size;
791 #endif
792 }
793 #if !DISABLE_UNMAP
794 #if PLATFORM_WINDOWS
795 if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) {
796 assert(address && "Failed to unmap virtual memory block");
797 }
798 #else
799 if (release) {
800 if (munmap(address, release)) {
801 assert("Failed to unmap virtual memory block" == 0);
802 }
803 } else {
804 #if defined(POSIX_MADV_FREE)
805 if (posix_madvise(address, size, POSIX_MADV_FREE))
806 #endif
807 if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
808 assert("Failed to madvise virtual memory block as free" == 0);
809 }
810 }
811 #endif
812 #endif
813 if (release)
814 _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
815 }
816
817
818 ////////////
819 ///
820 /// Span linked list management
821 ///
822 //////
823
824 #if ENABLE_THREAD_CACHE
825
826 static void
827 _rpmalloc_span_unmap(span_t *span);
828
829 //! Unmap a single linked list of spans
830 static void
_rpmalloc_span_list_unmap_all(span_t *span) {
832 size_t list_size = span->list_size;
833 for (size_t ispan = 0; ispan < list_size; ++ispan) {
834 span_t *next_span = span->next;
835 _rpmalloc_span_unmap(span);
836 span = next_span;
837 }
838 assert(!span);
839 }
840
841 //! Add span to head of single linked span list
842 static size_t
_rpmalloc_span_list_push(span_t **head, span_t *span) {
844 span->next = *head;
845 if (*head)
846 span->list_size = (*head)->list_size + 1;
847 else
848 span->list_size = 1;
849 *head = span;
850 return span->list_size;
851 }
852
853 //! Remove span from head of single linked span list, returns the new list head
854 static span_t *
_rpmalloc_span_list_pop(span_t **head) {
856 span_t *span = *head;
857 span_t *next_span = 0;
858 if (span->list_size > 1) {
859 assert(span->next);
860 next_span = span->next;
861 assert(next_span);
862 next_span->list_size = span->list_size - 1;
863 }
864 *head = next_span;
865 return span;
866 }
867
868 //! Split a single linked span list
869 static span_t *
_rpmalloc_span_list_split(span_t *span, size_t limit) {
871 span_t *next = 0;
872 if (limit < 2)
873 limit = 2;
874 if (span->list_size > limit) {
875 uint32_t list_size = 1;
876 span_t *last = span;
877 next = span->next;
878 while (list_size < limit) {
879 last = next;
880 next = next->next;
881 ++list_size;
882 }
883 last->next = 0;
884 assert(next);
885 next->list_size = span->list_size - list_size;
886 span->list_size = list_size;
887 span->prev = 0;
888 }
889 return next;
890 }
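// Worked example: splitting a 50-span list with limit 16 keeps the first 16 spans in
// the original list (span->list_size becomes 16) and returns the remaining 34 spans as
// a new list head. _rpmalloc_heap_cache_insert below relies on this to keep part of an
// overflowing thread cache while handing the head portion to the global cache.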
891
892 #endif
893
894 //! Add a span to double linked list at the head
895 static void
_rpmalloc_span_double_link_list_add(span_t **head, span_t *span) {
897 if (*head) {
898 span->next = *head;
899 (*head)->prev = span;
900 } else {
901 span->next = 0;
902 }
903 *head = span;
904 }
905
906 //! Pop head span from double linked list
907 static void
_rpmalloc_span_double_link_list_pop_head(span_t **head, span_t *span) {
909 assert(*head == span);
910 span = *head;
911 *head = span->next;
912 }
913
914 //! Remove a span from double linked list
915 static void
_rpmalloc_span_double_link_list_remove(span_t **head, span_t *span) {
917 assert(*head);
918 if (*head == span) {
919 *head = span->next;
920 } else {
921 span_t *next_span = span->next;
922 span_t *prev_span = span->prev;
923 prev_span->next = next_span;
924 if (EXPECTED(next_span != 0)) {
925 next_span->prev = prev_span;
926 }
927 }
928 }
929
930
931 ////////////
932 ///
933 /// Span control
934 ///
935 //////
936
937 static void
938 _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
939
940 static void
941 _rpmalloc_heap_finalize(heap_t *heap);
942
943 static void
944 _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count);
945
946 //! Declare the span to be a subspan and store distance from master span and span count
947 static void
_rpmalloc_span_mark_as_subspan_unless_master(span_t *master, span_t *subspan, size_t span_count) {
949 assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER));
950 if (subspan != master) {
951 subspan->flags = SPAN_FLAG_SUBSPAN;
952 subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift);
953 subspan->align_offset = 0;
954 }
955 subspan->span_count = (uint32_t)span_count;
956 }
957
958 //! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
959 static span_t *
_rpmalloc_span_map_from_reserve(heap_t *heap, size_t span_count) {
961 //Update the heap span reserve
962 span_t *span = heap->span_reserve;
963 heap->span_reserve = (span_t *)pointer_offset(span, span_count * _memory_span_size);
964 heap->spans_reserved -= span_count;
965
966 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count);
967 if (span_count <= LARGE_CLASS_COUNT)
968 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
969
970 return span;
971 }
972
973 //! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size
974 static size_t
_rpmalloc_span_align_count(size_t span_count) {
976 size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count;
977 if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size))
978 request_count += _memory_span_map_count - (request_count % _memory_span_map_count);
979 return request_count;
980 }
981
982 //! Setup a newly mapped span
983 static void
_rpmalloc_span_initialize(span_t *span, size_t total_span_count, size_t span_count, size_t align_offset) {
985 span->total_spans = (uint32_t)total_span_count;
986 span->span_count = (uint32_t)span_count;
987 span->align_offset = (uint32_t)align_offset;
988 span->flags = SPAN_FLAG_MASTER;
989 atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
990 }
991
992 //! Map an aligned set of spans, taking configured mapping granularity and the page size into account
993 static span_t *
_rpmalloc_span_map_aligned_count(heap_t *heap, size_t span_count) {
995 //If we already have some, but not enough, reserved spans, release those to heap cache and map a new
996 //full set of spans. Otherwise we would waste memory if page size > span size (huge pages)
997 size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
998 size_t align_offset = 0;
999 span_t *span = (span_t *)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset);
1000 if (!span)
1001 return 0;
1002 _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
1003 _rpmalloc_stat_add(&_reserved_spans, aligned_span_count);
1004 _rpmalloc_stat_inc(&_master_spans);
1005 if (span_count <= LARGE_CLASS_COUNT)
1006 _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
1007 if (aligned_span_count > span_count) {
1008 span_t *reserved_spans = (span_t *)pointer_offset(span, span_count * _memory_span_size);
1009 size_t reserved_count = aligned_span_count - span_count;
1010 if (heap->spans_reserved) {
1011 _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
1012 _rpmalloc_heap_cache_insert(heap, heap->span_reserve);
1013 }
1014 _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count);
1015 }
1016 return span;
1017 }
1018
1019 //! Map in memory pages for the given number of spans (or use previously reserved pages)
1020 static span_t *
_rpmalloc_span_map(heap_t *heap, size_t span_count) {
1022 if (span_count <= heap->spans_reserved)
1023 return _rpmalloc_span_map_from_reserve(heap, span_count);
1024 return _rpmalloc_span_map_aligned_count(heap, span_count);
1025 }
1026
1027 //! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
1028 static void
_rpmalloc_span_unmap(span_t *span) {
1030 assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
1031 assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
1032
1033 int is_master = !!(span->flags & SPAN_FLAG_MASTER);
1034 span_t *master = is_master ? span : ((span_t *)pointer_offset(span,
1035 -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size)));
1036 assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN));
1037 assert(master->flags & SPAN_FLAG_MASTER);
1038
1039 size_t span_count = span->span_count;
1040 if (!is_master) {
1041 //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master)
1042 assert(span->align_offset == 0);
1043 if (_memory_span_size >= _memory_page_size) {
1044 _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
1045 _rpmalloc_stat_sub(&_reserved_spans, span_count);
1046 }
1047 } else {
1048 //Special double flag to denote an unmapped master
1049 //It must be kept in memory since span header must be used
1050 span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN;
1051 }
1052
1053 if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
1054 //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span
1055 assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN));
1056 size_t unmap_count = master->span_count;
1057 if (_memory_span_size < _memory_page_size)
1058 unmap_count = master->total_spans;
1059 _rpmalloc_stat_sub(&_reserved_spans, unmap_count);
1060 _rpmalloc_stat_sub(&_master_spans, 1);
1061 _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size);
1062 }
1063 }
1064
1065 //! Move the span (used for small or medium allocations) to the heap thread cache
1066 static void
_rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) {
1068 assert(heap == span->heap);
1069 assert(span->size_class < SIZE_CLASS_COUNT);
1070 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1071 atomic_decr32(&heap->span_use[0].current);
1072 #endif
1073 _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
1074 _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
1075 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1076 _rpmalloc_heap_cache_insert(heap, span);
1077 }
1078
1079 //! Initialize a (partial) free list up to next system memory page, while reserving the first block
1080 //! as allocated, returning number of blocks in list
1081 static uint32_t
free_list_partial_init(void **list, void **first_block, void *page_start, void *block_start,
                       uint32_t block_count, uint32_t block_size) {
1084 assert(block_count);
1085 *first_block = block_start;
1086 if (block_count > 1) {
1087 void *free_block = pointer_offset(block_start, block_size);
1088 void *block_end = pointer_offset(block_start, (size_t)block_size * block_count);
1089 //If block size is less than half a memory page, bound init to next memory page boundary
1090 if (block_size < (_memory_page_size >> 1)) {
1091 void *page_end = pointer_offset(page_start, _memory_page_size);
1092 if (page_end < block_end)
1093 block_end = page_end;
1094 }
1095 *list = free_block;
1096 block_count = 2;
1097 void *next_block = pointer_offset(free_block, block_size);
1098 while (next_block < block_end) {
1099 *((void **)free_block) = next_block;
1100 free_block = next_block;
1101 ++block_count;
1102 next_block = pointer_offset(next_block, block_size);
1103 }
1104 *((void **)free_block) = 0;
1105 } else {
1106 *list = 0;
1107 }
1108 return block_count;
1109 }
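// Worked example (16-byte blocks, 4KiB pages, 128-byte span header): only the first
// page of the span is initialized, i.e. (4096 - 128) / 16 = 248 blocks. The first block
// is handed to the caller, the remaining 247 are linked into the free list, and 248 is
// returned so span->free_list_limit records how far initialization has progressed.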
1110
1111 //! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list
1112 static void *
_rpmalloc_span_initialize_new(heap_t *heap, span_t *span, uint32_t class_idx) {
1114 assert(span->span_count == 1);
1115 size_class_t *size_class = _memory_size_class + class_idx;
1116 span->size_class = class_idx;
1117 span->heap = heap;
1118 span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
1119 span->block_size = size_class->block_size;
1120 span->block_count = size_class->block_count;
1121 span->free_list = 0;
1122 span->list_size = 0;
1123 atomic_store_ptr_release(&span->free_list_deferred, 0);
1124
1125 //Setup free list. Only initialize one system page worth of free blocks in list
1126 void *block;
1127 span->free_list_limit = free_list_partial_init(&heap->free_list[class_idx], &block,
1128 span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
1129 //Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized
1130 if (span->free_list_limit < span->block_count) {
1131 _rpmalloc_span_double_link_list_add(&heap->partial_span[class_idx], span);
1132 span->used_count = span->free_list_limit;
1133 } else {
1134 #if RPMALLOC_FIRST_CLASS_HEAPS
1135 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1136 #endif
1137 ++heap->full_span_count;
1138 span->used_count = span->block_count;
1139 }
1140 return block;
1141 }
1142
1143 static void
_rpmalloc_span_extract_free_list_deferred(span_t *span) {
1145 // We need acquire semantics on the CAS operation since we are interested in the list size
1146 // Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency
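// The CAS below parks INVALID_POINTER in free_list_deferred as a busy marker: the loop
// retries both while another thread holds that marker and when the CAS itself fails,
// and the release store of 0 afterwards publishes the list as available again.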
1147 do {
1148 span->free_list = atomic_load_ptr(&span->free_list_deferred);
1149 } while ((span->free_list == INVALID_POINTER) ||
1150 !atomic_cas_ptr_acquire(&span->free_list_deferred, INVALID_POINTER, span->free_list));
1151 span->used_count -= span->list_size;
1152 span->list_size = 0;
1153 atomic_store_ptr_release(&span->free_list_deferred, 0);
1154 }
1155
1156 static int
_rpmalloc_span_is_fully_utilized(span_t *span) {
1158 assert(span->free_list_limit <= span->block_count);
1159 return !span->free_list && (span->free_list_limit >= span->block_count);
1160 }
1161
1162 static int
_rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span, span_t **list_head) {
1164 span_t *class_span = (span_t *)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
1165 if (span == class_span) {
1166 // Adopt the heap class free list back into the span free list
1167 void *block = span->free_list;
1168 void *last_block = 0;
1169 while (block) {
1170 last_block = block;
1171 block = *((void **)block);
1172 }
1173 uint32_t free_count = 0;
1174 block = heap->free_list[iclass];
1175 while (block) {
1176 ++free_count;
1177 block = *((void **)block);
1178 }
1179 if (last_block) {
1180 *((void **)last_block) = heap->free_list[iclass];
1181 } else {
1182 span->free_list = heap->free_list[iclass];
1183 }
1184 heap->free_list[iclass] = 0;
1185 span->used_count -= free_count;
1186 }
1187 //If this assert triggers you have memory leaks
1188 assert(span->list_size == span->used_count);
1189 if (span->list_size == span->used_count) {
1190 _rpmalloc_stat_dec(&heap->span_use[0].current);
1191 _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
1192 // This function only used for spans in double linked lists
1193 if (list_head)
1194 _rpmalloc_span_double_link_list_remove(list_head, span);
1195 _rpmalloc_span_unmap(span);
1196 return 1;
1197 }
1198 return 0;
1199 }
1200
1201
1202 ////////////
1203 ///
1204 /// Global cache
1205 ///
1206 //////
1207
1208 #if ENABLE_GLOBAL_CACHE
1209
1210 //! Insert the given list of memory page spans in the global cache
1211 static void
_rpmalloc_global_cache_insert(global_cache_t *cache, span_t *span, size_t cache_limit) {
1213 assert((span->list_size == 1) || (span->next != 0));
1214 int32_t list_size = (int32_t)span->list_size;
1215 //Unmap if cache has reached the limit. Does not need stronger synchronization, the worst
1216 //case is that the span list is unmapped when it could have been cached (no real dependency
1217 //between the two variables)
1218 if (atomic_add32(&cache->size, list_size) > (int32_t)cache_limit) {
1219 #if !ENABLE_UNLIMITED_GLOBAL_CACHE
1220 _rpmalloc_span_list_unmap_all(span);
1221 atomic_add32(&cache->size, -list_size);
1222 return;
1223 #endif
1224 }
1225 void *current_cache, *new_cache;
1226 do {
1227 current_cache = atomic_load_ptr(&cache->cache);
1228 span->prev = (span_t *)((uintptr_t)current_cache & _memory_span_mask);
1229 new_cache = (void *)((uintptr_t)span | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
1230 } while (!atomic_cas_ptr(&cache->cache, new_cache, current_cache));
1231 }
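// The cache head is a tagged pointer: spans are aligned to _memory_span_size, so the
// low bits outside _memory_span_mask are free to carry the ABA counter. Every swap
// bumps the counter, so a stale pointer/tag pair read by a slow thread fails the CAS.
// Decoding is a single mask: span = (span_t *)((uintptr_t)cache & _memory_span_mask).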
1232
1233 //! Extract a number of memory page spans from the global cache
1234 static span_t *
_rpmalloc_global_cache_extract(global_cache_t *cache) {
1236 uintptr_t span_ptr;
1237 do {
1238 void *global_span = atomic_load_ptr(&cache->cache);
1239 span_ptr = (uintptr_t)global_span & _memory_span_mask;
1240 if (span_ptr) {
1241 span_t *span = (span_t *)span_ptr;
//By accessing the span ptr before it is swapped out of the list we assume that a contending thread
//does not manage to extract and unmap the span before we access it
1244 void *new_cache = (void *)((uintptr_t)span->prev | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
1245 if (atomic_cas_ptr(&cache->cache, new_cache, global_span)) {
1246 atomic_add32(&cache->size, -(int32_t)span->list_size);
1247 return span;
1248 }
1249 }
1250 } while (span_ptr);
1251 return 0;
1252 }
1253
1254 //! Finalize a global cache, only valid from allocator finalization (not thread safe)
1255 static void
_rpmalloc_global_cache_finalize(global_cache_t *cache) {
1257 void *current_cache = atomic_load_ptr(&cache->cache);
1258 span_t *span = (span_t *)((uintptr_t)current_cache & _memory_span_mask);
1259 while (span) {
1260 span_t *skip_span = (span_t *)((uintptr_t)span->prev & _memory_span_mask);
1261 atomic_add32(&cache->size, -(int32_t)span->list_size);
1262 _rpmalloc_span_list_unmap_all(span);
1263 span = skip_span;
1264 }
1265 assert(!atomic_load32(&cache->size));
1266 atomic_store_ptr(&cache->cache, 0);
1267 atomic_store32(&cache->size, 0);
1268 }
1269
1270 //! Insert the given list of memory page spans in the global cache
1271 static void
_rpmalloc_global_cache_insert_span_list(span_t *span) {
1273 size_t span_count = span->span_count;
1274 #if ENABLE_UNLIMITED_GLOBAL_CACHE
1275 _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, 0);
1276 #else
1277 const size_t cache_limit = (GLOBAL_CACHE_MULTIPLIER * ((span_count == 1) ? _memory_span_release_count :
1278 _memory_span_release_count_large));
1279 _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, cache_limit);
1280 #endif
1281 }
1282
1283 //! Extract a number of memory page spans from the global cache for large blocks
1284 static span_t *
_rpmalloc_global_cache_extract_span_list(size_t span_count) {
1286 span_t *span = _rpmalloc_global_cache_extract(&_memory_span_cache[span_count - 1]);
1287 assert(!span || (span->span_count == span_count));
1288 return span;
1289 }
1290
1291 #endif
1292
1293
1294 ////////////
1295 ///
1296 /// Heap control
1297 ///
1298 //////
1299
1300 static void _rpmalloc_deallocate_huge(span_t *);
1301
1302 //! Store the given spans as reserve in the given heap
1303 static void
_rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count) {
1305 heap->span_reserve_master = master;
1306 heap->span_reserve = reserve;
1307 heap->spans_reserved = reserve_span_count;
1308 }
1309
1310 //! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use
1311 static void
_rpmalloc_heap_cache_adopt_deferred(heap_t *heap, span_t **single_span) {
1313 span_t *span = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
1314 if (!span)
1315 return;
1316 while (!atomic_cas_ptr(&heap->span_free_deferred, 0, span))
1317 span = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
1318 while (span) {
1319 span_t *next_span = (span_t *)span->free_list;
1320 assert(span->heap == heap);
1321 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
1322 assert(heap->full_span_count);
1323 --heap->full_span_count;
1324 #if RPMALLOC_FIRST_CLASS_HEAPS
1325 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
1326 #endif
1327 if (single_span && !*single_span) {
1328 *single_span = span;
1329 } else {
1330 _rpmalloc_stat_dec(&heap->span_use[0].current);
1331 _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1332 _rpmalloc_heap_cache_insert(heap, span);
1333 }
1334 } else {
1335 if (span->size_class == SIZE_CLASS_HUGE) {
1336 _rpmalloc_deallocate_huge(span);
1337 } else {
1338 assert(span->size_class == SIZE_CLASS_LARGE);
1339 assert(heap->full_span_count);
1340 --heap->full_span_count;
1341 #if RPMALLOC_FIRST_CLASS_HEAPS
1342 _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
1343 #endif
1344 uint32_t idx = span->span_count - 1;
1345 if (!idx && single_span && !*single_span) {
1346 *single_span = span;
1347 } else {
1348 _rpmalloc_stat_dec(&heap->span_use[idx].current);
1349 _rpmalloc_heap_cache_insert(heap, span);
1350 }
1351 }
1352 }
1353 span = next_span;
1354 }
1355 }
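// Note: spans end up on span_free_deferred when they are freed from a thread other than
// the owning heap's thread (an assumption based on the wider allocator, not shown in
// this section); while on that list the span's free_list field doubles as the next
// pointer, which is why the loop above reads next_span from span->free_list.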
1356
1357 static void
_rpmalloc_heap_unmap(heap_t *heap) {
1359 if (!heap->master_heap) {
1360 if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
1361 size_t heap_size = sizeof(heap_t);
1362 size_t block_size = _memory_page_size * ((heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1363 _rpmalloc_unmap(heap, block_size, heap->align_offset, block_size);
1364 }
1365 } else {
1366 if (atomic_decr32(&heap->master_heap->child_count) == 0) {
1367 _rpmalloc_heap_unmap(heap->master_heap);
1368 }
1369 }
1370 }
1371
1372 static void
_rpmalloc_heap_global_finalize(heap_t *heap) {
1374 if (heap->finalize++ > 1) {
1375 --heap->finalize;
1376 return;
1377 }
1378
1379 _rpmalloc_heap_finalize(heap);
1380
1381 #if ENABLE_THREAD_CACHE
1382 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1383 span_t *span = heap->span_cache[iclass];
1384 heap->span_cache[iclass] = 0;
1385 if (span)
1386 _rpmalloc_span_list_unmap_all(span);
1387 }
1388 #endif
1389
1390 if (heap->full_span_count) {
1391 --heap->finalize;
1392 return;
1393 }
1394
1395 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1396 if (heap->free_list[iclass] || heap->partial_span[iclass]) {
1397 --heap->finalize;
1398 return;
1399 }
1400 }
1401 //Heap is now completely free, unmap and remove from heap list
1402 size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1403 heap_t *list_heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
1404 if (list_heap == heap) {
1405 atomic_store_ptr(&_memory_heaps[list_idx], heap->next_heap);
1406 } else {
1407 while (list_heap->next_heap != heap)
1408 list_heap = list_heap->next_heap;
1409 list_heap->next_heap = heap->next_heap;
1410 }
1411
1412 _rpmalloc_heap_unmap(heap);
1413 }
1414
1415 //! Insert a single span into thread heap cache, releasing to global cache if overflow
1416 static void
_rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
1418 if (UNEXPECTED(heap->finalize != 0)) {
1419 _rpmalloc_span_unmap(span);
1420 _rpmalloc_heap_global_finalize(heap);
1421 return;
1422 }
1423 #if ENABLE_THREAD_CACHE
1424 size_t span_count = span->span_count;
1425 size_t idx = span_count - 1;
1426 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_cache);
1427 #if ENABLE_UNLIMITED_THREAD_CACHE
1428 _rpmalloc_span_list_push(&heap->span_cache[idx], span);
1429 #else
1430 const size_t release_count = (!idx ? _memory_span_release_count : _memory_span_release_count_large);
1431 size_t current_cache_size = _rpmalloc_span_list_push(&heap->span_cache[idx], span);
1432 if (current_cache_size <= release_count)
1433 return;
1434 const size_t hard_limit = release_count * THREAD_CACHE_MULTIPLIER;
1435 if (current_cache_size <= hard_limit) {
1436 #if ENABLE_ADAPTIVE_THREAD_CACHE
1437 //Require 25% of high water mark to remain in cache (and at least 1, if use is 0)
1438 const size_t high_mark = heap->span_use[idx].high;
1439 const size_t min_limit = (high_mark >> 2) + release_count + 1;
1440 if (current_cache_size < min_limit)
1441 return;
1442 #else
1443 return;
1444 #endif
1445 }
1446 heap->span_cache[idx] = _rpmalloc_span_list_split(span, release_count);
1447 assert(span->list_size == release_count);
1448 #if ENABLE_GLOBAL_CACHE
1449 _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span_count * _memory_span_size);
1450 _rpmalloc_stat_add(&heap->span_use[idx].spans_to_global, span->list_size);
1451 _rpmalloc_global_cache_insert_span_list(span);
1452 #else
1453 _rpmalloc_span_list_unmap_all(span);
1454 #endif
1455 #endif
1456 #else
1457 (void)sizeof(heap);
1458 _rpmalloc_span_unmap(span);
1459 #endif
1460 }
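// Worked example of the limit logic above (illustrative, assuming the default
// 64KiB span size and default span map count, which yield a single-span
// release count of 64):
//
//   release_count = 64
//   hard_limit    = 64 * THREAD_CACHE_MULTIPLIER = 1024 spans
//
// With adaptive caching disabled, pushes that keep the single-span cache at or
// below 1024 entries simply stay in the thread cache; once the cache exceeds
// the hard limit (or, with adaptive caching enabled, exceeds 25% of the
// span-use high water mark plus the release count), a batch of 64 spans is
// split off and handed to the global cache, or unmapped if the global cache
// is compiled out.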
1461
1462 //! Extract the given number of spans from the different cache levels
1463 static span_t *
1464 _rpmalloc_heap_thread_cache_extract(heap_t *heap, size_t span_count) {
1465 span_t *span = 0;
1466 size_t idx = span_count - 1;
1467 if (!idx)
1468 _rpmalloc_heap_cache_adopt_deferred(heap, &span);
1469 #if ENABLE_THREAD_CACHE
1470 if (!span && heap->span_cache[idx]) {
1471 _rpmalloc_stat_inc(&heap->span_use[idx].spans_from_cache);
1472 span = _rpmalloc_span_list_pop(&heap->span_cache[idx]);
1473 }
1474 #endif
1475 return span;
1476 }
1477
1478 static span_t *
1479 _rpmalloc_heap_reserved_extract(heap_t *heap, size_t span_count) {
1480 if (heap->spans_reserved >= span_count)
1481 return _rpmalloc_span_map(heap, span_count);
1482 return 0;
1483 }
1484
1485 //! Extract a span from the global cache
1486 static span_t *
1487 _rpmalloc_heap_global_cache_extract(heap_t *heap, size_t span_count) {
1488 #if ENABLE_GLOBAL_CACHE
1489 size_t idx = span_count - 1;
1490 heap->span_cache[idx] = _rpmalloc_global_cache_extract_span_list(span_count);
1491 if (heap->span_cache[idx]) {
1492 _rpmalloc_stat_add64(&heap->global_to_thread, (size_t)heap->span_cache[idx]->list_size * span_count * _memory_span_size);
1493 _rpmalloc_stat_add(&heap->span_use[idx].spans_from_global, heap->span_cache[idx]->list_size);
1494 return _rpmalloc_span_list_pop(&heap->span_cache[idx]);
1495 }
1496 #endif
1497 (void)sizeof(heap);
1498 (void)sizeof(span_count);
1499 return 0;
1500 }
1501
1502 //! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory
1503 static span_t *
1504 _rpmalloc_heap_extract_new_span(heap_t *heap, size_t span_count, uint32_t class_idx) {
1505 (void)sizeof(class_idx);
1506 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1507 uint32_t idx = (uint32_t)span_count - 1;
1508 uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current);
1509 if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
1510 atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
1511 _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak);
1512 #endif
1513 span_t *span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
1514 if (EXPECTED(span != 0)) {
1515 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1516 return span;
1517 }
1518 span = _rpmalloc_heap_reserved_extract(heap, span_count);
1519 if (EXPECTED(span != 0)) {
1520 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
1521 return span;
1522 }
1523 span = _rpmalloc_heap_global_cache_extract(heap, span_count);
1524 if (EXPECTED(span != 0)) {
1525 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1526 return span;
1527 }
1528 //Final fallback, map in more virtual memory
1529 span = _rpmalloc_span_map(heap, span_count);
1530 _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
1531 return span;
1532 }
1533
1534 static void
1535 _rpmalloc_heap_initialize(heap_t *heap) {
1536 memset(heap, 0, sizeof(heap_t));
1537
1538 //Get a new heap ID
1539 heap->id = 1 + atomic_incr32(&_memory_heap_id);
1540
1541 //Link in heap in heap ID map
1542 heap_t *next_heap;
1543 size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1544 do {
1545 next_heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
1546 heap->next_heap = next_heap;
1547 } while (!atomic_cas_ptr(&_memory_heaps[list_idx], heap, next_heap));
1548 }
1549
1550 static void
1551 _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
1552 void *raw_heap;
1553 uintptr_t orphan_counter;
1554 heap_t *last_heap;
1555 heap->owner_thread = (uintptr_t)-1;
1556 #if RPMALLOC_FIRST_CLASS_HEAPS
1557 atomicptr_t *heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
1558 #else
1559 (void)sizeof(first_class);
1560 atomicptr_t *heap_list = &_memory_orphan_heaps;
1561 #endif
1562 do {
1563 last_heap = (heap_t *)atomic_load_ptr(heap_list);
1564 heap->next_orphan = (heap_t *)((uintptr_t)last_heap & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
1565 orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
1566 raw_heap = (void *)((uintptr_t)heap | (orphan_counter & (uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1)));
1567 } while (!atomic_cas_ptr(heap_list, raw_heap, last_heap));
1568 }
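// Illustrative sketch of the ABA tagging used by the orphan list above. Heaps
// are spaced by HEAP_ORPHAN_ABA_SIZE (assumed here to be a power of two such
// as 512), so the low pointer bits are free to hold a counter:
//
//   tagged = (uintptr_t)heap | (orphan_counter & (HEAP_ORPHAN_ABA_SIZE - 1));
//   heap   = (heap_t *)(tagged & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
//
// Because every push increments _memory_orphan_counter, re-pushing the same
// heap yields a different raw value and a stale CAS from another thread fails.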
1569
1570 //! Allocate a new heap from newly mapped memory pages
1571 static heap_t *
1572 _rpmalloc_heap_allocate_new(void) {
1573 //Map in pages for a new heap
1574 size_t align_offset = 0;
1575 size_t heap_size = sizeof(heap_t);
1576 size_t block_size = _memory_page_size * ((heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1577 heap_t *heap = (heap_t *)_rpmalloc_mmap(block_size, &align_offset);
1578 if (!heap)
1579 return heap;
1580
1581 _rpmalloc_heap_initialize(heap);
1582 heap->align_offset = align_offset;
1583
1584 //Put extra heaps as orphans, aligning to make sure ABA protection bits fit in pointer low bits
1585 size_t aligned_heap_size = HEAP_ORPHAN_ABA_SIZE * ((heap_size + HEAP_ORPHAN_ABA_SIZE - 1) / HEAP_ORPHAN_ABA_SIZE);
1586 size_t num_heaps = block_size / aligned_heap_size;
1587 atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
1588 heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
1589 while (num_heaps > 1) {
1590 _rpmalloc_heap_initialize(extra_heap);
1591 extra_heap->master_heap = heap;
1592 _rpmalloc_heap_orphan(extra_heap, 1);
1593 extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
1594 --num_heaps;
1595 }
1596 return heap;
1597 }
1598
1599 static heap_t *
1600 _rpmalloc_heap_extract_orphan(atomicptr_t *heap_list) {
1601 void *raw_heap;
1602 void *next_raw_heap;
1603 uintptr_t orphan_counter;
1604 heap_t *heap;
1605 heap_t *next_heap;
1606 do {
1607 raw_heap = atomic_load_ptr(heap_list);
1608 heap = (heap_t *)((uintptr_t)raw_heap & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
1609 if (!heap)
1610 break;
1611 next_heap = heap->next_orphan;
1612 orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
1613 next_raw_heap = (void *)((uintptr_t)next_heap | (orphan_counter & (uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1)));
1614 } while (!atomic_cas_ptr(heap_list, next_raw_heap, raw_heap));
1615 return heap;
1616 }
1617
1618 //! Allocate a new heap, potentially reusing a previously orphaned heap
1619 static heap_t *
1620 _rpmalloc_heap_allocate(int first_class) {
1621 heap_t *heap = 0;
1622 if (first_class == 0)
1623 heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
1624 #if RPMALLOC_FIRST_CLASS_HEAPS
1625 if (!heap)
1626 heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
1627 #endif
1628 if (!heap)
1629 heap = _rpmalloc_heap_allocate_new();
1630 return heap;
1631 }
1632
1633 static void
1634 _rpmalloc_heap_release(void *heapptr, int first_class) {
1635 heap_t *heap = (heap_t *)heapptr;
1636 if (!heap)
1637 return;
1638 //Release thread cache spans back to global cache
1639 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
1640 #if ENABLE_THREAD_CACHE
1641 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1642 span_t *span = heap->span_cache[iclass];
1643 heap->span_cache[iclass] = 0;
1644 if (span && heap->finalize) {
1645 _rpmalloc_span_list_unmap_all(span);
1646 continue;
1647 }
1648 #if ENABLE_GLOBAL_CACHE
1649 while (span) {
1650 assert(span->span_count == (iclass + 1));
1651 size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
1652 span_t *next = _rpmalloc_span_list_split(span, (uint32_t)release_count);
1653 _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
1654 _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
1655 _rpmalloc_global_cache_insert_span_list(span);
1656 span = next;
1657 }
1658 #else
1659 if (span)
1660 _rpmalloc_span_list_unmap_all(span);
1661 #endif
1662 }
1663 #endif
1664
1665 //Orphan the heap
1666 _rpmalloc_heap_orphan(heap, first_class);
1667
1668 if (get_thread_heap_raw() == heap)
1669 set_thread_heap(0);
1670 #if ENABLE_STATISTICS
1671 atomic_decr32(&_memory_active_heaps);
1672 assert(atomic_load32(&_memory_active_heaps) >= 0);
1673 #endif
1674 }
1675
1676 static void
1677 _rpmalloc_heap_release_raw(void *heapptr) {
1678 _rpmalloc_heap_release(heapptr, 0);
1679 }
1680
1681 static void
1682 _rpmalloc_heap_finalize(heap_t *heap) {
1683 if (heap->spans_reserved) {
1684 span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
1685 _rpmalloc_span_unmap(span);
1686 heap->spans_reserved = 0;
1687 }
1688
1689 _rpmalloc_heap_cache_adopt_deferred(heap, 0);
1690
1691 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1692 span_t *span = heap->partial_span[iclass];
1693 while (span) {
1694 span_t *next = span->next;
1695 _rpmalloc_span_finalize(heap, iclass, span, &heap->partial_span[iclass]);
1696 span = next;
1697 }
1698 // If class still has a free list it must be a full span
1699 if (heap->free_list[iclass]) {
1700 span_t *class_span = (span_t *)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
1701 span_t **list = 0;
1702 #if RPMALLOC_FIRST_CLASS_HEAPS
1703 list = &heap->full_span[iclass];
1704 #endif
1705 --heap->full_span_count;
1706 if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
1707 if (list)
1708 _rpmalloc_span_double_link_list_remove(list, class_span);
1709 _rpmalloc_span_double_link_list_add(&heap->partial_span[iclass], class_span);
1710 }
1711 }
1712 }
1713
1714 #if ENABLE_THREAD_CACHE
1715 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1716 if (heap->span_cache[iclass]) {
1717 _rpmalloc_span_list_unmap_all(heap->span_cache[iclass]);
1718 heap->span_cache[iclass] = 0;
1719 }
1720 }
1721 #endif
1722 assert(!atomic_load_ptr(&heap->span_free_deferred));
1723 }
1724
1725
1726 ////////////
1727 ///
1728 /// Allocation entry points
1729 ///
1730 //////
1731
1732 //! Pop first block from a free list
1733 static void *
1734 free_list_pop(void **list) {
1735 void *block = *list;
1736 *list = *((void **)block);
1737 return block;
1738 }
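// The free list is intrusive: the first sizeof(void*) bytes of each free block
// store the pointer to the next free block, so popping needs no side storage.
// A minimal sketch of the layout the pop above relies on (illustrative only):
//
//   void **list = &heap->free_list[class_idx];   // *list -> block A
//   // *(void **)A == block B, *(void **)B == block C, ..., last block -> 0
//   void *block = free_list_pop(list);           // returns A, *list now points to B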
1739
1740 //! Allocate a small/medium sized memory block from the given heap
1741 static void *
1742 _rpmalloc_allocate_from_heap_fallback(heap_t *heap, uint32_t class_idx) {
1743 span_t *span = heap->partial_span[class_idx];
1744 if (EXPECTED(span != 0)) {
1745 assert(span->block_count == _memory_size_class[span->size_class].block_count);
1746 assert(!_rpmalloc_span_is_fully_utilized(span));
1747 void *block;
1748 if (span->free_list) {
1749 //Swap in free list if not empty
1750 heap->free_list[class_idx] = span->free_list;
1751 span->free_list = 0;
1752 block = free_list_pop(&heap->free_list[class_idx]);
1753 } else {
1754 //If the span did not fully initialize free list, link up another page worth of blocks
1755 void *block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size));
1756 span->free_list_limit += free_list_partial_init(&heap->free_list[class_idx], &block,
1757 (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start,
1758 span->block_count - span->free_list_limit, span->block_size);
1759 }
1760 assert(span->free_list_limit <= span->block_count);
1761 span->used_count = span->free_list_limit;
1762
1763 //Swap in deferred free list if present
1764 if (atomic_load_ptr(&span->free_list_deferred))
1765 _rpmalloc_span_extract_free_list_deferred(span);
1766
1767 //If span is still not fully utilized keep it in partial list and early return block
1768 if (!_rpmalloc_span_is_fully_utilized(span))
1769 return block;
1770
1771 //The span is fully utilized, unlink from partial list and add to fully utilized list
1772 _rpmalloc_span_double_link_list_pop_head(&heap->partial_span[class_idx], span);
1773 #if RPMALLOC_FIRST_CLASS_HEAPS
1774 _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1775 #endif
1776 ++heap->full_span_count;
1777 return block;
1778 }
1779
1780 //Find a span in one of the cache levels
1781 span = _rpmalloc_heap_extract_new_span(heap, 1, class_idx);
1782 if (EXPECTED(span != 0)) {
1783 //Mark span as owned by this heap and set base data, return first block
1784 return _rpmalloc_span_initialize_new(heap, span, class_idx);
1785 }
1786
1787 return 0;
1788 }
1789
1790 //! Allocate a small sized memory block from the given heap
1791 static void *
1792 _rpmalloc_allocate_small(heap_t *heap, size_t size) {
1793 assert(heap);
1794 //Small sizes have unique size classes
1795 const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
1796 _rpmalloc_stat_inc_alloc(heap, class_idx);
1797 if (EXPECTED(heap->free_list[class_idx] != 0))
1798 return free_list_pop(&heap->free_list[class_idx]);
1799 return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1800 }
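// Worked example of the small class mapping above (illustrative, assuming the
// usual 16 byte SMALL_GRANULARITY and a granularity shift of 4):
//
//   size = 1   -> class_idx = (1 + 15)   >> 4 = 1 -> 16 byte blocks
//   size = 24  -> class_idx = (24 + 15)  >> 4 = 2 -> 32 byte blocks
//   size = 100 -> class_idx = (100 + 15) >> 4 = 7 -> 112 byte blocks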
1801
1802 //! Allocate a medium sized memory block from the given heap
1803 static void *
1804 _rpmalloc_allocate_medium(heap_t *heap, size_t size) {
1805 assert(heap);
1806 //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes)
1807 const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
1808 const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
1809 _rpmalloc_stat_inc_alloc(heap, class_idx);
1810 if (EXPECTED(heap->free_list[class_idx] != 0))
1811 return free_list_pop(&heap->free_list[class_idx]);
1812 return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1813 }
1814
1815 //! Allocate a large sized memory block from the given heap
1816 static void *
1817 _rpmalloc_allocate_large(heap_t *heap, size_t size) {
1818 assert(heap);
1819 //Calculate number of needed max sized spans (including header)
1820 //Since this function is never called if size > LARGE_SIZE_LIMIT
1821 //the span_count is guaranteed to be <= LARGE_CLASS_COUNT
1822 size += SPAN_HEADER_SIZE;
1823 size_t span_count = size >> _memory_span_size_shift;
1824 if (size & (_memory_span_size - 1))
1825 ++span_count;
1826
1827 //Find a span in one of the cache levels
1828 span_t *span = _rpmalloc_heap_extract_new_span(heap, span_count, SIZE_CLASS_LARGE);
1829 if (!span)
1830 return span;
1831
1832 //Mark span as owned by this heap and set base data
1833 assert(span->span_count == span_count);
1834 span->size_class = SIZE_CLASS_LARGE;
1835 span->heap = heap;
1836
1837 #if RPMALLOC_FIRST_CLASS_HEAPS
1838 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1839 #endif
1840 ++heap->full_span_count;
1841
1842 return pointer_offset(span, SPAN_HEADER_SIZE);
1843 }
1844
1845 //! Allocate a huge block by mapping memory pages directly
1846 static void *
1847 _rpmalloc_allocate_huge(heap_t *heap, size_t size) {
1848 assert(heap);
1849 size += SPAN_HEADER_SIZE;
1850 size_t num_pages = size >> _memory_page_size_shift;
1851 if (size & (_memory_page_size - 1))
1852 ++num_pages;
1853 size_t align_offset = 0;
1854 span_t *span = (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
1855 if (!span)
1856 return span;
1857
1858 //Store page count in span_count
1859 span->size_class = SIZE_CLASS_HUGE;
1860 span->span_count = (uint32_t)num_pages;
1861 span->align_offset = (uint32_t)align_offset;
1862 span->heap = heap;
1863 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1864
1865 #if RPMALLOC_FIRST_CLASS_HEAPS
1866 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1867 #endif
1868 ++heap->full_span_count;
1869
1870 return pointer_offset(span, SPAN_HEADER_SIZE);
1871 }
1872
1873 //! Allocate a block of the given size
1874 static void *
1875 _rpmalloc_allocate(heap_t *heap, size_t size) {
1876 if (EXPECTED(size <= SMALL_SIZE_LIMIT))
1877 return _rpmalloc_allocate_small(heap, size);
1878 else if (size <= _memory_medium_size_limit)
1879 return _rpmalloc_allocate_medium(heap, size);
1880 else if (size <= LARGE_SIZE_LIMIT)
1881 return _rpmalloc_allocate_large(heap, size);
1882 return _rpmalloc_allocate_huge(heap, size);
1883 }
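// Routing summary for the dispatcher above (the concrete limits are derived at
// initialization and depend on the configured span size):
//
//   size <= SMALL_SIZE_LIMIT           -> small classes, fixed granularity
//   size <= _memory_medium_size_limit  -> medium classes (possibly merged)
//   size <= LARGE_SIZE_LIMIT           -> large path, one or more whole spans
//   otherwise                          -> huge path, pages mapped directly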
1884
1885 static void *
1886 _rpmalloc_aligned_allocate(heap_t *heap, size_t alignment, size_t size) {
1887 if (alignment <= SMALL_GRANULARITY)
1888 return _rpmalloc_allocate(heap, size);
1889
1890 #if ENABLE_VALIDATE_ARGS
1891 if ((size + alignment) < size) {
1892 errno = EINVAL;
1893 return 0;
1894 }
1895 if (alignment & (alignment - 1)) {
1896 errno = EINVAL;
1897 return 0;
1898 }
1899 #endif
1900
1901 if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) {
1902 // If alignment is less than or equal to the span header size (which is a power of two),
1903 // and the size rounded up to a multiple of the span header size is no larger than
1904 // size + alignment, then the natural alignment of blocks provides the requested alignment
1905 size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE;
1906 assert(!(multiple_size % SPAN_HEADER_SIZE));
1907 if (multiple_size <= (size + alignment))
1908 return _rpmalloc_allocate(heap, multiple_size);
1909 }
1910
1911 void *ptr = 0;
1912 size_t align_mask = alignment - 1;
1913 if (alignment <= _memory_page_size) {
1914 ptr = _rpmalloc_allocate(heap, size + alignment);
1915 if ((uintptr_t)ptr & align_mask) {
1916 ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1917 //Mark as having aligned blocks
1918 span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
1919 span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
1920 }
1921 return ptr;
1922 }
1923
1924 // Fallback to mapping new pages for this request. Since pointers passed
1925 // to rpfree must be able to reach the start of the span by bitmasking of
1926 // the address with the span size, the returned aligned pointer from this
1927 // function must be within a span size of the start of the mapped area.
1928 // In worst case this requires us to loop and map pages until we get a
1929 // suitable memory address. It also means we can never align to span size
1930 // or greater, since the span header will push alignment more than one
1931 // span size away from span start (thus causing pointer mask to give us
1932 // an invalid span start on free)
1933 if (alignment & align_mask) {
1934 errno = EINVAL;
1935 return 0;
1936 }
1937 if (alignment >= _memory_span_size) {
1938 errno = EINVAL;
1939 return 0;
1940 }
1941
1942 size_t extra_pages = alignment / _memory_page_size;
1943
1944 // Since each span has a header, we will at least need one extra memory page
1945 size_t num_pages = 1 + (size / _memory_page_size);
1946 if (size & (_memory_page_size - 1))
1947 ++num_pages;
1948
1949 if (extra_pages > num_pages)
1950 num_pages = 1 + extra_pages;
1951
1952 size_t original_pages = num_pages;
1953 size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
1954 if (limit_pages < (original_pages * 2))
1955 limit_pages = original_pages * 2;
1956
1957 size_t mapped_size, align_offset;
1958 span_t *span;
1959
1960 retry:
1961 align_offset = 0;
1962 mapped_size = num_pages * _memory_page_size;
1963
1964 span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
1965 if (!span) {
1966 errno = ENOMEM;
1967 return 0;
1968 }
1969 ptr = pointer_offset(span, SPAN_HEADER_SIZE);
1970
1971 if ((uintptr_t)ptr & align_mask)
1972 ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1973
1974 if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
1975 (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
1976 (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
1977 _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
1978 ++num_pages;
1979 if (num_pages > limit_pages) {
1980 errno = EINVAL;
1981 return 0;
1982 }
1983 goto retry;
1984 }
1985
1986 //Store page count in span_count
1987 span->size_class = SIZE_CLASS_HUGE;
1988 span->span_count = (uint32_t)num_pages;
1989 span->align_offset = (uint32_t)align_offset;
1990 span->heap = heap;
1991 _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1992
1993 #if RPMALLOC_FIRST_CLASS_HEAPS
1994 _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1995 #endif
1996 ++heap->full_span_count;
1997
1998 return ptr;
1999 }
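// Worked example of the mapped fallback above (illustrative, assuming 4KiB
// pages and a 64KiB span size): rpaligned_alloc(16384, 8192) reaches this path
// with extra_pages = 4 and num_pages = 3, which is then bumped to
// 1 + extra_pages = 5. If the mapping cannot produce a 16KiB aligned pointer
// that stays inside both the mapping and the first span, the pages are
// unmapped and the map is retried with one more page, up to limit_pages
// (32 pages in this example).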
2000
2001
2002 ////////////
2003 ///
2004 /// Deallocation entry points
2005 ///
2006 //////
2007
2008 //! Deallocate the given small/medium memory block in the current thread local heap
2009 static void
2010 _rpmalloc_deallocate_direct_small_or_medium(span_t *span, void *block) {
2011 heap_t *heap = span->heap;
2012 assert(heap->owner_thread == get_thread_id() || heap->finalize);
2013 //Add block to free list
2014 if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
2015 span->used_count = span->block_count;
2016 #if RPMALLOC_FIRST_CLASS_HEAPS
2017 _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
2018 #endif
2019 _rpmalloc_span_double_link_list_add(&heap->partial_span[span->size_class], span);
2020 --heap->full_span_count;
2021 }
2022 --span->used_count;
2023 *((void **)block) = span->free_list;
2024 span->free_list = block;
2025 if (UNEXPECTED(span->used_count == span->list_size)) {
2026 _rpmalloc_span_double_link_list_remove(&heap->partial_span[span->size_class], span);
2027 _rpmalloc_span_release_to_cache(heap, span);
2028 }
2029 }
2030
2031 static void
2032 _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
2033 //This list does not need ABA protection, no mutable side state
2034 do {
2035 span->free_list = atomic_load_ptr(&heap->span_free_deferred);
2036 } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
2037 }
2038
2039 //! Put the block in the deferred free list of the owning span
2040 static void
2041 _rpmalloc_deallocate_defer_small_or_medium(span_t *span, void *block) {
2042 // The memory ordering here is a bit tricky. To avoid having to ABA protect
2043 // the deferred free list (which would otherwise be needed to keep the list and
2044 // its list_size in sync), we need acquire semantics on a successful CAS of the
2045 // pointer to guarantee list_size validity, plus release semantics on the pointer store
2046 void *free_list;
2047 do {
2048 free_list = atomic_load_ptr(&span->free_list_deferred);
2049 *((void **)block) = free_list;
2050 } while ((free_list == INVALID_POINTER) || !atomic_cas_ptr_acquire(&span->free_list_deferred, INVALID_POINTER, free_list));
2051 uint32_t free_count = ++span->list_size;
2052 atomic_store_ptr_release(&span->free_list_deferred, block);
2053 if (free_count == span->block_count) {
2054 // Span was completely freed by this block. Due to the INVALID_POINTER spin lock
2055 // no other thread can reach this state simultaneously on this span.
2056 // Safe to move to owner heap deferred cache
2057 _rpmalloc_deallocate_defer_free_span(span->heap, span);
2058 }
2059 }
2060
2061 static void
2062 _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
2063 _rpmalloc_stat_inc_free(span->heap, span->size_class);
2064 if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
2065 //Realign pointer to block start
2066 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2067 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2068 p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
2069 }
2070 //Check if block belongs to this heap or if deallocation should be deferred
2071 if ((span->heap->owner_thread == get_thread_id()) || span->heap->finalize)
2072 _rpmalloc_deallocate_direct_small_or_medium(span, p);
2073 else
2074 _rpmalloc_deallocate_defer_small_or_medium(span, p);
2075 }
2076
2077 //! Deallocate the given large memory block to the current heap
2078 static void
2079 _rpmalloc_deallocate_large(span_t *span) {
2080 assert(span->size_class == SIZE_CLASS_LARGE);
2081 assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
2082 assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
2083 //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap
2084 int defer = (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize;
2085 if (defer) {
2086 _rpmalloc_deallocate_defer_free_span(span->heap, span);
2087 return;
2088 }
2089 assert(span->heap->full_span_count);
2090 --span->heap->full_span_count;
2091 #if RPMALLOC_FIRST_CLASS_HEAPS
2092 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2093 #endif
2094 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2095 //Decrease counter
2096 size_t idx = span->span_count - 1;
2097 atomic_decr32(&span->heap->span_use[idx].current);
2098 #endif
2099 heap_t *heap = get_thread_heap();
2100 assert(heap);
2101 span->heap = heap;
2102 if ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved) {
2103 heap->span_reserve = span;
2104 heap->spans_reserved = span->span_count;
2105 if (span->flags & SPAN_FLAG_MASTER) {
2106 heap->span_reserve_master = span;
2107 } else { //SPAN_FLAG_SUBSPAN
2108 span_t *master = (span_t *)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
2109 heap->span_reserve_master = master;
2110 assert(master->flags & SPAN_FLAG_MASTER);
2111 assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count);
2112 }
2113 _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
2114 } else {
2115 //Insert into cache list
2116 _rpmalloc_heap_cache_insert(heap, span);
2117 }
2118 }
2119
2120 //! Deallocate the given huge span
2121 static void
2122 _rpmalloc_deallocate_huge(span_t *span) {
2123 assert(span->heap);
2124 if ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize) {
2125 _rpmalloc_deallocate_defer_free_span(span->heap, span);
2126 return;
2127 }
2128 assert(span->heap->full_span_count);
2129 --span->heap->full_span_count;
2130 #if RPMALLOC_FIRST_CLASS_HEAPS
2131 _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2132 #endif
2133
2134 //Oversized allocation, page count is stored in span_count
2135 size_t num_pages = span->span_count;
2136 _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size);
2137 _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
2138 }
2139
2140 //! Deallocate the given block
2141 static void
2142 _rpmalloc_deallocate(void *p) {
2143 //Grab the span (always at start of span, using span alignment)
2144 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2145
2146 if (UNEXPECTED(!span))
2147 return;
2148 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
2149 _rpmalloc_deallocate_small_or_medium(span, p);
2150 else if (span->size_class == SIZE_CLASS_LARGE)
2151 _rpmalloc_deallocate_large(span);
2152 else
2153 _rpmalloc_deallocate_huge(span);
2154 }
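// The span lookup above works because spans are naturally aligned to the span
// size, so any pointer inside a block can recover its span header by masking.
// Minimal sketch (illustrative only):
//
//   void *p = rpmalloc(100);
//   span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
//   // span->size_class now selects the small/medium, large or huge free path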
2155
2156
2157 ////////////
2158 ///
2159 /// Reallocation entry points
2160 ///
2161 //////
2162
2163 static size_t
2164 _rpmalloc_usable_size(void *p);
2165
2166 //! Reallocate the given block to the given size
2167 static void *
2168 _rpmalloc_reallocate(heap_t *heap, void *p, size_t size, size_t oldsize, unsigned int flags) {
2169 if (p) {
2170 //Grab the span using guaranteed span alignment
2171 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2172 if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
2173 //Small/medium sized block
2174 assert(span->span_count == 1);
2175 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2176 uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2177 uint32_t block_idx = block_offset / span->block_size;
2178 void *block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
2179 if (!oldsize)
2180 oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
2181 if ((size_t)span->block_size >= size) {
2182 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2183 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2184 memmove(block, p, oldsize);
2185 return block;
2186 }
2187 } else if (span->size_class == SIZE_CLASS_LARGE) {
2188 //Large block
2189 size_t total_size = size + SPAN_HEADER_SIZE;
2190 size_t num_spans = total_size >> _memory_span_size_shift;
2191 if (total_size & (_memory_span_size - 1))
2192 ++num_spans;
2193 size_t current_spans = span->span_count;
2194 void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2195 if (!oldsize)
2196 oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2197 if ((current_spans >= num_spans) && (num_spans >= (current_spans / 2))) {
2198 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2199 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2200 memmove(block, p, oldsize);
2201 return block;
2202 }
2203 } else {
2204 //Oversized block
2205 size_t total_size = size + SPAN_HEADER_SIZE;
2206 size_t num_pages = total_size >> _memory_page_size_shift;
2207 if (total_size & (_memory_page_size - 1))
2208 ++num_pages;
2209 //Page count is stored in span_count
2210 size_t current_pages = span->span_count;
2211 void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2212 if (!oldsize)
2213 oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2214 if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
2215 //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2216 if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2217 memmove(block, p, oldsize);
2218 return block;
2219 }
2220 }
2221 } else {
2222 oldsize = 0;
2223 }
2224
2225 if (!!(flags & RPMALLOC_GROW_OR_FAIL))
2226 return 0;
2227
2228 //Size is greater than block size, need to allocate a new block and deallocate the old
2229 //Avoid hysteresis by overallocating if increase is small (below 37%)
2230 size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
2231 size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
2232 void *block = _rpmalloc_allocate(heap, new_size);
2233 if (p && block) {
2234 if (!(flags & RPMALLOC_NO_PRESERVE))
2235 memcpy(block, p, oldsize < new_size ? oldsize : new_size);
2236 _rpmalloc_deallocate(p);
2237 }
2238
2239 return block;
2240 }
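// Worked example of the hysteresis rule above: for oldsize = 1000,
// lower_bound = 1000 + 250 + 125 = 1375 (roughly a 37% margin). Growing to
// 1100 bytes therefore allocates 1375 bytes, so a series of small increases
// does not reallocate every time, while growing to 2000 bytes allocates
// exactly 2000 and shrinking requests keep the requested size.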
2241
2242 static void *
2243 _rpmalloc_aligned_reallocate(heap_t *heap, void *ptr, size_t alignment, size_t size, size_t oldsize,
2244 unsigned int flags) {
2245 if (alignment <= SMALL_GRANULARITY)
2246 return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
2247
2248 int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
2249 size_t usablesize = _rpmalloc_usable_size(ptr);
2250 if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
2251 if (no_alloc || (size >= (usablesize / 2)))
2252 return ptr;
2253 }
2254 // Aligned alloc marks span as having aligned blocks
2255 void *block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
2256 if (EXPECTED(block != 0)) {
2257 if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
2258 if (!oldsize)
2259 oldsize = usablesize;
2260 memcpy(block, ptr, oldsize < size ? oldsize : size);
2261 }
2262 _rpmalloc_deallocate(ptr);
2263 }
2264 return block;
2265 }
2266
2267
2268 ////////////
2269 ///
2270 /// Initialization, finalization and utility
2271 ///
2272 //////
2273
2274 //! Get the usable size of the given block
2275 static size_t
2276 _rpmalloc_usable_size(void *p) {
2277 //Grab the span using guaranteed span alignment
2278 span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2279 if (span->size_class < SIZE_CLASS_COUNT) {
2280 //Small/medium block
2281 void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2283 return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size);
2284 }
2285 if (span->size_class == SIZE_CLASS_LARGE) {
2286 //Large block
2287 size_t current_spans = span->span_count;
2288 return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
2289 }
2290 //Oversized block, page count is stored in span_count
2291 size_t current_pages = span->span_count;
2292 return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
2293 }
2294
2295 //! Adjust and optimize the size class properties for the given class
2296 static void
2297 _rpmalloc_adjust_size_class(size_t iclass) {
2298 size_t block_size = _memory_size_class[iclass].block_size;
2299 size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
2300
2301 _memory_size_class[iclass].block_count = (uint16_t)block_count;
2302 _memory_size_class[iclass].class_idx = (uint16_t)iclass;
2303
2304 //Check if previous size classes can be merged
2305 size_t prevclass = iclass;
2306 while (prevclass > 0) {
2307 --prevclass;
2308 //A class can be merged if number of pages and number of blocks are equal
2309 if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count)
2310 memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass]));
2311 else
2312 break;
2313 }
2314 }
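// Worked example of the merge step above (illustrative, assuming the default
// 64KiB span size): block sizes around 24KiB and 24.5KiB both fit exactly two
// blocks per span, so they end up with the same block_count and the smaller
// class is redirected to the larger one via class_idx, letting both share the
// same spans instead of fragmenting across near-identical classes.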
2315
2316 //! Initialize the allocator and setup global data
2317 extern inline int
2318 rpmalloc_initialize(void) {
2319 if (_rpmalloc_initialized) {
2320 rpmalloc_thread_initialize();
2321 return 0;
2322 }
2323 return rpmalloc_initialize_config(0);
2324 }
2325
2326 int
2327 rpmalloc_initialize_config(const rpmalloc_config_t *config) {
2328 if (_rpmalloc_initialized) {
2329 rpmalloc_thread_initialize();
2330 return 0;
2331 }
2332 _rpmalloc_initialized = 1;
2333
2334 if (config)
2335 memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
2336 else
2337 memset(&_memory_config, 0, sizeof(rpmalloc_config_t));
2338
2339 if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
2340 _memory_config.memory_map = _rpmalloc_mmap_os;
2341 _memory_config.memory_unmap = _rpmalloc_unmap_os;
2342 }
2343
2344 #if RPMALLOC_CONFIGURABLE
2345 _memory_page_size = _memory_config.page_size;
2346 #else
2347 _memory_page_size = 0;
2348 #endif
2349 _memory_huge_pages = 0;
2350 _memory_map_granularity = _memory_page_size;
2351 if (!_memory_page_size) {
2352 #if PLATFORM_WINDOWS
2353 SYSTEM_INFO system_info;
2354 memset(&system_info, 0, sizeof(system_info));
2355 GetSystemInfo(&system_info);
2356 _memory_page_size = system_info.dwPageSize;
2357 _memory_map_granularity = system_info.dwAllocationGranularity;
2358 #else
2359 _memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
2360 _memory_map_granularity = _memory_page_size;
2361 if (_memory_config.enable_huge_pages) {
2362 #if defined(__linux__)
2363 size_t huge_page_size = 0;
2364 FILE *meminfo = fopen("/proc/meminfo", "r");
2365 if (meminfo) {
2366 char line[128];
2367 while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
2368 line[sizeof(line) - 1] = 0;
2369 if (strstr(line, "Hugepagesize:"))
2370 huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
2371 }
2372 fclose(meminfo);
2373 }
2374 if (huge_page_size) {
2375 _memory_huge_pages = 1;
2376 _memory_page_size = huge_page_size;
2377 _memory_map_granularity = huge_page_size;
2378 }
2379 #elif defined(__FreeBSD__)
2380 int rc;
2381 size_t sz = sizeof(rc);
2382
2383 if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
2384 _memory_huge_pages = 1;
2385 _memory_page_size = 2 * 1024 * 1024;
2386 _memory_map_granularity = _memory_page_size;
2387 }
2388 #elif defined(__APPLE__)
2389 _memory_huge_pages = 1;
2390 _memory_page_size = 2 * 1024 * 1024;
2391 _memory_map_granularity = _memory_page_size;
2392 #endif
2393 }
2394 #endif
2395 } else {
2396 if (_memory_config.enable_huge_pages)
2397 _memory_huge_pages = 1;
2398 }
2399
2400 #if PLATFORM_WINDOWS
2401 if (_memory_config.enable_huge_pages) {
2402 HANDLE token = 0;
2403 size_t large_page_minimum = GetLargePageMinimum();
2404 if (large_page_minimum)
2405 OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
2406 if (token) {
2407 LUID luid;
2408 if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
2409 TOKEN_PRIVILEGES token_privileges;
2410 memset(&token_privileges, 0, sizeof(token_privileges));
2411 token_privileges.PrivilegeCount = 1;
2412 token_privileges.Privileges[0].Luid = luid;
2413 token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
2414 if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
2415 DWORD err = GetLastError();
2416 if (err == ERROR_SUCCESS) {
2417 _memory_huge_pages = 1;
2418 if (large_page_minimum > _memory_page_size)
2419 _memory_page_size = large_page_minimum;
2420 if (large_page_minimum > _memory_map_granularity)
2421 _memory_map_granularity = large_page_minimum;
2422 }
2423 }
2424 }
2425 CloseHandle(token);
2426 }
2427 }
2428 #endif
2429
2430 //The ABA counter in heap orphan list is tied to using HEAP_ORPHAN_ABA_SIZE
2431 size_t min_span_size = HEAP_ORPHAN_ABA_SIZE;
2432 size_t max_page_size;
2433 #if UINTPTR_MAX > 0xFFFFFFFF
2434 max_page_size = 4096ULL * 1024ULL * 1024ULL;
2435 #else
2436 max_page_size = 4 * 1024 * 1024;
2437 #endif
2438 if (_memory_page_size < min_span_size)
2439 _memory_page_size = min_span_size;
2440 if (_memory_page_size > max_page_size)
2441 _memory_page_size = max_page_size;
2442 _memory_page_size_shift = 0;
2443 size_t page_size_bit = _memory_page_size;
2444 while (page_size_bit != 1) {
2445 ++_memory_page_size_shift;
2446 page_size_bit >>= 1;
2447 }
2448 _memory_page_size = ((size_t)1 << _memory_page_size_shift);
2449
2450 #if RPMALLOC_CONFIGURABLE
2451 size_t span_size = _memory_config.span_size;
2452 if (!span_size)
2453 span_size = (64 * 1024);
2454 if (span_size > (256 * 1024))
2455 span_size = (256 * 1024);
2456 _memory_span_size = 4096;
2457 _memory_span_size_shift = 12;
2458 while (_memory_span_size < span_size) {
2459 _memory_span_size <<= 1;
2460 ++_memory_span_size_shift;
2461 }
2462 _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
2463 #endif
2464
2465 _memory_span_map_count = (_memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT);
2466 if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
2467 _memory_span_map_count = (_memory_page_size / _memory_span_size);
2468 if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
2469 _memory_span_map_count = (_memory_page_size / _memory_span_size);
2470
2471 _memory_config.page_size = _memory_page_size;
2472 _memory_config.span_size = _memory_span_size;
2473 _memory_config.span_map_count = _memory_span_map_count;
2474 _memory_config.enable_huge_pages = _memory_huge_pages;
2475
2476 _memory_span_release_count = (_memory_span_map_count > 4 ? ((_memory_span_map_count < 64) ? _memory_span_map_count : 64) : 4);
2477 _memory_span_release_count_large = (_memory_span_release_count > 8 ? (_memory_span_release_count / 4) : 2);
2478
2479 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2480 if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw))
2481 return -1;
2482 #endif
2483 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2484 fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
2485 #endif
2486
2487 //Setup all small and medium size classes
2488 size_t iclass = 0;
2489 _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
2490 _rpmalloc_adjust_size_class(iclass);
2491 for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
2492 size_t size = iclass * SMALL_GRANULARITY;
2493 _memory_size_class[iclass].block_size = (uint32_t)size;
2494 _rpmalloc_adjust_size_class(iclass);
2495 }
2496 //At least two blocks per span, then fall back to large allocations
2497 _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
2498 if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
2499 _memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
2500 for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
2501 size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
2502 if (size > _memory_medium_size_limit)
2503 break;
2504 _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
2505 _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
2506 }
2507
2508 atomic_store_ptr(&_memory_orphan_heaps, 0);
2509 #if RPMALLOC_FIRST_CLASS_HEAPS
2510 atomic_store_ptr(&_memory_first_class_orphan_heaps, 0);
2511 #endif
2512 for (size_t ilist = 0, lsize = (sizeof(_memory_heaps) / sizeof(_memory_heaps[0])); ilist < lsize; ++ilist)
2513 atomic_store_ptr(&_memory_heaps[ilist], 0);
2514
2515 //Initialize this thread
2516 rpmalloc_thread_initialize();
2517 return 0;
2518 }
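// Usage sketch for explicit configuration (illustrative only; the values below
// are hypothetical examples, not defaults taken from this file):
//
//   rpmalloc_config_t config;
//   memset(&config, 0, sizeof(config));
//   config.page_size = 0;            // 0: query the OS page size
//   config.span_size = 128 * 1024;   // clamped to a power of two <= 256KiB
//   config.span_map_count = 32;      // spans mapped per virtual memory request
//   config.enable_huge_pages = 0;
//   if (rpmalloc_initialize_config(&config) != 0) {
//       // initialization failed
//   }
//   // ... use the allocator ...
//   rpmalloc_finalize();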
2519
2520 //! Finalize the allocator
2521 void
2522 rpmalloc_finalize(void) {
2523 rpmalloc_thread_finalize();
2524 //rpmalloc_dump_statistics(stderr);
2525
2526 //Free all thread caches and fully free spans
2527 for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
2528 heap_t *heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
2529 while (heap) {
2530 heap_t *next_heap = heap->next_heap;
2531 heap->finalize = 1;
2532 _rpmalloc_heap_global_finalize(heap);
2533 heap = next_heap;
2534 }
2535 }
2536
2537 #if ENABLE_GLOBAL_CACHE
2538 //Free global caches
2539 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
2540 _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
2541 #endif
2542
2543 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2544 pthread_key_delete(_memory_thread_heap);
2545 #endif
2546 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2547 FlsFree(fls_key);
2548 fls_key = 0;
2549 #endif
2550 #if ENABLE_STATISTICS
2551 //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code
2552 assert(!atomic_load32(&_mapped_pages));
2553 assert(!atomic_load32(&_reserved_spans));
2554 assert(!atomic_load32(&_mapped_pages_os));
2555 #endif
2556
2557 _rpmalloc_initialized = 0;
2558 }
2559
2560 //! Initialize thread, assign heap
2561 extern inline void
2562 rpmalloc_thread_initialize(void) {
2563 if (!get_thread_heap_raw()) {
2564 heap_t *heap = _rpmalloc_heap_allocate(0);
2565 if (heap) {
2566 _rpmalloc_stat_inc(&_memory_active_heaps);
2567 set_thread_heap(heap);
2568 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2569 FlsSetValue(fls_key, heap);
2570 #endif
2571 }
2572 }
2573 }
2574
2575 //! Finalize thread, orphan heap
2576 void
2577 rpmalloc_thread_finalize(void) {
2578 heap_t *heap = get_thread_heap_raw();
2579 if (heap)
2580 _rpmalloc_heap_release_raw(heap);
2581 set_thread_heap(0);
2582 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2583 FlsSetValue(fls_key, 0);
2584 #endif
2585 }
2586
2587 int
2588 rpmalloc_is_thread_initialized(void) {
2589 return (get_thread_heap_raw() != 0) ? 1 : 0;
2590 }
2591
2592 const rpmalloc_config_t *
2593 rpmalloc_config(void) {
2594 return &_memory_config;
2595 }
2596
2597 // Extern interface
2598
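// Minimal usage sketch of the public entry points below (illustrative only).
// With preloading/overriding disabled, initialization is the caller's
// responsibility:
//
//   rpmalloc_initialize();            // once per process (also sets up this thread)
//   rpmalloc_thread_initialize();     // once per additional thread
//   void *p = rpmalloc(128);
//   p = rprealloc(p, 4096);
//   size_t usable = rpmalloc_usable_size(p);
//   rpfree(p);
//   rpmalloc_thread_finalize();       // before each thread exits
//   rpmalloc_finalize();              // once at process exit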
2599 extern inline RPMALLOC_ALLOCATOR void *
2600 rpmalloc(size_t size) {
2601 #if ENABLE_VALIDATE_ARGS
2602 if (size >= MAX_ALLOC_SIZE) {
2603 errno = EINVAL;
2604 return 0;
2605 }
2606 #endif
2607 heap_t *heap = get_thread_heap();
2608 return _rpmalloc_allocate(heap, size);
2609 }
2610
2611 extern inline void
2612 rpfree(void *ptr) {
2613 _rpmalloc_deallocate(ptr);
2614 }
2615
2616 extern inline RPMALLOC_ALLOCATOR void *
2617 rpcalloc(size_t num, size_t size) {
2618 size_t total;
2619 #if ENABLE_VALIDATE_ARGS
2620 #if PLATFORM_WINDOWS
2621 int err = SizeTMult(num, size, &total);
2622 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2623 errno = EINVAL;
2624 return 0;
2625 }
2626 #else
2627 int err = __builtin_umull_overflow(num, size, &total);
2628 if (err || (total >= MAX_ALLOC_SIZE)) {
2629 errno = EINVAL;
2630 return 0;
2631 }
2632 #endif
2633 #else
2634 total = num * size;
2635 #endif
2636 heap_t *heap = get_thread_heap();
2637 void *block = _rpmalloc_allocate(heap, total);
2638 if (block)
2639 memset(block, 0, total);
2640 return block;
2641 }
2642
2643 extern inline RPMALLOC_ALLOCATOR void *
2644 rprealloc(void *ptr, size_t size) {
2645 #if ENABLE_VALIDATE_ARGS
2646 if (size >= MAX_ALLOC_SIZE) {
2647 errno = EINVAL;
2648 return ptr;
2649 }
2650 #endif
2651 heap_t *heap = get_thread_heap();
2652 return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
2653 }
2654
2655 extern RPMALLOC_ALLOCATOR void *
2656 rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
2657 unsigned int flags) {
2658 #if ENABLE_VALIDATE_ARGS
2659 if ((size + alignment < size) || (alignment > _memory_page_size)) {
2660 errno = EINVAL;
2661 return 0;
2662 }
2663 #endif
2664 heap_t *heap = get_thread_heap();
2665 return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags);
2666 }
2667
2668 extern RPMALLOC_ALLOCATOR void *
2669 rpaligned_alloc(size_t alignment, size_t size) {
2670 heap_t *heap = get_thread_heap();
2671 return _rpmalloc_aligned_allocate(heap, alignment, size);
2672 }
2673
2674 extern inline RPMALLOC_ALLOCATOR void *
2675 rpaligned_calloc(size_t alignment, size_t num, size_t size) {
2676 size_t total;
2677 #if ENABLE_VALIDATE_ARGS
2678 #if PLATFORM_WINDOWS
2679 int err = SizeTMult(num, size, &total);
2680 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2681 errno = EINVAL;
2682 return 0;
2683 }
2684 #else
2685 int err = __builtin_umull_overflow(num, size, &total);
2686 if (err || (total >= MAX_ALLOC_SIZE)) {
2687 errno = EINVAL;
2688 return 0;
2689 }
2690 #endif
2691 #else
2692 total = num * size;
2693 #endif
2694 void *block = rpaligned_alloc(alignment, total);
2695 if (block)
2696 memset(block, 0, total);
2697 return block;
2698 }
2699
2700 extern inline RPMALLOC_ALLOCATOR void *
2701 rpmemalign(size_t alignment, size_t size) {
2702 return rpaligned_alloc(alignment, size);
2703 }
2704
2705 extern inline int
2706 rpposix_memalign(void **memptr, size_t alignment, size_t size) {
2707 if (memptr)
2708 *memptr = rpaligned_alloc(alignment, size);
2709 else
2710 return EINVAL;
2711 return *memptr ? 0 : ENOMEM;
2712 }
2713
2714 extern inline size_t
2715 rpmalloc_usable_size(void *ptr) {
2716 return (ptr ? _rpmalloc_usable_size(ptr) : 0);
2717 }
2718
2719 extern inline void
2720 rpmalloc_thread_collect(void) {
2721 }
2722
2723 void
2724 rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
2725 memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
2726 heap_t *heap = get_thread_heap_raw();
2727 if (!heap)
2728 return;
2729
2730 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2731 size_class_t *size_class = _memory_size_class + iclass;
2732 span_t *span = heap->partial_span[iclass];
2733 while (span) {
2734 size_t free_count = span->list_size;
2735 size_t block_count = size_class->block_count;
2736 if (span->free_list_limit < block_count)
2737 block_count = span->free_list_limit;
2738 free_count += (block_count - span->used_count);
2739 stats->sizecache += free_count * size_class->block_size;
2740 span = span->next;
2741 }
2742 }
2743
2744 #if ENABLE_THREAD_CACHE
2745 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2746 if (heap->span_cache[iclass])
2747 stats->spancache += (size_t)heap->span_cache[iclass]->list_size * (iclass + 1) * _memory_span_size;
2748 }
2749 #endif
2750
2751 span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
2752 while (deferred) {
2753 if (deferred->size_class != SIZE_CLASS_HUGE)
2754 stats->spancache += (size_t)deferred->span_count * _memory_span_size;
2755 deferred = (span_t *)deferred->free_list;
2756 }
2757
2758 #if ENABLE_STATISTICS
2759 stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
2760 stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);
2761
2762 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2763 stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
2764 stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high);
2765 stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
2766 stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
2767 stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
2768 stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
2769 stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
2770 stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
2771 stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
2772 }
2773 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2774 stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
2775 stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
2776 stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
2777 stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
2778 stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
2779 stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
2780 stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved);
2781 stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
2782 }
2783 #endif
2784 }
2785
2786 void
2787 rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
2788 memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
2789 #if ENABLE_STATISTICS
2790 stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
2791 stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
2792 stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
2793 stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
2794 stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
2795 stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
2796 #endif
2797 #if ENABLE_GLOBAL_CACHE
2798 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2799 stats->cached += (size_t)atomic_load32(&_memory_span_cache[iclass].size) * (iclass + 1) * _memory_span_size;
2800 }
2801 #endif
2802 }
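// Usage sketch for the statistics queries above (illustrative only; most
// counters are populated only when built with ENABLE_STATISTICS):
//
//   rpmalloc_global_statistics_t gstats;
//   rpmalloc_global_statistics(&gstats);
//   // gstats.cached: bytes currently held in the global span caches
//
//   rpmalloc_thread_statistics_t tstats;
//   rpmalloc_thread_statistics(&tstats);   // for the calling thread's heap
//   // tstats.sizecache / tstats.spancache: bytes cached by this thread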
2803
2804 #if ENABLE_STATISTICS
2805
2806 static void
2807 _memory_heap_dump_statistics(heap_t *heap, void *file) {
2808 fprintf(file, "Heap %d stats:\n", heap->id);
2809 fprintf(file,
2810 "Class CurAlloc PeakAlloc TotAlloc TotFree BlkSize BlkCount SpansCur SpansPeak PeakAllocMiB ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
2811 for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2812 if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
2813 continue;
2814 fprintf(file, "%3u: %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
2815 atomic_load32(&heap->size_class_use[iclass].alloc_current),
2816 heap->size_class_use[iclass].alloc_peak,
2817 atomic_load32(&heap->size_class_use[iclass].alloc_total),
2818 atomic_load32(&heap->size_class_use[iclass].free_total),
2819 _memory_size_class[iclass].block_size,
2820 _memory_size_class[iclass].block_count,
2821 atomic_load32(&heap->size_class_use[iclass].spans_current),
2822 heap->size_class_use[iclass].spans_peak,
2823 ((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
2824 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024),
2825 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024),
2826 ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024),
2827 atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
2828 }
2829 fprintf(file,
2830 "Spans Current Peak PeakMiB Cached ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB MmapCalls\n");
2831 for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
2832 if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
2833 continue;
2834 fprintf(file, "%4u: %8d %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
2835 atomic_load32(&heap->span_use[iclass].current),
2836 atomic_load32(&heap->span_use[iclass].high),
2837 ((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
2838 #if ENABLE_THREAD_CACHE
2839 heap->span_cache[iclass] ? heap->span_cache[iclass]->list_size : 0,
2840 ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
2841 ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
2842 #else
		        0, (size_t)0, (size_t)0,
#endif
		        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
		        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
		        ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
		        ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
		        atomic_load32(&heap->span_use[iclass].spans_map_calls));
	}
	fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
	fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
	        (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
}

#endif

void
rpmalloc_dump_statistics(void *file) {
#if ENABLE_STATISTICS
	// If you hit this assert, you still have active threads or forgot to finalize some thread(s)
	assert(atomic_load32(&_memory_active_heaps) == 0);
	for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
		heap_t *heap = atomic_load_ptr(&_memory_heaps[list_idx]);
		while (heap) {
			int need_dump = 0;
			for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
					assert(!atomic_load32(&heap->size_class_use[iclass].free_total));
					assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
					continue;
				}
				need_dump = 1;
			}
			for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) {
				if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
					continue;
				need_dump = 1;
			}
			if (need_dump)
				_memory_heap_dump_statistics(heap, file);
			heap = heap->next_heap;
		}
	}
	fprintf(file, "Global stats:\n");
	size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
	size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
	fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
	fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));

	size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
	size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
	size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
	size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
	size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
	size_t reserved_total = (size_t)atomic_load32(&_reserved_spans) * _memory_span_size;
	fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB ReservedTotalMiB\n");
	fprintf(file, "%9zu %11zu %13zu %14zu %16zu %16zu\n",
	        mapped / (size_t)(1024 * 1024),
	        mapped_os / (size_t)(1024 * 1024),
	        mapped_peak / (size_t)(1024 * 1024),
	        mapped_total / (size_t)(1024 * 1024),
	        unmapped_total / (size_t)(1024 * 1024),
	        reserved_total / (size_t)(1024 * 1024));

	fprintf(file, "\n");
#else
	(void)sizeof(file);
#endif
}
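
// Usage sketch (illustrative only, kept as a comment so it is not compiled): dump the collected
// statistics to a stream once all other threads have been finalized. Requires a build with
// ENABLE_STATISTICS; otherwise the call only ignores its argument.
//
//   rpmalloc_dump_statistics(stderr);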

#if RPMALLOC_FIRST_CLASS_HEAPS

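// Usage sketch for the first class heap API (illustrative only, kept as a comment so it is not
// compiled). It assumes the library was built with RPMALLOC_FIRST_CLASS_HEAPS set to 1 and that
// the allocator has already been initialized. Blocks allocated from an explicit heap must be
// freed through that same heap.
//
//   rpmalloc_heap_t *heap = rpmalloc_heap_acquire();
//   void *block = rpmalloc_heap_alloc(heap, 128);
//   rpmalloc_heap_free(heap, block);
//   rpmalloc_heap_free_all(heap);   // or release any remaining blocks in one sweep
//   rpmalloc_heap_release(heap);
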
extern inline rpmalloc_heap_t *
rpmalloc_heap_acquire(void) {
	// Must be a pristine heap from newly mapped memory pages, or else memory blocks
	// could already be allocated from the heap which would (wrongly) be released when
	// heap is cleared with rpmalloc_heap_free_all()
	heap_t *heap = _rpmalloc_heap_allocate(1);
	_rpmalloc_stat_inc(&_memory_active_heaps);
	return heap;
}

extern inline void
rpmalloc_heap_release(rpmalloc_heap_t *heap) {
	if (heap)
		_rpmalloc_heap_release(heap, 1);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_allocate(heap, size);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment, size_t size) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
	return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, size_t num, size_t size) {
	size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
	int err = SizeTMult(num, size, &total);
	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#else
	int err = __builtin_umull_overflow(num, size, &total);
	if (err || (total >= MAX_ALLOC_SIZE)) {
		errno = EINVAL;
		return 0;
	}
#endif
#else
	total = num * size;
#endif
	void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
	if (block)
		memset(block, 0, total);
	return block;
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if (size >= MAX_ALLOC_SIZE) {
		errno = EINVAL;
		return ptr;
	}
#endif
	return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
	if ((size + alignment < size) || (alignment > _memory_page_size)) {
		errno = EINVAL;
		return 0;
	}
#endif
	return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
}

extern inline void
rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
	(void)sizeof(heap);
	_rpmalloc_deallocate(ptr);
}

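//! Free all memory allocated from the heap; any outstanding block pointers obtained from this heap become invalid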
extern inline void
rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
	span_t *span;
	span_t *next_span;

	_rpmalloc_heap_cache_adopt_deferred(heap, 0);

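	// Return all partially used and fully used spans in every size class to the heap span cache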
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		span = heap->partial_span[iclass];
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
		heap->partial_span[iclass] = 0;
		span = heap->full_span[iclass];
		while (span) {
			next_span = span->next;
			_rpmalloc_heap_cache_insert(heap, span);
			span = next_span;
		}
	}
	memset(heap->free_list, 0, sizeof(heap->free_list));
	memset(heap->partial_span, 0, sizeof(heap->partial_span));
	memset(heap->full_span, 0, sizeof(heap->full_span));

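	// Large and huge spans are tracked on a single list; huge spans are unmapped directly,
	// large spans are returned to the span cache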
	span = heap->large_huge_span;
	while (span) {
		next_span = span->next;
		if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
			_rpmalloc_deallocate_huge(span);
		else
			_rpmalloc_heap_cache_insert(heap, span);
		span = next_span;
	}
	heap->large_huge_span = 0;
	heap->full_span_count = 0;

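	// Flush the thread span cache: hand batches of spans to the global cache when it is
	// enabled, otherwise unmap the cached spans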
#if ENABLE_THREAD_CACHE
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		span = heap->span_cache[iclass];
#if ENABLE_GLOBAL_CACHE
		while (span) {
			assert(span->span_count == (iclass + 1));
			size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
			next_span = _rpmalloc_span_list_split(span, (uint32_t)release_count);
			_rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
			_rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
			_rpmalloc_global_cache_insert_span_list(span);
			span = next_span;
		}
#else
		if (span)
			_rpmalloc_span_list_unmap_all(span);
#endif
		heap->span_cache[iclass] = 0;
	}
#endif

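	// Reset current-use statistics now that all blocks and spans have been released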
#if ENABLE_STATISTICS
	for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
		atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
	}
	for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
		atomic_store32(&heap->span_use[iclass].current, 0);
	}
#endif
}

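//! Set the given heap as the current heap for the calling thread, releasing any previously set heap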
extern inline void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
	heap_t *prev_heap = get_thread_heap_raw();
	if (prev_heap != heap) {
		set_thread_heap(heap);
		if (prev_heap)
			rpmalloc_heap_release(prev_heap);
	}
}

#endif

#if ENABLE_PRELOAD || ENABLE_OVERRIDE

#include "malloc.c"

#endif
