1 /* rpmalloc.c  -  Memory allocator  -  Public Domain  -  2016 Mattias Jansson
2 
3    This library provides a cross-platform lock free thread caching malloc implementation in C11.
4    The latest source code is always available at
5 
6    https://github.com/mjansson/rpmalloc
7 
8    This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
9 
10 */
11 
12 #include "rpmalloc.h"
13 
14 ////////////
15 ///
16 /// Build time configurable limits
17 ///
18 //////
19 
20 #ifndef HEAP_ARRAY_SIZE
21 //! Size of heap hashmap
22 #define HEAP_ARRAY_SIZE           47
23 #endif
24 #ifndef ENABLE_THREAD_CACHE
25 //! Enable per-thread cache
26 #define ENABLE_THREAD_CACHE       1
27 #endif
28 #ifndef ENABLE_GLOBAL_CACHE
29 //! Enable global cache shared between all threads, requires thread cache
30 #define ENABLE_GLOBAL_CACHE       1
31 #endif
32 #ifndef ENABLE_VALIDATE_ARGS
33 //! Enable validation of args to public entry points
34 #define ENABLE_VALIDATE_ARGS      0
35 #endif
36 #ifndef ENABLE_STATISTICS
37 //! Enable statistics collection
38 #define ENABLE_STATISTICS         0
39 #endif
40 #ifndef ENABLE_ASSERTS
41 //! Enable asserts
42 #define ENABLE_ASSERTS            0
43 #endif
44 #ifndef ENABLE_OVERRIDE
45 //! Override standard library malloc/free and new/delete entry points
46 #define ENABLE_OVERRIDE           0
47 #endif
48 #ifndef ENABLE_PRELOAD
49 //! Support preloading
50 #define ENABLE_PRELOAD            0
51 #endif
52 #ifndef DISABLE_UNMAP
53 //! Disable unmapping memory pages
54 #define DISABLE_UNMAP             0
55 #endif
56 #ifndef DEFAULT_SPAN_MAP_COUNT
57 //! Default number of spans to map in a call to map more virtual memory (default values yield 4MiB per call)
58 #define DEFAULT_SPAN_MAP_COUNT    64
59 #endif
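//  Worked example: with the hardwired 64KiB span size defined further below, the
//  default of 64 spans per map call reserves 64 * 64KiB = 4MiB of address space per call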
60 
61 #if ENABLE_THREAD_CACHE
62 #ifndef ENABLE_UNLIMITED_CACHE
63 //! Unlimited thread and global cache
64 #define ENABLE_UNLIMITED_CACHE    0
65 #endif
66 #ifndef ENABLE_UNLIMITED_THREAD_CACHE
67 //! Unlimited cache disables any thread cache limitations
68 #define ENABLE_UNLIMITED_THREAD_CACHE ENABLE_UNLIMITED_CACHE
69 #endif
70 #if !ENABLE_UNLIMITED_THREAD_CACHE
71 #ifndef THREAD_CACHE_MULTIPLIER
72 //! Multiplier for thread cache (cache limit will be span release count multiplied by this value)
73 #define THREAD_CACHE_MULTIPLIER 16
74 #endif
75 #ifndef ENABLE_ADAPTIVE_THREAD_CACHE
76 //! Enable adaptive size of per-thread cache (still bounded by THREAD_CACHE_MULTIPLIER hard limit)
77 #define ENABLE_ADAPTIVE_THREAD_CACHE  0
78 #endif
79 #endif
80 #endif
81 
82 #if ENABLE_GLOBAL_CACHE && ENABLE_THREAD_CACHE
83 #if DISABLE_UNMAP
84 #undef ENABLE_UNLIMITED_GLOBAL_CACHE
85 #define ENABLE_UNLIMITED_GLOBAL_CACHE 1
86 #endif
87 #ifndef ENABLE_UNLIMITED_GLOBAL_CACHE
88 //! Unlimited cache disables any global cache limitations
89 #define ENABLE_UNLIMITED_GLOBAL_CACHE ENABLE_UNLIMITED_CACHE
90 #endif
91 #if !ENABLE_UNLIMITED_GLOBAL_CACHE
92 //! Multiplier for global cache (cache limit will be span release count multiplied by this value)
93 #define GLOBAL_CACHE_MULTIPLIER (THREAD_CACHE_MULTIPLIER * 6)
94 #endif
95 #else
96 #  undef ENABLE_GLOBAL_CACHE
97 #  define ENABLE_GLOBAL_CACHE 0
98 #endif
99 
100 #if !ENABLE_THREAD_CACHE || ENABLE_UNLIMITED_THREAD_CACHE
101 #  undef ENABLE_ADAPTIVE_THREAD_CACHE
102 #  define ENABLE_ADAPTIVE_THREAD_CACHE 0
103 #endif
104 
105 #if DISABLE_UNMAP && !ENABLE_GLOBAL_CACHE
106 #  error Must use global cache if unmap is disabled
107 #endif
108 
109 #if defined( _WIN32 ) || defined( __WIN32__ ) || defined( _WIN64 )
110 #  define PLATFORM_WINDOWS 1
111 #  define PLATFORM_POSIX 0
112 #else
113 #  define PLATFORM_WINDOWS 0
114 #  define PLATFORM_POSIX 1
115 #endif
116 
117 /// Platform and arch specifics
118 #if defined(_MSC_VER) && !defined(__clang__)
119 #  ifndef FORCEINLINE
120 #    define FORCEINLINE inline __forceinline
121 #  endif
122 #  define _Static_assert static_assert
123 #else
124 #  ifndef FORCEINLINE
125 #    define FORCEINLINE inline __attribute__((__always_inline__))
126 #  endif
127 #endif
128 #if PLATFORM_WINDOWS
129 #  ifndef WIN32_LEAN_AND_MEAN
130 #    define WIN32_LEAN_AND_MEAN
131 #  endif
132 #  include <Windows.h>
133 #  if ENABLE_VALIDATE_ARGS
134 #    include <Intsafe.h>
135 #  endif
136 #else
137 #  include <unistd.h>
138 #  include <stdio.h>
139 #  include <stdlib.h>
140 #  if defined(__APPLE__)
141 #    include <mach/mach_vm.h>
142 #    include <mach/vm_statistics.h>
143 #    include <pthread.h>
144 #  endif
145 #  if defined(__HAIKU__)
146 #    include <OS.h>
147 #    include <pthread.h>
148 #  endif
149 #endif
150 
151 #include <stdint.h>
152 #include <string.h>
153 #include <errno.h>
154 
155 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
156 #include <fibersapi.h>
157 static DWORD fls_key;
158 static void NTAPI
159 _rpmalloc_thread_destructor(void *value) {
160   if (value)
161     rpmalloc_thread_finalize();
162 }
163 #endif
164 
165 #if PLATFORM_POSIX
166 #  include <sys/mman.h>
167 #  include <sched.h>
168 #  ifdef __FreeBSD__
169 #    include <sys/sysctl.h>
170 #    define MAP_HUGETLB MAP_ALIGNED_SUPER
171 #  endif
172 #  ifndef MAP_UNINITIALIZED
173 #    define MAP_UNINITIALIZED 0
174 #  endif
175 #endif
176 #include <errno.h>
177 
178 #if ENABLE_ASSERTS
179 #  undef NDEBUG
180 #  if defined(_MSC_VER) && !defined(_DEBUG)
181 #    define _DEBUG
182 #  endif
183 #  include <assert.h>
184 #else
185 #  undef  assert
186 #  define assert(x) do {} while(0)
187 #endif
188 #if ENABLE_STATISTICS
189 #  include <stdio.h>
190 #endif
191 
192 //////
193 ///
194 /// Atomic access abstraction (since MSVC does not do C11 yet)
195 ///
196 //////
197 
198 #if defined(_MSC_VER) && !defined(__clang__)
199 
200 typedef volatile long      atomic32_t;
201 typedef volatile long long atomic64_t;
202 typedef volatile void     *atomicptr_t;
203 
204 static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return *src; }
205 static FORCEINLINE void    atomic_store32(atomic32_t *dst, int32_t val) { *dst = val; }
206 static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { return (int32_t)InterlockedIncrement(val); }
207 static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { return (int32_t)InterlockedDecrement(val); }
208 #if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
209 static FORCEINLINE int64_t atomic_load64(atomic64_t *src) { return *src; }
210 static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { return (int64_t)InterlockedExchangeAdd64(val, add) + add; }
211 #endif
212 static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { return (int32_t)InterlockedExchangeAdd(val, add) + add; }
213 static FORCEINLINE void   *atomic_load_ptr(atomicptr_t *src) { return (void *)*src; }
214 static FORCEINLINE void    atomic_store_ptr(atomicptr_t *dst, void *val) { *dst = val; }
215 static FORCEINLINE void    atomic_store_ptr_release(atomicptr_t *dst, void *val) { *dst = val; }
216 static FORCEINLINE int     atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { return (InterlockedCompareExchangePointer((void *volatile *)dst, val, ref) == ref) ? 1 : 0; }
217 static FORCEINLINE int     atomic_cas_ptr_acquire(atomicptr_t *dst, void *val, void *ref) { return atomic_cas_ptr(dst, val, ref); }
218 
219 #define EXPECTED(x) (x)
220 #define UNEXPECTED(x) (x)
221 
222 #else
223 
224 #include <stdatomic.h>
225 
226 typedef volatile _Atomic(int32_t) atomic32_t;
227 typedef volatile _Atomic(int64_t) atomic64_t;
228 typedef volatile _Atomic(void *) atomicptr_t;
229 
230 static FORCEINLINE int32_t atomic_load32(atomic32_t *src) { return atomic_load_explicit(src, memory_order_relaxed); }
231 static FORCEINLINE void    atomic_store32(atomic32_t *dst, int32_t val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
232 static FORCEINLINE int32_t atomic_incr32(atomic32_t *val) { return atomic_fetch_add_explicit(val, 1, memory_order_relaxed) + 1; }
233 static FORCEINLINE int32_t atomic_decr32(atomic32_t *val) { return atomic_fetch_add_explicit(val, -1, memory_order_relaxed) - 1; }
234 #if ENABLE_STATISTICS || ENABLE_ADAPTIVE_THREAD_CACHE
235 static FORCEINLINE int64_t atomic_load64(atomic64_t *val) { return atomic_load_explicit(val, memory_order_relaxed); }
236 static FORCEINLINE int64_t atomic_add64(atomic64_t *val, int64_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
237 #endif
238 static FORCEINLINE int32_t atomic_add32(atomic32_t *val, int32_t add) { return atomic_fetch_add_explicit(val, add, memory_order_relaxed) + add; }
239 static FORCEINLINE void   *atomic_load_ptr(atomicptr_t *src) { return atomic_load_explicit(src, memory_order_relaxed); }
240 static FORCEINLINE void    atomic_store_ptr(atomicptr_t *dst, void *val) { atomic_store_explicit(dst, val, memory_order_relaxed); }
241 static FORCEINLINE void    atomic_store_ptr_release(atomicptr_t *dst, void *val) { atomic_store_explicit(dst, val, memory_order_release); }
242 static FORCEINLINE int     atomic_cas_ptr(atomicptr_t *dst, void *val, void *ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_relaxed, memory_order_relaxed); }
243 static FORCEINLINE int     atomic_cas_ptr_acquire(atomicptr_t *dst, void *val, void *ref) { return atomic_compare_exchange_weak_explicit(dst, &ref, val, memory_order_acquire, memory_order_relaxed); }
244 
245 #define EXPECTED(x) __builtin_expect((x), 1)
246 #define UNEXPECTED(x) __builtin_expect((x), 0)
247 
248 #endif
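
// Illustrative sketch (not part of the allocator): the wrappers above are used
// for simple lock-free structures via CAS loops. The node type and list head
// below are hypothetical and only show the intended usage pattern.
#if 0
typedef struct example_node_t { struct example_node_t *next; } example_node_t;

static atomicptr_t example_list_head;

static void
example_list_push(example_node_t *node) {
  void *head;
  do {
    //Load the current head and link the new node in front of it
    head = atomic_load_ptr(&example_list_head);
    node->next = (example_node_t *)head;
    //Retry the CAS until this node is published as the new head
  } while (!atomic_cas_ptr(&example_list_head, node, head));
}
#endif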
249 
250 ////////////
251 ///
252 /// Statistics related functions (evaluate to nothing when statistics not enabled)
253 ///
254 //////
255 
256 #if ENABLE_STATISTICS
257 #  define _rpmalloc_stat_inc(counter) atomic_incr32(counter)
258 #  define _rpmalloc_stat_dec(counter) atomic_decr32(counter)
259 #  define _rpmalloc_stat_add(counter, value) atomic_add32(counter, (int32_t)(value))
260 #  define _rpmalloc_stat_add64(counter, value) atomic_add64(counter, (int64_t)(value))
261 #  define _rpmalloc_stat_add_peak(counter, value, peak) do { int32_t _cur_count = atomic_add32(counter, (int32_t)(value)); if (_cur_count > (peak)) peak = _cur_count; } while (0)
262 #  define _rpmalloc_stat_sub(counter, value) atomic_add32(counter, -(int32_t)(value))
263 #  define _rpmalloc_stat_inc_alloc(heap, class_idx) do { \
264 	int32_t alloc_current = atomic_incr32(&heap->size_class_use[class_idx].alloc_current); \
265 	if (alloc_current > heap->size_class_use[class_idx].alloc_peak) \
266 		heap->size_class_use[class_idx].alloc_peak = alloc_current; \
267 	atomic_incr32(&heap->size_class_use[class_idx].alloc_total); \
268 } while(0)
269 #  define _rpmalloc_stat_inc_free(heap, class_idx) do { \
270 	atomic_decr32(&heap->size_class_use[class_idx].alloc_current); \
271 	atomic_incr32(&heap->size_class_use[class_idx].free_total); \
272 } while(0)
273 #else
274 #  define _rpmalloc_stat_inc(counter) do {} while(0)
275 #  define _rpmalloc_stat_dec(counter) do {} while(0)
276 #  define _rpmalloc_stat_add(counter, value) do {} while(0)
277 #  define _rpmalloc_stat_add64(counter, value) do {} while(0)
278 #  define _rpmalloc_stat_add_peak(counter, value, peak) do {} while (0)
279 #  define _rpmalloc_stat_sub(counter, value) do {} while(0)
280 #  define _rpmalloc_stat_inc_alloc(heap, class_idx) do {} while(0)
281 #  define _rpmalloc_stat_inc_free(heap, class_idx) do {} while(0)
282 #endif
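
// Example (illustrative): call sites update counters unconditionally, e.g.
//   _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak);
// as in _rpmalloc_mmap below; with ENABLE_STATISTICS disabled each macro expands to an empty statement.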
283 
284 ///
285 /// Preconfigured limits and sizes
286 ///
287 
288 //! Granularity of a small allocation block (must be power of two)
289 #define SMALL_GRANULARITY         16
290 //! Small granularity shift count
291 #define SMALL_GRANULARITY_SHIFT   4
292 //! Number of small block size classes
293 #define SMALL_CLASS_COUNT         65
294 //! Maximum size of a small block
295 #define SMALL_SIZE_LIMIT          (SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1))
296 //! Granularity of a medium allocation block
297 #define MEDIUM_GRANULARITY        512
298 //! Medium granularity shift count
299 #define MEDIUM_GRANULARITY_SHIFT  9
300 //! Number of medium block size classes
301 #define MEDIUM_CLASS_COUNT        61
302 //! Total number of small + medium size classes
303 #define SIZE_CLASS_COUNT          (SMALL_CLASS_COUNT + MEDIUM_CLASS_COUNT)
304 //! Number of large block size classes
305 #define LARGE_CLASS_COUNT         32
306 //! Maximum size of a medium block
307 #define MEDIUM_SIZE_LIMIT         (SMALL_SIZE_LIMIT + (MEDIUM_GRANULARITY * MEDIUM_CLASS_COUNT))
308 //! Maximum size of a large block
309 #define LARGE_SIZE_LIMIT          ((LARGE_CLASS_COUNT * _memory_span_size) - SPAN_HEADER_SIZE)
310 //! ABA protection size in orphan heap list (also becomes the limit of the smallest supported page size)
311 #define HEAP_ORPHAN_ABA_SIZE      512
312 //! Size of a span header (must be a multiple of SMALL_GRANULARITY and a power of two)
313 #define SPAN_HEADER_SIZE          128
314 
315 _Static_assert((SMALL_GRANULARITY & (SMALL_GRANULARITY - 1)) == 0, "Small granularity must be power of two");
316 _Static_assert((SPAN_HEADER_SIZE & (SPAN_HEADER_SIZE - 1)) == 0, "Span header size must be power of two");
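
//  Worked example: small requests are served in SMALL_GRANULARITY (16 byte) steps,
//  so e.g. a 24 byte request uses a 32 byte block, and the largest small block is
//  SMALL_GRANULARITY * (SMALL_CLASS_COUNT - 1) = 16 * 64 = 1024 bytes (SMALL_SIZE_LIMIT)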
317 
318 #if ENABLE_VALIDATE_ARGS
319 //! Maximum allocation size to avoid integer overflow
320 #undef  MAX_ALLOC_SIZE
321 #define MAX_ALLOC_SIZE            (((size_t)-1) - _memory_span_size)
322 #endif
323 
324 #define pointer_offset(ptr, ofs) (void*)((char*)(ptr) + (ptrdiff_t)(ofs))
325 #define pointer_diff(first, second) (ptrdiff_t)((const char*)(first) - (const char*)(second))
326 
327 #define INVALID_POINTER ((void*)((uintptr_t)-1))
328 
329 #define SIZE_CLASS_LARGE SIZE_CLASS_COUNT
330 #define SIZE_CLASS_HUGE ((uint32_t)-1)
331 
332 ////////////
333 ///
334 /// Data types
335 ///
336 //////
337 
338 //! A memory heap, per thread
339 typedef struct heap_t heap_t;
340 //! Span of memory pages
341 typedef struct span_t span_t;
342 //! Span list
343 typedef struct span_list_t span_list_t;
344 //! Span active data
345 typedef struct span_active_t span_active_t;
346 //! Size class definition
347 typedef struct size_class_t size_class_t;
348 //! Global cache
349 typedef struct global_cache_t global_cache_t;
350 
351 //! Flag indicating span is the first (master) span of a split superspan
352 #define SPAN_FLAG_MASTER 1U
353 //! Flag indicating span is a secondary (sub) span of a split superspan
354 #define SPAN_FLAG_SUBSPAN 2U
355 //! Flag indicating span has blocks with increased alignment
356 #define SPAN_FLAG_ALIGNED_BLOCKS 4U
357 
358 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
359 struct span_use_t {
360   //! Current number of spans used (actually used, not in cache)
361   atomic32_t current;
362   //! High water mark of spans used
363   atomic32_t high;
364 #if ENABLE_STATISTICS
365   //! Number of spans transitioned to global cache
366   atomic32_t spans_to_global;
367   //! Number of spans transitioned from global cache
368   atomic32_t spans_from_global;
369   //! Number of spans transitioned to thread cache
370   atomic32_t spans_to_cache;
371   //! Number of spans transitioned from thread cache
372   atomic32_t spans_from_cache;
373   //! Number of spans transitioned to reserved state
374   atomic32_t spans_to_reserved;
375   //! Number of spans transitioned from reserved state
376   atomic32_t spans_from_reserved;
377   //! Number of raw memory map calls
378   atomic32_t spans_map_calls;
379 #endif
380 };
381 typedef struct span_use_t span_use_t;
382 #endif
383 
384 #if ENABLE_STATISTICS
385 struct size_class_use_t {
386   //! Current number of allocations
387   atomic32_t alloc_current;
388   //! Peak number of allocations
389   int32_t alloc_peak;
390   //! Total number of allocations
391   atomic32_t alloc_total;
392   //! Total number of frees
393   atomic32_t free_total;
394   //! Number of spans in use
395   atomic32_t spans_current;
396   //! Peak number of spans in use
397   int32_t spans_peak;
398   //! Number of spans transitioned to cache
399   atomic32_t spans_to_cache;
400   //! Number of spans transitioned from cache
401   atomic32_t spans_from_cache;
402   //! Number of spans transitioned from reserved state
403   atomic32_t spans_from_reserved;
404   //! Number of spans mapped
405   atomic32_t spans_map_calls;
406 };
407 typedef struct size_class_use_t size_class_use_t;
408 #endif
409 
410 // A span can either represent a single span of memory pages with size declared by the span_map_count configuration variable,
411 // or a set of spans in a contiguous region, a super span. Any reference to the term "span" usually refers to both a single
412 // span and a super span. A super span can further be divided into multiple spans (or smaller super spans), where the first
413 // (super)span is the master and subsequent (super)spans are subspans. The master span keeps track of how many subspans
414 // are still alive and mapped in virtual memory, and once all subspans and the master have been unmapped, the entire
415 // superspan region is released and unmapped (on Windows, for example, the entire superspan range has to be released
416 // in the same call that releases the virtual memory range, but individual subranges can be decommitted individually
417 // to reduce physical memory use).
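//
// Illustration: a subspan stores its distance to the master span in span units
// (offset_from_master), so the master can be recovered as
//   master = (span_t *)pointer_offset(subspan, -(intptr_t)((uintptr_t)subspan->offset_from_master * _memory_span_size));
// as done in _rpmalloc_span_unmap below.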
418 struct span_t {
419   //! Free list
420   void       *free_list;
421   //! Total block count of size class
422   uint32_t    block_count;
423   //! Size class
424   uint32_t    size_class;
425   //! Index of last block initialized in free list
426   uint32_t    free_list_limit;
427   //! Number of used blocks remaining when in partial state
428   uint32_t    used_count;
429   //! Deferred free list
430   atomicptr_t free_list_deferred;
431   //! Size of deferred free list, or list of spans when part of a cache list
432   uint32_t    list_size;
433   //! Size of a block
434   uint32_t    block_size;
435   //! Flags and counters
436   uint32_t    flags;
437   //! Number of spans
438   uint32_t    span_count;
439   //! Total span counter for master spans
440   uint32_t    total_spans;
441   //! Offset from master span for subspans
442   uint32_t    offset_from_master;
443   //! Remaining span counter, for master spans
444   atomic32_t  remaining_spans;
445   //! Alignment offset
446   uint32_t    align_offset;
447   //! Owning heap
448   heap_t     *heap;
449   //! Next span
450   span_t     *next;
451   //! Previous span
452   span_t     *prev;
453 };
454 _Static_assert(sizeof(span_t) <= SPAN_HEADER_SIZE, "span size mismatch");
455 
456 // Control structure for a heap, either a thread heap or a first class heap if enabled
457 struct heap_t {
458   //! Owning thread ID
459   uintptr_t    owner_thread;
460   //! Free list of active span
461   void        *free_list[SIZE_CLASS_COUNT];
462   //! Double linked list of partially used spans with free blocks for each size class.
463   //  Previous span pointer in head points to tail span of list.
464   span_t      *partial_span[SIZE_CLASS_COUNT];
465 #if RPMALLOC_FIRST_CLASS_HEAPS
466   //! Double linked list of fully utilized spans with free blocks for each size class.
467   //  Previous span pointer in head points to tail span of list.
468   span_t      *full_span[SIZE_CLASS_COUNT];
469 #endif
470 #if ENABLE_THREAD_CACHE
471   //! List of free spans (single linked list)
472   span_t      *span_cache[LARGE_CLASS_COUNT];
473 #endif
474   //! List of deferred free spans (single linked list)
475   atomicptr_t  span_free_deferred;
476 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
477   //! Current and high water mark of spans used per span count
478   span_use_t   span_use[LARGE_CLASS_COUNT];
479 #endif
480 #if RPMALLOC_FIRST_CLASS_HEAPS
481   //! Double linked list of large and huge spans allocated by this heap
482   span_t      *large_huge_span;
483 #endif
484   //! Number of full spans
485   size_t       full_span_count;
486   //! Mapped but unused spans
487   span_t      *span_reserve;
488   //! Master span for mapped but unused spans
489   span_t      *span_reserve_master;
490   //! Number of mapped but unused spans
491   size_t       spans_reserved;
492   //! Next heap in id list
493   heap_t      *next_heap;
494   //! Next heap in orphan list
495   heap_t      *next_orphan;
496   //! Memory pages alignment offset
497   size_t       align_offset;
498   //! Heap ID
499   int32_t      id;
500   //! Finalization state flag
501   int          finalize;
502   //! Master heap owning the memory pages
503   heap_t      *master_heap;
504   //! Child count
505   atomic32_t   child_count;
506 #if ENABLE_STATISTICS
507   //! Number of bytes transitioned thread -> global
508   atomic64_t   thread_to_global;
509   //! Number of bytes transitioned global -> thread
510   atomic64_t   global_to_thread;
511   //! Allocation stats per size class
512   size_class_use_t size_class_use[SIZE_CLASS_COUNT + 1];
513 #endif
514 };
515 
516 // Size class for defining a block size bucket
517 struct size_class_t {
518   //! Size of blocks in this class
519   uint32_t block_size;
520   //! Number of blocks in each chunk
521   uint16_t block_count;
522   //! Class index this class is merged with
523   uint16_t class_idx;
524 };
525 _Static_assert(sizeof(size_class_t) == 8, "Size class size mismatch");
526 
527 struct global_cache_t {
528   //! Cache list pointer
529   atomicptr_t cache;
530   //! Cache size
531   atomic32_t size;
532   //! ABA counter
533   atomic32_t counter;
534 };
535 
536 ////////////
537 ///
538 /// Global data
539 ///
540 //////
541 
542 //! Initialized flag
543 static int _rpmalloc_initialized;
544 //! Configuration
545 static rpmalloc_config_t _memory_config;
546 //! Memory page size
547 static size_t _memory_page_size;
548 //! Shift to divide by page size
549 static size_t _memory_page_size_shift;
550 //! Granularity at which memory pages are mapped by OS
551 static size_t _memory_map_granularity;
552 #if RPMALLOC_CONFIGURABLE
553 //! Size of a span of memory pages
554 static size_t _memory_span_size;
555 //! Shift to divide by span size
556 static size_t _memory_span_size_shift;
557 //! Mask to get to start of a memory span
558 static uintptr_t _memory_span_mask;
559 #else
560 //! Hardwired span size (64KiB)
561 #define _memory_span_size (64 * 1024)
562 #define _memory_span_size_shift 16
563 #define _memory_span_mask (~((uintptr_t)(_memory_span_size - 1)))
564 #endif
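//  Example: spans are aligned to _memory_span_size, so the owning span of any
//  block pointer p can be computed as (span_t *)((uintptr_t)p & _memory_span_mask)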
565 //! Number of spans to map in each map call
566 static size_t _memory_span_map_count;
567 //! Number of spans to release from thread cache to global cache (single spans)
568 static size_t _memory_span_release_count;
569 //! Number of spans to release from thread cache to global cache (large multiple spans)
570 static size_t _memory_span_release_count_large;
571 //! Global size classes
572 static size_class_t _memory_size_class[SIZE_CLASS_COUNT];
573 //! Run-time size limit of medium blocks
574 static size_t _memory_medium_size_limit;
575 //! Heap ID counter
576 static atomic32_t _memory_heap_id;
577 //! Huge page support
578 static int _memory_huge_pages;
579 #if ENABLE_GLOBAL_CACHE
580 //! Global span cache
581 static global_cache_t _memory_span_cache[LARGE_CLASS_COUNT];
582 #endif
583 //! All heaps
584 static atomicptr_t _memory_heaps[HEAP_ARRAY_SIZE];
585 //! Orphaned heaps
586 static atomicptr_t _memory_orphan_heaps;
587 #if RPMALLOC_FIRST_CLASS_HEAPS
588 //! Orphaned heaps (first class heaps)
589 static atomicptr_t _memory_first_class_orphan_heaps;
590 #endif
591 //! Running orphan counter to avoid ABA issues in linked list
592 static atomic32_t _memory_orphan_counter;
593 #if ENABLE_STATISTICS
594 //! Active heap count
595 static atomic32_t _memory_active_heaps;
596 //! Number of currently mapped memory pages
597 static atomic32_t _mapped_pages;
598 //! Peak number of concurrently mapped memory pages
599 static int32_t _mapped_pages_peak;
600 //! Number of mapped master spans
601 static atomic32_t _master_spans;
602 //! Number of currently unused spans
603 static atomic32_t _reserved_spans;
604 //! Running counter of total number of mapped memory pages since start
605 static atomic32_t _mapped_total;
606 //! Running counter of total number of unmapped memory pages since start
607 static atomic32_t _unmapped_total;
608 //! Number of currently mapped memory pages in OS calls
609 static atomic32_t _mapped_pages_os;
610 //! Number of currently allocated pages in huge allocations
611 static atomic32_t _huge_pages_current;
612 //! Peak number of currently allocated pages in huge allocations
613 static int32_t _huge_pages_peak;
614 #endif
615 
616 ////////////
617 ///
618 /// Thread local heap and ID
619 ///
620 //////
621 
622 //! Current thread heap
623 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
624 static pthread_key_t _memory_thread_heap;
625 #else
626 #  ifdef _MSC_VER
627 #    define _Thread_local __declspec(thread)
628 #    define TLS_MODEL
629 #  else
630 #    define TLS_MODEL __attribute__((tls_model("initial-exec")))
631 #    if !defined(__clang__) && defined(__GNUC__)
632 #      define _Thread_local __thread
633 #    endif
634 #  endif
635 static _Thread_local heap_t *_memory_thread_heap TLS_MODEL;
636 #endif
637 
638 static inline heap_t *
639 get_thread_heap_raw(void) {
640 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
641   return pthread_getspecific(_memory_thread_heap);
642 #else
643   return _memory_thread_heap;
644 #endif
645 }
646 
647 //! Get the current thread heap
648 static inline heap_t *
649 get_thread_heap(void) {
650   heap_t *heap = get_thread_heap_raw();
651 #if ENABLE_PRELOAD
652   if (EXPECTED(heap != 0))
653     return heap;
654   rpmalloc_initialize();
655   return get_thread_heap_raw();
656 #else
657   return heap;
658 #endif
659 }
660 
661 //! Fast thread ID
662 static inline uintptr_t
663 get_thread_id(void) {
664 #if defined(_WIN32)
665   return (uintptr_t)((void *)NtCurrentTeb());
666 #elif defined(__GNUC__) || defined(__clang__)
667   uintptr_t tid;
668 #  if defined(__i386__)
669   __asm__("movl %%gs:0, %0" : "=r"(tid) : :);
670 #  elif defined(__MACH__)
671   __asm__("movq %%gs:0, %0" : "=r"(tid) : :);
672 #  elif defined(__x86_64__)
673   __asm__("movq %%fs:0, %0" : "=r"(tid) : :);
674 #  elif defined(__arm__)
675   __asm__ volatile("mrc p15, 0, %0, c13, c0, 3" : "=r"(tid));
676 #  elif defined(__aarch64__)
677   __asm__ volatile("mrs %0, tpidr_el0" : "=r"(tid));
678 #  else
679   tid = (uintptr_t)get_thread_heap_raw();
680 #  endif
681   return tid;
682 #else
683   return (uintptr_t)get_thread_heap_raw();
684 #endif
685 }
686 
687 //! Set the current thread heap
688 static void
689 set_thread_heap(heap_t *heap) {
690 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
691   pthread_setspecific(_memory_thread_heap, heap);
692 #else
693   _memory_thread_heap = heap;
694 #endif
695   if (heap)
696     heap->owner_thread = get_thread_id();
697 }
698 
699 ////////////
700 ///
701 /// Low level memory map/unmap
702 ///
703 //////
704 
705 //! Map more virtual memory
706 //  size is number of bytes to map
707 //  offset receives the offset in bytes from start of mapped region
708 //  returns address to start of mapped region to use
709 static void *
710 _rpmalloc_mmap(size_t size, size_t *offset) {
711   assert(!(size % _memory_page_size));
712   assert(size >= _memory_page_size);
713   _rpmalloc_stat_add_peak(&_mapped_pages, (size >> _memory_page_size_shift), _mapped_pages_peak);
714   _rpmalloc_stat_add(&_mapped_total, (size >> _memory_page_size_shift));
715   return _memory_config.memory_map(size, offset);
716 }
717 
718 //! Unmap virtual memory
719 //  address is the memory address to unmap, as returned from _rpmalloc_mmap
720 //  size is the number of bytes to unmap, which might be less than full region for a partial unmap
721 //  offset is the offset in bytes to the actual mapped region, as set by _rpmalloc_mmap
722 //  release is set to 0 for partial unmap, or size of entire range for a full unmap
723 static void
724 _rpmalloc_unmap(void *address, size_t size, size_t offset, size_t release) {
725   assert(!release || (release >= size));
726   assert(!release || (release >= _memory_page_size));
727   if (release) {
728     assert(!(release % _memory_page_size));
729     _rpmalloc_stat_sub(&_mapped_pages, (release >> _memory_page_size_shift));
730     _rpmalloc_stat_add(&_unmapped_total, (release >> _memory_page_size_shift));
731   }
732   _memory_config.memory_unmap(address, size, offset, release);
733 }
734 
735 //! Default implementation to map new pages to virtual memory
736 static void *
737 _rpmalloc_mmap_os(size_t size, size_t *offset) {
738   //Either size is a heap (a single page) or a (multiple) span - we only need to align spans, and only if larger than map granularity
739   size_t padding = ((size >= _memory_span_size) && (_memory_span_size > _memory_map_granularity)) ? _memory_span_size : 0;
740   assert(size >= _memory_page_size);
741 #if PLATFORM_WINDOWS
742   //Ok to MEM_COMMIT - according to MSDN, "actual physical pages are not allocated unless/until the virtual addresses are actually accessed"
743   void *ptr = VirtualAlloc(0, size + padding, (_memory_huge_pages ? MEM_LARGE_PAGES : 0) | MEM_RESERVE | MEM_COMMIT,
744                            PAGE_READWRITE);
745   if (!ptr) {
746     assert(ptr && "Failed to map virtual memory block");
747     return 0;
748   }
749 #else
750   int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED;
751 #  if defined(__APPLE__)
752   int fd = (int)VM_MAKE_TAG(240U);
753   if (_memory_huge_pages)
754     fd |= VM_FLAGS_SUPERPAGE_SIZE_2MB;
755   void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, fd, 0);
756 #  elif defined(MAP_HUGETLB)
757   void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, (_memory_huge_pages ? MAP_HUGETLB : 0) | flags, -1, 0);
758 #  else
759   void *ptr = mmap(0, size + padding, PROT_READ | PROT_WRITE, flags, -1, 0);
760 #  endif
761   if ((ptr == MAP_FAILED) || !ptr) {
762     assert("Failed to map virtual memory block" == 0);
763     return 0;
764   }
765 #endif
766   _rpmalloc_stat_add(&_mapped_pages_os, (int32_t)((size + padding) >> _memory_page_size_shift));
767   if (padding) {
768     size_t final_padding = padding - ((uintptr_t)ptr & ~_memory_span_mask);
769     assert(final_padding <= _memory_span_size);
770     assert(final_padding <= padding);
771     assert(!(final_padding % 8));
772     ptr = pointer_offset(ptr, final_padding);
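    //Store the alignment offset in units of 8 bytes (_rpmalloc_unmap_os restores the byte offset with offset <<= 3)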
773     *offset = final_padding >> 3;
774   }
775   assert((size < _memory_span_size) || !((uintptr_t)ptr & ~_memory_span_mask));
776   return ptr;
777 }
778 
779 //! Default implementation to unmap pages from virtual memory
780 static void
781 _rpmalloc_unmap_os(void *address, size_t size, size_t offset, size_t release) {
782   assert(release || (offset == 0));
783   assert(!release || (release >= _memory_page_size));
784   assert(size >= _memory_page_size);
785   if (release && offset) {
786     offset <<= 3;
787     address = pointer_offset(address, -(int32_t)offset);
788 #if PLATFORM_POSIX
789     //Padding is always one span size
790     release += _memory_span_size;
791 #endif
792   }
793 #if !DISABLE_UNMAP
794 #if PLATFORM_WINDOWS
795   if (!VirtualFree(address, release ? 0 : size, release ? MEM_RELEASE : MEM_DECOMMIT)) {
796     assert(address && "Failed to unmap virtual memory block");
797   }
798 #else
799   if (release) {
800     if (munmap(address, release)) {
801       assert("Failed to unmap virtual memory block" == 0);
802     }
803   } else {
804 #if defined(POSIX_MADV_FREE)
805     if (posix_madvise(address, size, POSIX_MADV_FREE))
806 #endif
807       if (posix_madvise(address, size, POSIX_MADV_DONTNEED)) {
808         assert("Failed to madvise virtual memory block as free" == 0);
809       }
810   }
811 #endif
812 #endif
813   if (release)
814     _rpmalloc_stat_sub(&_mapped_pages_os, release >> _memory_page_size_shift);
815 }
816 
817 
818 ////////////
819 ///
820 /// Span linked list management
821 ///
822 //////
823 
824 #if ENABLE_THREAD_CACHE
825 
826 static void
827 _rpmalloc_span_unmap(span_t *span);
828 
829 //! Unmap a single linked list of spans
830 static void
831 _rpmalloc_span_list_unmap_all(span_t *span) {
832   size_t list_size = span->list_size;
833   for (size_t ispan = 0; ispan < list_size; ++ispan) {
834     span_t *next_span = span->next;
835     _rpmalloc_span_unmap(span);
836     span = next_span;
837   }
838   assert(!span);
839 }
840 
841 //! Add span to head of single linked span list
842 static size_t
843 _rpmalloc_span_list_push(span_t **head, span_t *span) {
844   span->next = *head;
845   if (*head)
846     span->list_size = (*head)->list_size + 1;
847   else
848     span->list_size = 1;
849   *head = span;
850   return span->list_size;
851 }
852 
853 //! Remove span from head of single linked span list, returning the removed span (the new head is written back through the head pointer)
854 static span_t *
855 _rpmalloc_span_list_pop(span_t **head) {
856   span_t *span = *head;
857   span_t *next_span = 0;
858   if (span->list_size > 1) {
859     assert(span->next);
860     next_span = span->next;
861     assert(next_span);
862     next_span->list_size = span->list_size - 1;
863   }
864   *head = next_span;
865   return span;
866 }
867 
868 //! Split a single linked span list
869 static span_t *
870 _rpmalloc_span_list_split(span_t *span, size_t limit) {
871   span_t *next = 0;
872   if (limit < 2)
873     limit = 2;
874   if (span->list_size > limit) {
875     uint32_t list_size = 1;
876     span_t *last = span;
877     next = span->next;
878     while (list_size < limit) {
879       last = next;
880       next = next->next;
881       ++list_size;
882     }
883     last->next = 0;
884     assert(next);
885     next->list_size = span->list_size - list_size;
886     span->list_size = list_size;
887     span->prev = 0;
888   }
889   return next;
890 }
891 
892 #endif
893 
894 //! Add a span to double linked list at the head
895 static void
896 _rpmalloc_span_double_link_list_add(span_t **head, span_t *span) {
897   if (*head) {
898     span->next = *head;
899     (*head)->prev = span;
900   } else {
901     span->next = 0;
902   }
903   *head = span;
904 }
905 
906 //! Pop head span from double linked list
907 static void
908 _rpmalloc_span_double_link_list_pop_head(span_t **head, span_t *span) {
909   assert(*head == span);
910   span = *head;
911   *head = span->next;
912 }
913 
914 //! Remove a span from double linked list
915 static void
916 _rpmalloc_span_double_link_list_remove(span_t **head, span_t *span) {
917   assert(*head);
918   if (*head == span) {
919     *head = span->next;
920   } else {
921     span_t *next_span = span->next;
922     span_t *prev_span = span->prev;
923     prev_span->next = next_span;
924     if (EXPECTED(next_span != 0)) {
925       next_span->prev = prev_span;
926     }
927   }
928 }
929 
930 
931 ////////////
932 ///
933 /// Span control
934 ///
935 //////
936 
937 static void
938 _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span);
939 
940 static void
941 _rpmalloc_heap_finalize(heap_t *heap);
942 
943 static void
944 _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count);
945 
946 //! Declare the span to be a subspan and store distance from master span and span count
947 static void
948 _rpmalloc_span_mark_as_subspan_unless_master(span_t *master, span_t *subspan, size_t span_count) {
949   assert((subspan != master) || (subspan->flags & SPAN_FLAG_MASTER));
950   if (subspan != master) {
951     subspan->flags = SPAN_FLAG_SUBSPAN;
952     subspan->offset_from_master = (uint32_t)((uintptr_t)pointer_diff(subspan, master) >> _memory_span_size_shift);
953     subspan->align_offset = 0;
954   }
955   subspan->span_count = (uint32_t)span_count;
956 }
957 
958 //! Use reserved spans to fulfill a memory map request (reserve size must be checked by caller)
959 static span_t *
960 _rpmalloc_span_map_from_reserve(heap_t *heap, size_t span_count) {
961   //Update the heap span reserve
962   span_t *span = heap->span_reserve;
963   heap->span_reserve = (span_t *)pointer_offset(span, span_count * _memory_span_size);
964   heap->spans_reserved -= span_count;
965 
966   _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, span, span_count);
967   if (span_count <= LARGE_CLASS_COUNT)
968     _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_from_reserved);
969 
970   return span;
971 }
972 
973 //! Get the aligned number of spans to map in based on wanted count, configured mapping granularity and the page size
974 static size_t
975 _rpmalloc_span_align_count(size_t span_count) {
976   size_t request_count = (span_count > _memory_span_map_count) ? span_count : _memory_span_map_count;
977   if ((_memory_page_size > _memory_span_size) && ((request_count * _memory_span_size) % _memory_page_size))
978     request_count += _memory_span_map_count - (request_count % _memory_span_map_count);
979   return request_count;
980 }
981 
982 //! Setup a newly mapped span
983 static void
984 _rpmalloc_span_initialize(span_t *span, size_t total_span_count, size_t span_count, size_t align_offset) {
985   span->total_spans = (uint32_t)total_span_count;
986   span->span_count = (uint32_t)span_count;
987   span->align_offset = (uint32_t)align_offset;
988   span->flags = SPAN_FLAG_MASTER;
989   atomic_store32(&span->remaining_spans, (int32_t)total_span_count);
990 }
991 
992 //! Map an aligned set of spans, taking configured mapping granularity and the page size into account
993 static span_t *
994 _rpmalloc_span_map_aligned_count(heap_t *heap, size_t span_count) {
995   //If we already have some, but not enough, reserved spans, release those to heap cache and map a new
996   //full set of spans. Otherwise we would waste memory if page size > span size (huge pages)
997   size_t aligned_span_count = _rpmalloc_span_align_count(span_count);
998   size_t align_offset = 0;
999   span_t *span = (span_t *)_rpmalloc_mmap(aligned_span_count * _memory_span_size, &align_offset);
1000   if (!span)
1001     return 0;
1002   _rpmalloc_span_initialize(span, aligned_span_count, span_count, align_offset);
1003   _rpmalloc_stat_add(&_reserved_spans, aligned_span_count);
1004   _rpmalloc_stat_inc(&_master_spans);
1005   if (span_count <= LARGE_CLASS_COUNT)
1006     _rpmalloc_stat_inc(&heap->span_use[span_count - 1].spans_map_calls);
1007   if (aligned_span_count > span_count) {
1008     span_t *reserved_spans = (span_t *)pointer_offset(span, span_count * _memory_span_size);
1009     size_t reserved_count = aligned_span_count - span_count;
1010     if (heap->spans_reserved) {
1011       _rpmalloc_span_mark_as_subspan_unless_master(heap->span_reserve_master, heap->span_reserve, heap->spans_reserved);
1012       _rpmalloc_heap_cache_insert(heap, heap->span_reserve);
1013     }
1014     _rpmalloc_heap_set_reserved_spans(heap, span, reserved_spans, reserved_count);
1015   }
1016   return span;
1017 }
1018 
1019 //! Map in memory pages for the given number of spans (or use previously reserved pages)
1020 static span_t *
1021 _rpmalloc_span_map(heap_t *heap, size_t span_count) {
1022   if (span_count <= heap->spans_reserved)
1023     return _rpmalloc_span_map_from_reserve(heap, span_count);
1024   return _rpmalloc_span_map_aligned_count(heap, span_count);
1025 }
1026 
1027 //! Unmap memory pages for the given number of spans (or mark as unused if no partial unmappings)
1028 static void
1029 _rpmalloc_span_unmap(span_t *span) {
1030   assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
1031   assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
1032 
1033   int is_master = !!(span->flags & SPAN_FLAG_MASTER);
1034   span_t *master = is_master ? span : ((span_t *)pointer_offset(span,
1035                                        -(intptr_t)((uintptr_t)span->offset_from_master * _memory_span_size)));
1036   assert(is_master || (span->flags & SPAN_FLAG_SUBSPAN));
1037   assert(master->flags & SPAN_FLAG_MASTER);
1038 
1039   size_t span_count = span->span_count;
1040   if (!is_master) {
1041     //Directly unmap subspans (unless huge pages, in which case we defer and unmap entire page range with master)
1042     assert(span->align_offset == 0);
1043     if (_memory_span_size >= _memory_page_size) {
1044       _rpmalloc_unmap(span, span_count * _memory_span_size, 0, 0);
1045       _rpmalloc_stat_sub(&_reserved_spans, span_count);
1046     }
1047   } else {
1048     //Special double flag to denote an unmapped master
1049     //It must be kept in memory since span header must be used
1050     span->flags |= SPAN_FLAG_MASTER | SPAN_FLAG_SUBSPAN;
1051   }
1052 
1053   if (atomic_add32(&master->remaining_spans, -(int32_t)span_count) <= 0) {
1054     //Everything unmapped, unmap the master span with release flag to unmap the entire range of the super span
1055     assert(!!(master->flags & SPAN_FLAG_MASTER) && !!(master->flags & SPAN_FLAG_SUBSPAN));
1056     size_t unmap_count = master->span_count;
1057     if (_memory_span_size < _memory_page_size)
1058       unmap_count = master->total_spans;
1059     _rpmalloc_stat_sub(&_reserved_spans, unmap_count);
1060     _rpmalloc_stat_sub(&_master_spans, 1);
1061     _rpmalloc_unmap(master, unmap_count * _memory_span_size, master->align_offset, (size_t)master->total_spans * _memory_span_size);
1062   }
1063 }
1064 
1065 //! Move the span (used for small or medium allocations) to the heap thread cache
1066 static void
1067 _rpmalloc_span_release_to_cache(heap_t *heap, span_t *span) {
1068   assert(heap == span->heap);
1069   assert(span->size_class < SIZE_CLASS_COUNT);
1070 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1071   atomic_decr32(&heap->span_use[0].current);
1072 #endif
1073   _rpmalloc_stat_inc(&heap->span_use[0].spans_to_cache);
1074   _rpmalloc_stat_inc(&heap->size_class_use[span->size_class].spans_to_cache);
1075   _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1076   _rpmalloc_heap_cache_insert(heap, span);
1077 }
1078 
1079 //! Initialize a (partial) free list up to next system memory page, while reserving the first block
1080 //! as allocated, returning number of blocks in list
1081 static uint32_t
1082 free_list_partial_init(void **list, void **first_block, void *page_start, void *block_start,
1083                        uint32_t block_count, uint32_t block_size) {
1084   assert(block_count);
1085   *first_block = block_start;
1086   if (block_count > 1) {
1087     void *free_block = pointer_offset(block_start, block_size);
1088     void *block_end = pointer_offset(block_start, (size_t)block_size * block_count);
1089     //If block size is less than half a memory page, bound init to next memory page boundary
1090     if (block_size < (_memory_page_size >> 1)) {
1091       void *page_end = pointer_offset(page_start, _memory_page_size);
1092       if (page_end < block_end)
1093         block_end = page_end;
1094     }
1095     *list = free_block;
1096     block_count = 2;
1097     void *next_block = pointer_offset(free_block, block_size);
1098     while (next_block < block_end) {
1099       *((void **)free_block) = next_block;
1100       free_block = next_block;
1101       ++block_count;
1102       next_block = pointer_offset(next_block, block_size);
1103     }
1104     *((void **)free_block) = 0;
1105   } else {
1106     *list = 0;
1107   }
1108   return block_count;
1109 }
1110 
1111 //! Initialize an unused span (from cache or mapped) to be new active span, putting the initial free list in heap class free list
1112 static void *
1113 _rpmalloc_span_initialize_new(heap_t *heap, span_t *span, uint32_t class_idx) {
1114   assert(span->span_count == 1);
1115   size_class_t *size_class = _memory_size_class + class_idx;
1116   span->size_class = class_idx;
1117   span->heap = heap;
1118   span->flags &= ~SPAN_FLAG_ALIGNED_BLOCKS;
1119   span->block_size = size_class->block_size;
1120   span->block_count = size_class->block_count;
1121   span->free_list = 0;
1122   span->list_size = 0;
1123   atomic_store_ptr_release(&span->free_list_deferred, 0);
1124 
1125   //Setup free list. Only initialize one system page worth of free blocks in list
1126   void *block;
1127   span->free_list_limit = free_list_partial_init(&heap->free_list[class_idx], &block,
1128                           span, pointer_offset(span, SPAN_HEADER_SIZE), size_class->block_count, size_class->block_size);
1129   //Link span as partial if there remains blocks to be initialized as free list, or full if fully initialized
1130   if (span->free_list_limit < span->block_count) {
1131     _rpmalloc_span_double_link_list_add(&heap->partial_span[class_idx], span);
1132     span->used_count = span->free_list_limit;
1133   } else {
1134 #if RPMALLOC_FIRST_CLASS_HEAPS
1135     _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1136 #endif
1137     ++heap->full_span_count;
1138     span->used_count = span->block_count;
1139   }
1140   return block;
1141 }
1142 
1143 static void
1144 _rpmalloc_span_extract_free_list_deferred(span_t *span) {
1145   // We need acquire semantics on the CAS operation since we are interested in the list size
1146   // Refer to _rpmalloc_deallocate_defer_small_or_medium for further comments on this dependency
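  // The INVALID_POINTER sentinel is swapped in so this thread takes exclusive
  // ownership of the deferred list while list_size is read, then released by
  // storing 0 below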
1147   do {
1148     span->free_list = atomic_load_ptr(&span->free_list_deferred);
1149   } while ((span->free_list == INVALID_POINTER) ||
1150            !atomic_cas_ptr_acquire(&span->free_list_deferred, INVALID_POINTER, span->free_list));
1151   span->used_count -= span->list_size;
1152   span->list_size = 0;
1153   atomic_store_ptr_release(&span->free_list_deferred, 0);
1154 }
1155 
1156 static int
1157 _rpmalloc_span_is_fully_utilized(span_t *span) {
1158   assert(span->free_list_limit <= span->block_count);
1159   return !span->free_list && (span->free_list_limit >= span->block_count);
1160 }
1161 
1162 static int
1163 _rpmalloc_span_finalize(heap_t *heap, size_t iclass, span_t *span, span_t **list_head) {
1164   span_t *class_span = (span_t *)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
1165   if (span == class_span) {
1166     // Adopt the heap class free list back into the span free list
1167     void *block = span->free_list;
1168     void *last_block = 0;
1169     while (block) {
1170       last_block = block;
1171       block = *((void **)block);
1172     }
1173     uint32_t free_count = 0;
1174     block = heap->free_list[iclass];
1175     while (block) {
1176       ++free_count;
1177       block = *((void **)block);
1178     }
1179     if (last_block) {
1180       *((void **)last_block) = heap->free_list[iclass];
1181     } else {
1182       span->free_list = heap->free_list[iclass];
1183     }
1184     heap->free_list[iclass] = 0;
1185     span->used_count -= free_count;
1186   }
1187   //If this assert triggers you have memory leaks
1188   assert(span->list_size == span->used_count);
1189   if (span->list_size == span->used_count) {
1190     _rpmalloc_stat_dec(&heap->span_use[0].current);
1191     _rpmalloc_stat_dec(&heap->size_class_use[iclass].spans_current);
1192     // This function only used for spans in double linked lists
1193     if (list_head)
1194       _rpmalloc_span_double_link_list_remove(list_head, span);
1195     _rpmalloc_span_unmap(span);
1196     return 1;
1197   }
1198   return 0;
1199 }
1200 
1201 
1202 ////////////
1203 ///
1204 /// Global cache
1205 ///
1206 //////
1207 
1208 #if ENABLE_GLOBAL_CACHE
1209 
1210 //! Insert the given list of memory page spans in the global cache
1211 static void
1212 _rpmalloc_global_cache_insert(global_cache_t *cache, span_t *span, size_t cache_limit) {
1213   assert((span->list_size == 1) || (span->next != 0));
1214   int32_t list_size = (int32_t)span->list_size;
1215   //Unmap if cache has reached the limit. Does not need stronger synchronization, the worst
1216   //case is that the span list is unmapped when it could have been cached (no real dependency
1217   //between the two variables)
1218   if (atomic_add32(&cache->size, list_size) > (int32_t)cache_limit) {
1219 #if !ENABLE_UNLIMITED_GLOBAL_CACHE
1220     _rpmalloc_span_list_unmap_all(span);
1221     atomic_add32(&cache->size, -list_size);
1222     return;
1223 #endif
1224   }
1225   void *current_cache, *new_cache;
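  //The cache list pointer packs the span address (span size aligned) in the high
  //bits with an ABA counter in the low bits masked by ~_memory_span_mask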
1226   do {
1227     current_cache = atomic_load_ptr(&cache->cache);
1228     span->prev = (span_t *)((uintptr_t)current_cache & _memory_span_mask);
1229     new_cache = (void *)((uintptr_t)span | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
1230   } while (!atomic_cas_ptr(&cache->cache, new_cache, current_cache));
1231 }
1232 
1233 //! Extract a number of memory page spans from the global cache
1234 static span_t *
1235 _rpmalloc_global_cache_extract(global_cache_t *cache) {
1236   uintptr_t span_ptr;
1237   do {
1238     void *global_span = atomic_load_ptr(&cache->cache);
1239     span_ptr = (uintptr_t)global_span & _memory_span_mask;
1240     if (span_ptr) {
1241       span_t *span = (span_t *)span_ptr;
1242       //By accessing the span ptr before it is swapped out of the list we assume that a contending thread
1243       //cannot traverse the span all the way to it being unmapped before we access it
1244       void *new_cache = (void *)((uintptr_t)span->prev | ((uintptr_t)atomic_incr32(&cache->counter) & ~_memory_span_mask));
1245       if (atomic_cas_ptr(&cache->cache, new_cache, global_span)) {
1246         atomic_add32(&cache->size, -(int32_t)span->list_size);
1247         return span;
1248       }
1249     }
1250   } while (span_ptr);
1251   return 0;
1252 }
1253 
1254 //! Finalize a global cache, only valid from allocator finalization (not thread safe)
1255 static void
1256 _rpmalloc_global_cache_finalize(global_cache_t *cache) {
1257   void *current_cache = atomic_load_ptr(&cache->cache);
1258   span_t *span = (span_t *)((uintptr_t)current_cache & _memory_span_mask);
1259   while (span) {
1260     span_t *skip_span = (span_t *)((uintptr_t)span->prev & _memory_span_mask);
1261     atomic_add32(&cache->size, -(int32_t)span->list_size);
1262     _rpmalloc_span_list_unmap_all(span);
1263     span = skip_span;
1264   }
1265   assert(!atomic_load32(&cache->size));
1266   atomic_store_ptr(&cache->cache, 0);
1267   atomic_store32(&cache->size, 0);
1268 }
1269 
1270 //! Insert the given list of memory page spans in the global cache
1271 static void
1272 _rpmalloc_global_cache_insert_span_list(span_t *span) {
1273   size_t span_count = span->span_count;
1274 #if ENABLE_UNLIMITED_GLOBAL_CACHE
1275   _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, 0);
1276 #else
1277   const size_t cache_limit = (GLOBAL_CACHE_MULTIPLIER * ((span_count == 1) ? _memory_span_release_count :
1278                               _memory_span_release_count_large));
1279   _rpmalloc_global_cache_insert(&_memory_span_cache[span_count - 1], span, cache_limit);
1280 #endif
1281 }
1282 
1283 //! Extract a number of memory page spans from the global cache for large blocks
1284 static span_t *
1285 _rpmalloc_global_cache_extract_span_list(size_t span_count) {
1286   span_t *span = _rpmalloc_global_cache_extract(&_memory_span_cache[span_count - 1]);
1287   assert(!span || (span->span_count == span_count));
1288   return span;
1289 }
1290 
1291 #endif
1292 
1293 
1294 ////////////
1295 ///
1296 /// Heap control
1297 ///
1298 //////
1299 
1300 static void _rpmalloc_deallocate_huge(span_t *);
1301 
1302 //! Store the given spans as reserve in the given heap
1303 static void
1304 _rpmalloc_heap_set_reserved_spans(heap_t *heap, span_t *master, span_t *reserve, size_t reserve_span_count) {
1305   heap->span_reserve_master = master;
1306   heap->span_reserve = reserve;
1307   heap->spans_reserved = reserve_span_count;
1308 }
1309 
1310 //! Adopt the deferred span cache list, optionally extracting the first single span for immediate re-use
1311 static void
1312 _rpmalloc_heap_cache_adopt_deferred(heap_t *heap, span_t **single_span) {
1313   span_t *span = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
1314   if (!span)
1315     return;
1316   while (!atomic_cas_ptr(&heap->span_free_deferred, 0, span))
1317     span = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
1318   while (span) {
1319     span_t *next_span = (span_t *)span->free_list;
1320     assert(span->heap == heap);
1321     if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
1322       assert(heap->full_span_count);
1323       --heap->full_span_count;
1324 #if RPMALLOC_FIRST_CLASS_HEAPS
1325       _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
1326 #endif
1327       if (single_span && !*single_span) {
1328         *single_span = span;
1329       } else {
1330         _rpmalloc_stat_dec(&heap->span_use[0].current);
1331         _rpmalloc_stat_dec(&heap->size_class_use[span->size_class].spans_current);
1332         _rpmalloc_heap_cache_insert(heap, span);
1333       }
1334     } else {
1335       if (span->size_class == SIZE_CLASS_HUGE) {
1336         _rpmalloc_deallocate_huge(span);
1337       } else {
1338         assert(span->size_class == SIZE_CLASS_LARGE);
1339         assert(heap->full_span_count);
1340         --heap->full_span_count;
1341 #if RPMALLOC_FIRST_CLASS_HEAPS
1342         _rpmalloc_span_double_link_list_remove(&heap->large_huge_span, span);
1343 #endif
1344         uint32_t idx = span->span_count - 1;
1345         if (!idx && single_span && !*single_span) {
1346           *single_span = span;
1347         } else {
1348           _rpmalloc_stat_dec(&heap->span_use[idx].current);
1349           _rpmalloc_heap_cache_insert(heap, span);
1350         }
1351       }
1352     }
1353     span = next_span;
1354   }
1355 }
1356 
1357 static void
1358 _rpmalloc_heap_unmap(heap_t *heap) {
1359   if (!heap->master_heap) {
1360     if ((heap->finalize > 1) && !atomic_load32(&heap->child_count)) {
1361       size_t heap_size = sizeof(heap_t);
1362       size_t block_size = _memory_page_size * ((heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1363       _rpmalloc_unmap(heap, block_size, heap->align_offset, block_size);
1364     }
1365   } else {
1366     if (atomic_decr32(&heap->master_heap->child_count) == 0) {
1367       _rpmalloc_heap_unmap(heap->master_heap);
1368     }
1369   }
1370 }
1371 
1372 static void
1373 _rpmalloc_heap_global_finalize(heap_t *heap) {
1374   if (heap->finalize++ > 1) {
1375     --heap->finalize;
1376     return;
1377   }
1378 
1379   _rpmalloc_heap_finalize(heap);
1380 
1381 #if ENABLE_THREAD_CACHE
1382   for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1383     span_t *span = heap->span_cache[iclass];
1384     heap->span_cache[iclass] = 0;
1385     if (span)
1386       _rpmalloc_span_list_unmap_all(span);
1387   }
1388 #endif
1389 
1390   if (heap->full_span_count) {
1391     --heap->finalize;
1392     return;
1393   }
1394 
1395   for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1396     if (heap->free_list[iclass] || heap->partial_span[iclass]) {
1397       --heap->finalize;
1398       return;
1399     }
1400   }
1401   //Heap is now completely free, unmap and remove from heap list
1402   size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1403   heap_t *list_heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
1404   if (list_heap == heap) {
1405     atomic_store_ptr(&_memory_heaps[list_idx], heap->next_heap);
1406   } else {
1407     while (list_heap->next_heap != heap)
1408       list_heap = list_heap->next_heap;
1409     list_heap->next_heap = heap->next_heap;
1410   }
1411 
1412   _rpmalloc_heap_unmap(heap);
1413 }
1414 
1415 //! Insert a single span into thread heap cache, releasing to global cache if overflow
1416 static void
1417 _rpmalloc_heap_cache_insert(heap_t *heap, span_t *span) {
1418   if (UNEXPECTED(heap->finalize != 0)) {
1419     _rpmalloc_span_unmap(span);
1420     _rpmalloc_heap_global_finalize(heap);
1421     return;
1422   }
1423 #if ENABLE_THREAD_CACHE
1424   size_t span_count = span->span_count;
1425   size_t idx = span_count - 1;
1426   _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_cache);
1427 #if ENABLE_UNLIMITED_THREAD_CACHE
1428   _rpmalloc_span_list_push(&heap->span_cache[idx], span);
1429 #else
1430   const size_t release_count = (!idx ? _memory_span_release_count : _memory_span_release_count_large);
1431   size_t current_cache_size = _rpmalloc_span_list_push(&heap->span_cache[idx], span);
1432   if (current_cache_size <= release_count)
1433     return;
1434   const size_t hard_limit = release_count * THREAD_CACHE_MULTIPLIER;
1435   if (current_cache_size <= hard_limit) {
1436 #if ENABLE_ADAPTIVE_THREAD_CACHE
1437     //Require 25% of high water mark to remain in cache (and at least 1, if use is 0)
1438     const size_t high_mark = heap->span_use[idx].high;
1439     const size_t min_limit = (high_mark >> 2) + release_count + 1;
1440     if (current_cache_size < min_limit)
1441       return;
1442 #else
1443     return;
1444 #endif
1445   }
1446   heap->span_cache[idx] = _rpmalloc_span_list_split(span, release_count);
1447   assert(span->list_size == release_count);
1448 #if ENABLE_GLOBAL_CACHE
1449   _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span_count * _memory_span_size);
1450   _rpmalloc_stat_add(&heap->span_use[idx].spans_to_global, span->list_size);
1451   _rpmalloc_global_cache_insert_span_list(span);
1452 #else
1453   _rpmalloc_span_list_unmap_all(span);
1454 #endif
1455 #endif
1456 #else
1457   (void)sizeof(heap);
1458   _rpmalloc_span_unmap(span);
1459 #endif
1460 }
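
// Editor's note (illustrative, not part of the upstream source): a worked example of
// the limits above, assuming the default span map count of 64 and the default
// THREAD_CACHE_MULTIPLIER of 16. For single spans (idx 0) the release count is then 64,
// so the first 64 cached spans are always kept. Without the adaptive cache, spans are
// only pushed on to the global cache once the list grows past the hard limit of
// 64 * 16 = 1024 spans, at which point the split hands a batch of exactly 64 spans to
// the global cache and keeps the remainder in the thread cache. With the adaptive
// cache and a high water mark of, say, 200 spans in use, the batch is released as soon
// as the cache reaches min_limit = (200 >> 2) + 64 + 1 = 115 spans.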
1461 
1462 //! Extract the given number of spans from the different cache levels
1463 static span_t *
1464 _rpmalloc_heap_thread_cache_extract(heap_t *heap, size_t span_count) {
1465   span_t *span = 0;
1466   size_t idx = span_count - 1;
1467   if (!idx)
1468     _rpmalloc_heap_cache_adopt_deferred(heap, &span);
1469 #if ENABLE_THREAD_CACHE
1470   if (!span && heap->span_cache[idx]) {
1471     _rpmalloc_stat_inc(&heap->span_use[idx].spans_from_cache);
1472     span = _rpmalloc_span_list_pop(&heap->span_cache[idx]);
1473   }
1474 #endif
1475   return span;
1476 }
1477 
1478 static span_t *
1479 _rpmalloc_heap_reserved_extract(heap_t *heap, size_t span_count) {
1480   if (heap->spans_reserved >= span_count)
1481     return _rpmalloc_span_map(heap, span_count);
1482   return 0;
1483 }
1484 
1485 //! Extract a span from the global cache
1486 static span_t *
1487 _rpmalloc_heap_global_cache_extract(heap_t *heap, size_t span_count) {
1488 #if ENABLE_GLOBAL_CACHE
1489   size_t idx = span_count - 1;
1490   heap->span_cache[idx] = _rpmalloc_global_cache_extract_span_list(span_count);
1491   if (heap->span_cache[idx]) {
1492     _rpmalloc_stat_add64(&heap->global_to_thread, (size_t)heap->span_cache[idx]->list_size * span_count * _memory_span_size);
1493     _rpmalloc_stat_add(&heap->span_use[idx].spans_from_global, heap->span_cache[idx]->list_size);
1494     return _rpmalloc_span_list_pop(&heap->span_cache[idx]);
1495   }
1496 #endif
1497   (void)sizeof(heap);
1498   (void)sizeof(span_count);
1499   return 0;
1500 }
1501 
1502 //! Get a span from one of the cache levels (thread cache, reserved, global cache) or fallback to mapping more memory
1503 static span_t *
1504 _rpmalloc_heap_extract_new_span(heap_t *heap, size_t span_count, uint32_t class_idx) {
1505   (void)sizeof(class_idx);
1506 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
1507   uint32_t idx = (uint32_t)span_count - 1;
1508   uint32_t current_count = (uint32_t)atomic_incr32(&heap->span_use[idx].current);
1509   if (current_count > (uint32_t)atomic_load32(&heap->span_use[idx].high))
1510     atomic_store32(&heap->span_use[idx].high, (int32_t)current_count);
1511   _rpmalloc_stat_add_peak(&heap->size_class_use[class_idx].spans_current, 1, heap->size_class_use[class_idx].spans_peak);
1512 #endif
1513   span_t *span = _rpmalloc_heap_thread_cache_extract(heap, span_count);
1514   if (EXPECTED(span != 0)) {
1515     _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1516     return span;
1517   }
1518   span = _rpmalloc_heap_reserved_extract(heap, span_count);
1519   if (EXPECTED(span != 0)) {
1520     _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_reserved);
1521     return span;
1522   }
1523   span = _rpmalloc_heap_global_cache_extract(heap, span_count);
1524   if (EXPECTED(span != 0)) {
1525     _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_from_cache);
1526     return span;
1527   }
1528   //Final fallback, map in more virtual memory
1529   span = _rpmalloc_span_map(heap, span_count);
1530   _rpmalloc_stat_inc(&heap->size_class_use[class_idx].spans_map_calls);
1531   return span;
1532 }
1533 
1534 static void
1535 _rpmalloc_heap_initialize(heap_t *heap) {
1536   memset(heap, 0, sizeof(heap_t));
1537 
1538   //Get a new heap ID
1539   heap->id = 1 + atomic_incr32(&_memory_heap_id);
1540 
1541   //Link in heap in heap ID map
1542   heap_t *next_heap;
1543   size_t list_idx = heap->id % HEAP_ARRAY_SIZE;
1544   do {
1545     next_heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
1546     heap->next_heap = next_heap;
1547   } while (!atomic_cas_ptr(&_memory_heaps[list_idx], heap, next_heap));
1548 }
1549 
1550 static void
1551 _rpmalloc_heap_orphan(heap_t *heap, int first_class) {
1552   void *raw_heap;
1553   uintptr_t orphan_counter;
1554   heap_t *last_heap;
1555   heap->owner_thread = (uintptr_t)-1;
1556 #if RPMALLOC_FIRST_CLASS_HEAPS
1557   atomicptr_t *heap_list = (first_class ? &_memory_first_class_orphan_heaps : &_memory_orphan_heaps);
1558 #else
1559   (void)sizeof(first_class);
1560   atomicptr_t *heap_list = &_memory_orphan_heaps;
1561 #endif
1562   do {
1563     last_heap = (heap_t *)atomic_load_ptr(heap_list);
1564     heap->next_orphan = (heap_t *)((uintptr_t)last_heap & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
1565     orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
1566     raw_heap = (void *)((uintptr_t)heap | (orphan_counter & (uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1)));
1567   } while (!atomic_cas_ptr(heap_list, raw_heap, last_heap));
1568 }
1569 
1570 //! Allocate a new heap from newly mapped memory pages
1571 static heap_t *
1572 _rpmalloc_heap_allocate_new(void) {
1573   //Map in pages for a new heap
1574   size_t align_offset = 0;
1575   size_t heap_size = sizeof(heap_t);
1576   size_t block_size = _memory_page_size * ((heap_size + _memory_page_size - 1) >> _memory_page_size_shift);
1577   heap_t *heap = (heap_t *)_rpmalloc_mmap(block_size, &align_offset);
1578   if (!heap)
1579     return heap;
1580 
1581   _rpmalloc_heap_initialize(heap);
1582   heap->align_offset = align_offset;
1583 
1584   //Put extra heaps as orphans, aligning to make sure ABA protection bits fit in pointer low bits
1585   size_t aligned_heap_size = HEAP_ORPHAN_ABA_SIZE * ((heap_size + HEAP_ORPHAN_ABA_SIZE - 1) / HEAP_ORPHAN_ABA_SIZE);
1586   size_t num_heaps = block_size / aligned_heap_size;
1587   atomic_store32(&heap->child_count, (int32_t)num_heaps - 1);
1588   heap_t *extra_heap = (heap_t *)pointer_offset(heap, aligned_heap_size);
1589   while (num_heaps > 1) {
1590     _rpmalloc_heap_initialize(extra_heap);
1591     extra_heap->master_heap = heap;
1592     _rpmalloc_heap_orphan(extra_heap, 1);
1593     extra_heap = (heap_t *)pointer_offset(extra_heap, aligned_heap_size);
1594     --num_heaps;
1595   }
1596   return heap;
1597 }
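
// Editor's note (illustrative, not part of the upstream source): with a hypothetical
// sizeof(heap_t) of 2KiB, a 4KiB page size and a HEAP_ORPHAN_ABA_SIZE of 512, the
// mapped block above is a single 4KiB page, aligned_heap_size stays at 2KiB (already
// a multiple of 512) and num_heaps = 4096 / 2048 = 2: one heap is returned and one
// extra heap is orphaned with master_heap pointing at the first, so the backing page
// is only unmapped once both heaps have been released.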
1598 
1599 static heap_t *
1600 _rpmalloc_heap_extract_orphan(atomicptr_t *heap_list) {
1601   void *raw_heap;
1602   void *next_raw_heap;
1603   uintptr_t orphan_counter;
1604   heap_t *heap;
1605   heap_t *next_heap;
1606   do {
1607     raw_heap = atomic_load_ptr(heap_list);
1608     heap = (heap_t *)((uintptr_t)raw_heap & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
1609     if (!heap)
1610       break;
1611     next_heap = heap->next_orphan;
1612     orphan_counter = (uintptr_t)atomic_incr32(&_memory_orphan_counter);
1613     next_raw_heap = (void *)((uintptr_t)next_heap | (orphan_counter & (uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1)));
1614   } while (!atomic_cas_ptr(heap_list, next_raw_heap, raw_heap));
1615   return heap;
1616 }
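
// Editor's sketch (illustrative only, not compiled): the orphan list above avoids the
// ABA problem by folding a small counter into the low bits of the list head. Because
// every heap is aligned to HEAP_ORPHAN_ABA_SIZE, those bits are known to be zero and
// can carry a tag. A minimal pack/unpack sketch, with hypothetical helper names:
#if 0
static void *
orphan_pack(heap_t *heap, uintptr_t counter) {
  //Low pointer bits are free, so fold the counter in as an ABA tag
  return (void *)((uintptr_t)heap | (counter & (uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1)));
}

static heap_t *
orphan_unpack(void *raw_heap) {
  //Mask the tag bits away to recover the real heap pointer
  return (heap_t *)((uintptr_t)raw_heap & ~(uintptr_t)(HEAP_ORPHAN_ABA_SIZE - 1));
}
#endif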
1617 
1618 //! Allocate a new heap, potentially reusing a previously orphaned heap
1619 static heap_t *
1620 _rpmalloc_heap_allocate(int first_class) {
1621   heap_t *heap = 0;
1622   if (first_class == 0)
1623     heap = _rpmalloc_heap_extract_orphan(&_memory_orphan_heaps);
1624 #if RPMALLOC_FIRST_CLASS_HEAPS
1625   if (!heap)
1626     heap = _rpmalloc_heap_extract_orphan(&_memory_first_class_orphan_heaps);
1627 #endif
1628   if (!heap)
1629     heap = _rpmalloc_heap_allocate_new();
1630   return heap;
1631 }
1632 
1633 static void
1634 _rpmalloc_heap_release(void *heapptr, int first_class) {
1635   heap_t *heap = (heap_t *)heapptr;
1636   if (!heap)
1637     return;
1638   //Release thread cache spans back to global cache
1639   _rpmalloc_heap_cache_adopt_deferred(heap, 0);
1640 #if ENABLE_THREAD_CACHE
1641   for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1642     span_t *span = heap->span_cache[iclass];
1643     heap->span_cache[iclass] = 0;
1644     if (span && heap->finalize) {
1645       _rpmalloc_span_list_unmap_all(span);
1646       continue;
1647     }
1648 #if ENABLE_GLOBAL_CACHE
1649     while (span) {
1650       assert(span->span_count == (iclass + 1));
1651       size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
1652       span_t *next = _rpmalloc_span_list_split(span, (uint32_t)release_count);
1653       _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
1654       _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
1655       _rpmalloc_global_cache_insert_span_list(span);
1656       span = next;
1657     }
1658 #else
1659     if (span)
1660       _rpmalloc_span_list_unmap_all(span);
1661 #endif
1662   }
1663 #endif
1664 
1665   //Orphan the heap
1666   _rpmalloc_heap_orphan(heap, first_class);
1667 
1668   if (get_thread_heap_raw() == heap)
1669     set_thread_heap(0);
1670 #if ENABLE_STATISTICS
1671   atomic_decr32(&_memory_active_heaps);
1672   assert(atomic_load32(&_memory_active_heaps) >= 0);
1673 #endif
1674 }
1675 
1676 static void
1677 _rpmalloc_heap_release_raw(void *heapptr) {
1678   _rpmalloc_heap_release(heapptr, 0);
1679 }
1680 
1681 static void
1682 _rpmalloc_heap_finalize(heap_t *heap) {
1683   if (heap->spans_reserved) {
1684     span_t *span = _rpmalloc_span_map(heap, heap->spans_reserved);
1685     _rpmalloc_span_unmap(span);
1686     heap->spans_reserved = 0;
1687   }
1688 
1689   _rpmalloc_heap_cache_adopt_deferred(heap, 0);
1690 
1691   for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
1692     span_t *span = heap->partial_span[iclass];
1693     while (span) {
1694       span_t *next = span->next;
1695       _rpmalloc_span_finalize(heap, iclass, span, &heap->partial_span[iclass]);
1696       span = next;
1697     }
1698     // If class still has a free list it must be a full span
1699     if (heap->free_list[iclass]) {
1700       span_t *class_span = (span_t *)((uintptr_t)heap->free_list[iclass] & _memory_span_mask);
1701       span_t **list = 0;
1702 #if RPMALLOC_FIRST_CLASS_HEAPS
1703       list = &heap->full_span[iclass];
1704 #endif
1705       --heap->full_span_count;
1706       if (!_rpmalloc_span_finalize(heap, iclass, class_span, list)) {
1707         if (list)
1708           _rpmalloc_span_double_link_list_remove(list, class_span);
1709         _rpmalloc_span_double_link_list_add(&heap->partial_span[iclass], class_span);
1710       }
1711     }
1712   }
1713 
1714 #if ENABLE_THREAD_CACHE
1715   for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
1716     if (heap->span_cache[iclass]) {
1717       _rpmalloc_span_list_unmap_all(heap->span_cache[iclass]);
1718       heap->span_cache[iclass] = 0;
1719     }
1720   }
1721 #endif
1722   assert(!atomic_load_ptr(&heap->span_free_deferred));
1723 }
1724 
1725 
1726 ////////////
1727 ///
1728 /// Allocation entry points
1729 ///
1730 //////
1731 
1732 //! Pop first block from a free list
1733 static void *
1734 free_list_pop(void **list) {
1735   void *block = *list;
1736   *list = *((void **)block);
1737   return block;
1738 }
1739 
1740 //! Allocate a small/medium sized memory block from the given heap
1741 static void *
1742 _rpmalloc_allocate_from_heap_fallback(heap_t *heap, uint32_t class_idx) {
1743   span_t *span = heap->partial_span[class_idx];
1744   if (EXPECTED(span != 0)) {
1745     assert(span->block_count == _memory_size_class[span->size_class].block_count);
1746     assert(!_rpmalloc_span_is_fully_utilized(span));
1747     void *block;
1748     if (span->free_list) {
1749       //Swap in free list if not empty
1750       heap->free_list[class_idx] = span->free_list;
1751       span->free_list = 0;
1752       block = free_list_pop(&heap->free_list[class_idx]);
1753     } else {
1754       //If the span did not fully initialize free list, link up another page worth of blocks
1755       void *block_start = pointer_offset(span, SPAN_HEADER_SIZE + ((size_t)span->free_list_limit * span->block_size));
1756       span->free_list_limit += free_list_partial_init(&heap->free_list[class_idx], &block,
1757                                (void *)((uintptr_t)block_start & ~(_memory_page_size - 1)), block_start,
1758                                span->block_count - span->free_list_limit, span->block_size);
1759     }
1760     assert(span->free_list_limit <= span->block_count);
1761     span->used_count = span->free_list_limit;
1762 
1763     //Swap in deferred free list if present
1764     if (atomic_load_ptr(&span->free_list_deferred))
1765       _rpmalloc_span_extract_free_list_deferred(span);
1766 
1767     //If span is still not fully utilized keep it in partial list and early return block
1768     if (!_rpmalloc_span_is_fully_utilized(span))
1769       return block;
1770 
1771     //The span is fully utilized, unlink from partial list and add to fully utilized list
1772     _rpmalloc_span_double_link_list_pop_head(&heap->partial_span[class_idx], span);
1773 #if RPMALLOC_FIRST_CLASS_HEAPS
1774     _rpmalloc_span_double_link_list_add(&heap->full_span[class_idx], span);
1775 #endif
1776     ++heap->full_span_count;
1777     return block;
1778   }
1779 
1780   //Find a span in one of the cache levels
1781   span = _rpmalloc_heap_extract_new_span(heap, 1, class_idx);
1782   if (EXPECTED(span != 0)) {
1783     //Mark span as owned by this heap and set base data, return first block
1784     return _rpmalloc_span_initialize_new(heap, span, class_idx);
1785   }
1786 
1787   return 0;
1788 }
1789 
1790 //! Allocate a small sized memory block from the given heap
1791 static void *
1792 _rpmalloc_allocate_small(heap_t *heap, size_t size) {
1793   assert(heap);
1794   //Small sizes have unique size classes
1795   const uint32_t class_idx = (uint32_t)((size + (SMALL_GRANULARITY - 1)) >> SMALL_GRANULARITY_SHIFT);
1796   _rpmalloc_stat_inc_alloc(heap, class_idx);
1797   if (EXPECTED(heap->free_list[class_idx] != 0))
1798     return free_list_pop(&heap->free_list[class_idx]);
1799   return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1800 }
1801 
1802 //! Allocate a medium sized memory block from the given heap
1803 static void *
1804 _rpmalloc_allocate_medium(heap_t *heap, size_t size) {
1805   assert(heap);
1806   //Calculate the size class index and do a dependent lookup of the final class index (in case of merged classes)
1807   const uint32_t base_idx = (uint32_t)(SMALL_CLASS_COUNT + ((size - (SMALL_SIZE_LIMIT + 1)) >> MEDIUM_GRANULARITY_SHIFT));
1808   const uint32_t class_idx = _memory_size_class[base_idx].class_idx;
1809   _rpmalloc_stat_inc_alloc(heap, class_idx);
1810   if (EXPECTED(heap->free_list[class_idx] != 0))
1811     return free_list_pop(&heap->free_list[class_idx]);
1812   return _rpmalloc_allocate_from_heap_fallback(heap, class_idx);
1813 }
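
// Editor's note (illustrative, not part of the upstream source), assuming the default
// 16-byte small granularity: a 24-byte request maps directly to
// class_idx = (24 + 15) >> 4 = 2, i.e. the 32-byte small class. Medium requests
// instead compute a base index past the small classes and then follow the class_idx
// stored in the size class table, which may redirect to a larger merged class that
// packs the same number of blocks into a span.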
1814 
1815 //! Allocate a large sized memory block from the given heap
1816 static void *
1817 _rpmalloc_allocate_large(heap_t *heap, size_t size) {
1818   assert(heap);
1819   //Calculate number of needed max sized spans (including header)
1820   //Since this function is never called if size > LARGE_SIZE_LIMIT
1821   //the span_count is guaranteed to be <= LARGE_CLASS_COUNT
1822   size += SPAN_HEADER_SIZE;
1823   size_t span_count = size >> _memory_span_size_shift;
1824   if (size & (_memory_span_size - 1))
1825     ++span_count;
1826 
1827   //Find a span in one of the cache levels
1828   span_t *span = _rpmalloc_heap_extract_new_span(heap, span_count, SIZE_CLASS_LARGE);
1829   if (!span)
1830     return span;
1831 
1832   //Mark span as owned by this heap and set base data
1833   assert(span->span_count == span_count);
1834   span->size_class = SIZE_CLASS_LARGE;
1835   span->heap = heap;
1836 
1837 #if RPMALLOC_FIRST_CLASS_HEAPS
1838   _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1839 #endif
1840   ++heap->full_span_count;
1841 
1842   return pointer_offset(span, SPAN_HEADER_SIZE);
1843 }
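
// Editor's note (illustrative, not part of the upstream source), assuming the default
// 64KiB span size: a 100000-byte request grows to 100000 + SPAN_HEADER_SIZE bytes,
// which shifts down to span_count = 1 with a non-zero remainder, so span_count is
// bumped to 2 and the allocation occupies two contiguous spans, with the block
// starting immediately after the header of the first span.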
1844 
1845 //! Allocate a huge block by mapping memory pages directly
1846 static void *
1847 _rpmalloc_allocate_huge(heap_t *heap, size_t size) {
1848   assert(heap);
1849   size += SPAN_HEADER_SIZE;
1850   size_t num_pages = size >> _memory_page_size_shift;
1851   if (size & (_memory_page_size - 1))
1852     ++num_pages;
1853   size_t align_offset = 0;
1854   span_t *span = (span_t *)_rpmalloc_mmap(num_pages * _memory_page_size, &align_offset);
1855   if (!span)
1856     return span;
1857 
1858   //Store page count in span_count
1859   span->size_class = SIZE_CLASS_HUGE;
1860   span->span_count = (uint32_t)num_pages;
1861   span->align_offset = (uint32_t)align_offset;
1862   span->heap = heap;
1863   _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1864 
1865 #if RPMALLOC_FIRST_CLASS_HEAPS
1866   _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1867 #endif
1868   ++heap->full_span_count;
1869 
1870   return pointer_offset(span, SPAN_HEADER_SIZE);
1871 }
1872 
1873 //! Allocate a block of the given size
1874 static void *
1875 _rpmalloc_allocate(heap_t *heap, size_t size) {
1876   if (EXPECTED(size <= SMALL_SIZE_LIMIT))
1877     return _rpmalloc_allocate_small(heap, size);
1878   else if (size <= _memory_medium_size_limit)
1879     return _rpmalloc_allocate_medium(heap, size);
1880   else if (size <= LARGE_SIZE_LIMIT)
1881     return _rpmalloc_allocate_large(heap, size);
1882   return _rpmalloc_allocate_huge(heap, size);
1883 }
1884 
1885 static void *
1886 _rpmalloc_aligned_allocate(heap_t *heap, size_t alignment, size_t size) {
1887   if (alignment <= SMALL_GRANULARITY)
1888     return _rpmalloc_allocate(heap, size);
1889 
1890 #if ENABLE_VALIDATE_ARGS
1891   if ((size + alignment) < size) {
1892     errno = EINVAL;
1893     return 0;
1894   }
1895   if (alignment & (alignment - 1)) {
1896     errno = EINVAL;
1897     return 0;
1898   }
1899 #endif
1900 
1901   if ((alignment <= SPAN_HEADER_SIZE) && (size < _memory_medium_size_limit)) {
1902     // If alignment is less than or equal to the span header size (which is a power of two),
1903     // and the size rounded up to a multiple of the span header size does not exceed size + alignment,
1904     // then use the natural alignment of blocks to provide the requested alignment
1905     size_t multiple_size = size ? (size + (SPAN_HEADER_SIZE - 1)) & ~(uintptr_t)(SPAN_HEADER_SIZE - 1) : SPAN_HEADER_SIZE;
1906     assert(!(multiple_size % SPAN_HEADER_SIZE));
1907     if (multiple_size <= (size + alignment))
1908       return _rpmalloc_allocate(heap, multiple_size);
1909   }
1910 
1911   void *ptr = 0;
1912   size_t align_mask = alignment - 1;
1913   if (alignment <= _memory_page_size) {
1914     ptr = _rpmalloc_allocate(heap, size + alignment);
1915     if ((uintptr_t)ptr & align_mask) {
1916       ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1917       //Mark as having aligned blocks
1918       span_t *span = (span_t *)((uintptr_t)ptr & _memory_span_mask);
1919       span->flags |= SPAN_FLAG_ALIGNED_BLOCKS;
1920     }
1921     return ptr;
1922   }
1923 
1924   // Fallback to mapping new pages for this request. Since pointers passed
1925   // to rpfree must be able to reach the start of the span by bitmasking of
1926   // the address with the span size, the returned aligned pointer from this
1927   // function must be within a span size of the start of the mapped area.
1928   // In worst case this requires us to loop and map pages until we get a
1929   // suitable memory address. It also means we can never align to span size
1930   // or greater, since the span header will push alignment more than one
1931   // span size away from span start (thus causing pointer mask to give us
1932   // an invalid span start on free)
1933   if (alignment & align_mask) {
1934     errno = EINVAL;
1935     return 0;
1936   }
1937   if (alignment >= _memory_span_size) {
1938     errno = EINVAL;
1939     return 0;
1940   }
1941 
1942   size_t extra_pages = alignment / _memory_page_size;
1943 
1944   // Since each span has a header, we will at least need one extra memory page
1945   size_t num_pages = 1 + (size / _memory_page_size);
1946   if (size & (_memory_page_size - 1))
1947     ++num_pages;
1948 
1949   if (extra_pages > num_pages)
1950     num_pages = 1 + extra_pages;
1951 
1952   size_t original_pages = num_pages;
1953   size_t limit_pages = (_memory_span_size / _memory_page_size) * 2;
1954   if (limit_pages < (original_pages * 2))
1955     limit_pages = original_pages * 2;
1956 
1957   size_t mapped_size, align_offset;
1958   span_t *span;
1959 
1960 retry:
1961   align_offset = 0;
1962   mapped_size = num_pages * _memory_page_size;
1963 
1964   span = (span_t *)_rpmalloc_mmap(mapped_size, &align_offset);
1965   if (!span) {
1966     errno = ENOMEM;
1967     return 0;
1968   }
1969   ptr = pointer_offset(span, SPAN_HEADER_SIZE);
1970 
1971   if ((uintptr_t)ptr & align_mask)
1972     ptr = (void *)(((uintptr_t)ptr & ~(uintptr_t)align_mask) + alignment);
1973 
1974   if (((size_t)pointer_diff(ptr, span) >= _memory_span_size) ||
1975       (pointer_offset(ptr, size) > pointer_offset(span, mapped_size)) ||
1976       (((uintptr_t)ptr & _memory_span_mask) != (uintptr_t)span)) {
1977     _rpmalloc_unmap(span, mapped_size, align_offset, mapped_size);
1978     ++num_pages;
1979     if (num_pages > limit_pages) {
1980       errno = EINVAL;
1981       return 0;
1982     }
1983     goto retry;
1984   }
1985 
1986   //Store page count in span_count
1987   span->size_class = SIZE_CLASS_HUGE;
1988   span->span_count = (uint32_t)num_pages;
1989   span->align_offset = (uint32_t)align_offset;
1990   span->heap = heap;
1991   _rpmalloc_stat_add_peak(&_huge_pages_current, num_pages, _huge_pages_peak);
1992 
1993 #if RPMALLOC_FIRST_CLASS_HEAPS
1994   _rpmalloc_span_double_link_list_add(&heap->large_huge_span, span);
1995 #endif
1996   ++heap->full_span_count;
1997 
1998   return ptr;
1999 }
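
// Editor's note (illustrative, not part of the upstream source), assuming 4KiB pages
// and the default 64KiB span size: an aligned request of 100000 bytes with 16KiB
// alignment cannot be served by block alignment, so pages are mapped directly.
// num_pages starts at 1 + 100000/4096 = 25, plus one for the remainder = 26, and
// extra_pages = 16384/4096 = 4 does not raise it further. The retry loop then grows
// the mapping one page at a time, up to twice the span size in pages or twice the
// original request (whichever is larger), until the aligned pointer lands within the
// first span of the mapping. Alignments of a full span size or more are rejected with
// EINVAL because the span header would push the aligned block outside the span start
// recoverable by the pointer mask on free.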
2000 
2001 
2002 ////////////
2003 ///
2004 /// Deallocation entry points
2005 ///
2006 //////
2007 
2008 //! Deallocate the given small/medium memory block in the current thread local heap
2009 static void
2010 _rpmalloc_deallocate_direct_small_or_medium(span_t *span, void *block) {
2011   heap_t *heap = span->heap;
2012   assert(heap->owner_thread == get_thread_id() || heap->finalize);
2013   //Add block to free list
2014   if (UNEXPECTED(_rpmalloc_span_is_fully_utilized(span))) {
2015     span->used_count = span->block_count;
2016 #if RPMALLOC_FIRST_CLASS_HEAPS
2017     _rpmalloc_span_double_link_list_remove(&heap->full_span[span->size_class], span);
2018 #endif
2019     _rpmalloc_span_double_link_list_add(&heap->partial_span[span->size_class], span);
2020     --heap->full_span_count;
2021   }
2022   --span->used_count;
2023   *((void **)block) = span->free_list;
2024   span->free_list = block;
2025   if (UNEXPECTED(span->used_count == span->list_size)) {
2026     _rpmalloc_span_double_link_list_remove(&heap->partial_span[span->size_class], span);
2027     _rpmalloc_span_release_to_cache(heap, span);
2028   }
2029 }
2030 
2031 static void
2032 _rpmalloc_deallocate_defer_free_span(heap_t *heap, span_t *span) {
2033   //This list does not need ABA protection, no mutable side state
2034   do {
2035     span->free_list = atomic_load_ptr(&heap->span_free_deferred);
2036   } while (!atomic_cas_ptr(&heap->span_free_deferred, span, span->free_list));
2037 }
2038 
2039 //! Put the block in the deferred free list of the owning span
2040 static void
2041 _rpmalloc_deallocate_defer_small_or_medium(span_t *span, void *block) {
2042   // The memory ordering here is a bit tricky. To avoid having to ABA protect
2043   // the deferred free list (and risk desynchronizing the list and its size), we
2044   // need acquire semantics on a successful CAS of the pointer to guarantee the
2045   // validity of list_size, plus release semantics on the pointer store
2046   void *free_list;
2047   do {
2048     free_list = atomic_load_ptr(&span->free_list_deferred);
2049     *((void **)block) = free_list;
2050   } while ((free_list == INVALID_POINTER) || !atomic_cas_ptr_acquire(&span->free_list_deferred, INVALID_POINTER, free_list));
2051   uint32_t free_count = ++span->list_size;
2052   atomic_store_ptr_release(&span->free_list_deferred, block);
2053   if (free_count == span->block_count) {
2054     // Span was completely freed by this block. Due to the INVALID_POINTER spin lock
2055     // no other thread can reach this state simultaneously on this span.
2056     // Safe to move to owner heap deferred cache
2057     _rpmalloc_deallocate_defer_free_span(span->heap, span);
2058   }
2059 }
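
// Editor's sketch (illustrative only, not compiled): the loop above uses the sentinel
// value INVALID_POINTER as a tiny spin lock on the list head so that the head pointer
// and list_size can never be observed out of sync. Stripped to its essentials, with
// hypothetical names (list_head, list_size, LOCK_SENTINEL), the pattern is:
#if 0
void *head;
do {
  head = atomic_load_ptr(&list_head);
  *((void **)block) = head;           //Link the new block in before taking the "lock"
  //Retry while another thread holds the head, or until the sentinel is swapped in
} while ((head == LOCK_SENTINEL) || !atomic_cas_ptr_acquire(&list_head, LOCK_SENTINEL, head));
++list_size;                          //Safe: only the sentinel holder mutates the size
atomic_store_ptr_release(&list_head, block);  //Publish the new head, releasing the "lock"
#endif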
2060 
2061 static void
2062 _rpmalloc_deallocate_small_or_medium(span_t *span, void *p) {
2063   _rpmalloc_stat_inc_free(span->heap, span->size_class);
2064   if (span->flags & SPAN_FLAG_ALIGNED_BLOCKS) {
2065     //Realign pointer to block start
2066     void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2067     uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2068     p = pointer_offset(p, -(int32_t)(block_offset % span->block_size));
2069   }
2070   //Check if block belongs to this heap or if deallocation should be deferred
2071   if ((span->heap->owner_thread == get_thread_id()) || span->heap->finalize)
2072     _rpmalloc_deallocate_direct_small_or_medium(span, p);
2073   else
2074     _rpmalloc_deallocate_defer_small_or_medium(span, p);
2075 }
2076 
2077 //! Deallocate the given large memory block to the current heap
2078 static void
2079 _rpmalloc_deallocate_large(span_t *span) {
2080   assert(span->size_class == SIZE_CLASS_LARGE);
2081   assert(!(span->flags & SPAN_FLAG_MASTER) || !(span->flags & SPAN_FLAG_SUBSPAN));
2082   assert((span->flags & SPAN_FLAG_MASTER) || (span->flags & SPAN_FLAG_SUBSPAN));
2083   //We must always defer (unless finalizing) if from another heap since we cannot touch the list or counters of another heap
2084   int defer = (span->heap->owner_thread != get_thread_id()) && !span->heap->finalize;
2085   if (defer) {
2086     _rpmalloc_deallocate_defer_free_span(span->heap, span);
2087     return;
2088   }
2089   assert(span->heap->full_span_count);
2090   --span->heap->full_span_count;
2091 #if RPMALLOC_FIRST_CLASS_HEAPS
2092   _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2093 #endif
2094 #if ENABLE_ADAPTIVE_THREAD_CACHE || ENABLE_STATISTICS
2095   //Decrease counter
2096   size_t idx = span->span_count - 1;
2097   atomic_decr32(&span->heap->span_use[idx].current);
2098 #endif
2099   heap_t *heap = get_thread_heap();
2100   assert(heap);
2101   span->heap = heap;
2102   if ((span->span_count > 1) && !heap->finalize && !heap->spans_reserved) {
2103     heap->span_reserve = span;
2104     heap->spans_reserved = span->span_count;
2105     if (span->flags & SPAN_FLAG_MASTER) {
2106       heap->span_reserve_master = span;
2107     } else { //SPAN_FLAG_SUBSPAN
2108       span_t *master = (span_t *)pointer_offset(span, -(intptr_t)((size_t)span->offset_from_master * _memory_span_size));
2109       heap->span_reserve_master = master;
2110       assert(master->flags & SPAN_FLAG_MASTER);
2111       assert(atomic_load32(&master->remaining_spans) >= (int32_t)span->span_count);
2112     }
2113     _rpmalloc_stat_inc(&heap->span_use[idx].spans_to_reserved);
2114   } else {
2115     //Insert into cache list
2116     _rpmalloc_heap_cache_insert(heap, span);
2117   }
2118 }
2119 
2120 //! Deallocate the given huge span
2121 static void
2122 _rpmalloc_deallocate_huge(span_t *span) {
2123   assert(span->heap);
2124   if ((span->heap->owner_thread != get_thread_id()) && !span->heap->finalize) {
2125     _rpmalloc_deallocate_defer_free_span(span->heap, span);
2126     return;
2127   }
2128   assert(span->heap->full_span_count);
2129   --span->heap->full_span_count;
2130 #if RPMALLOC_FIRST_CLASS_HEAPS
2131   _rpmalloc_span_double_link_list_remove(&span->heap->large_huge_span, span);
2132 #endif
2133 
2134   //Oversized allocation, page count is stored in span_count
2135   size_t num_pages = span->span_count;
2136   _rpmalloc_unmap(span, num_pages * _memory_page_size, span->align_offset, num_pages * _memory_page_size);
2137   _rpmalloc_stat_sub(&_huge_pages_current, num_pages);
2138 }
2139 
2140 //! Deallocate the given block
2141 static void
2142 _rpmalloc_deallocate(void *p) {
2143   //Grab the span (always at start of span, using span alignment)
2144   span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2145 
2146   if (UNEXPECTED(!span))
2147     return;
2148   if (EXPECTED(span->size_class < SIZE_CLASS_COUNT))
2149     _rpmalloc_deallocate_small_or_medium(span, p);
2150   else if (span->size_class == SIZE_CLASS_LARGE)
2151     _rpmalloc_deallocate_large(span);
2152   else
2153     _rpmalloc_deallocate_huge(span);
2154 }
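
// Editor's note (illustrative, not part of the upstream source), assuming the default
// 64KiB span size so that _memory_span_mask clears the low 16 address bits: freeing a
// pointer such as 0x7f32a4c12345 masks down to the span header at 0x7f32a4c10000, and
// the size_class stored there routes the free to the small/medium, large or huge path.
// A null pointer masks to a null span and is caught by the UNEXPECTED(!span) check.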
2155 
2156 
2157 ////////////
2158 ///
2159 /// Reallocation entry points
2160 ///
2161 //////
2162 
2163 static size_t
2164 _rpmalloc_usable_size(void *p);
2165 
2166 //! Reallocate the given block to the given size
2167 static void *
2168 _rpmalloc_reallocate(heap_t *heap, void *p, size_t size, size_t oldsize, unsigned int flags) {
2169   if (p) {
2170     //Grab the span using guaranteed span alignment
2171     span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2172     if (EXPECTED(span->size_class < SIZE_CLASS_COUNT)) {
2173       //Small/medium sized block
2174       assert(span->span_count == 1);
2175       void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2176       uint32_t block_offset = (uint32_t)pointer_diff(p, blocks_start);
2177       uint32_t block_idx = block_offset / span->block_size;
2178       void *block = pointer_offset(blocks_start, (size_t)block_idx * span->block_size);
2179       if (!oldsize)
2180         oldsize = (size_t)((ptrdiff_t)span->block_size - pointer_diff(p, block));
2181       if ((size_t)span->block_size >= size) {
2182         //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2183         if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2184           memmove(block, p, oldsize);
2185         return block;
2186       }
2187     } else if (span->size_class == SIZE_CLASS_LARGE) {
2188       //Large block
2189       size_t total_size = size + SPAN_HEADER_SIZE;
2190       size_t num_spans = total_size >> _memory_span_size_shift;
2191       if (total_size & (_memory_span_size - 1))
2192         ++num_spans;
2193       size_t current_spans = span->span_count;
2194       void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2195       if (!oldsize)
2196         oldsize = (current_spans * _memory_span_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2197       if ((current_spans >= num_spans) && (num_spans >= (current_spans / 2))) {
2198         //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2199         if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2200           memmove(block, p, oldsize);
2201         return block;
2202       }
2203     } else {
2204       //Oversized block
2205       size_t total_size = size + SPAN_HEADER_SIZE;
2206       size_t num_pages = total_size >> _memory_page_size_shift;
2207       if (total_size & (_memory_page_size - 1))
2208         ++num_pages;
2209       //Page count is stored in span_count
2210       size_t current_pages = span->span_count;
2211       void *block = pointer_offset(span, SPAN_HEADER_SIZE);
2212       if (!oldsize)
2213         oldsize = (current_pages * _memory_page_size) - (size_t)pointer_diff(p, block) - SPAN_HEADER_SIZE;
2214       if ((current_pages >= num_pages) && (num_pages >= (current_pages / 2))) {
2215         //Still fits in block, never mind trying to save memory, but preserve data if alignment changed
2216         if ((p != block) && !(flags & RPMALLOC_NO_PRESERVE))
2217           memmove(block, p, oldsize);
2218         return block;
2219       }
2220     }
2221   } else {
2222     oldsize = 0;
2223   }
2224 
2225   if (!!(flags & RPMALLOC_GROW_OR_FAIL))
2226     return 0;
2227 
2228   //Size is greater than block size, need to allocate a new block and deallocate the old
2229   //Avoid hysteresis by overallocating if increase is small (below 37%)
2230   size_t lower_bound = oldsize + (oldsize >> 2) + (oldsize >> 3);
2231   size_t new_size = (size > lower_bound) ? size : ((size > oldsize) ? lower_bound : size);
2232   void *block = _rpmalloc_allocate(heap, new_size);
2233   if (p && block) {
2234     if (!(flags & RPMALLOC_NO_PRESERVE))
2235       memcpy(block, p, oldsize < new_size ? oldsize : new_size);
2236     _rpmalloc_deallocate(p);
2237   }
2238 
2239   return block;
2240 }
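
// Editor's note (illustrative, not part of the upstream source): the hysteresis rule
// above pads small increases up to oldsize * 1.375 (25% + 12.5%). Growing a 1000-byte
// block to 1100 bytes computes lower_bound = 1000 + 250 + 125 = 1375 and allocates
// 1375 bytes, so a following small growth is more likely to still fit; asking for
// 2000 bytes exceeds the lower bound and is allocated at exactly the requested size.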
2241 
2242 static void *
2243 _rpmalloc_aligned_reallocate(heap_t *heap, void *ptr, size_t alignment, size_t size, size_t oldsize,
2244                              unsigned int flags) {
2245   if (alignment <= SMALL_GRANULARITY)
2246     return _rpmalloc_reallocate(heap, ptr, size, oldsize, flags);
2247 
2248   int no_alloc = !!(flags & RPMALLOC_GROW_OR_FAIL);
2249   size_t usablesize = _rpmalloc_usable_size(ptr);
2250   if ((usablesize >= size) && !((uintptr_t)ptr & (alignment - 1))) {
2251     if (no_alloc || (size >= (usablesize / 2)))
2252       return ptr;
2253   }
2254   // Aligned alloc marks span as having aligned blocks
2255   void *block = (!no_alloc ? _rpmalloc_aligned_allocate(heap, alignment, size) : 0);
2256   if (EXPECTED(block != 0)) {
2257     if (!(flags & RPMALLOC_NO_PRESERVE) && ptr) {
2258       if (!oldsize)
2259         oldsize = usablesize;
2260       memcpy(block, ptr, oldsize < size ? oldsize : size);
2261     }
2262     _rpmalloc_deallocate(ptr);
2263   }
2264   return block;
2265 }
2266 
2267 
2268 ////////////
2269 ///
2270 /// Initialization, finalization and utility
2271 ///
2272 //////
2273 
2274 //! Get the usable size of the given block
2275 static size_t
2276 _rpmalloc_usable_size(void *p) {
2277   //Grab the span using guaranteed span alignment
2278   span_t *span = (span_t *)((uintptr_t)p & _memory_span_mask);
2279   if (span->size_class < SIZE_CLASS_COUNT) {
2280     //Small/medium block
2281     void *blocks_start = pointer_offset(span, SPAN_HEADER_SIZE);
2283     return span->block_size - ((size_t)pointer_diff(p, blocks_start) % span->block_size);
2284   }
2285   if (span->size_class == SIZE_CLASS_LARGE) {
2286     //Large block
2287     size_t current_spans = span->span_count;
2288     return (current_spans * _memory_span_size) - (size_t)pointer_diff(p, span);
2289   }
2290   //Oversized block, page count is stored in span_count
2291   size_t current_pages = span->span_count;
2292   return (current_pages * _memory_page_size) - (size_t)pointer_diff(p, span);
2293 }
2294 
2295 //! Adjust and optimize the size class properties for the given class
2296 static void
2297 _rpmalloc_adjust_size_class(size_t iclass) {
2298   size_t block_size = _memory_size_class[iclass].block_size;
2299   size_t block_count = (_memory_span_size - SPAN_HEADER_SIZE) / block_size;
2300 
2301   _memory_size_class[iclass].block_count = (uint16_t)block_count;
2302   _memory_size_class[iclass].class_idx = (uint16_t)iclass;
2303 
2304   //Check if previous size classes can be merged
2305   size_t prevclass = iclass;
2306   while (prevclass > 0) {
2307     --prevclass;
2308     //A class can be merged if number of pages and number of blocks are equal
2309     if (_memory_size_class[prevclass].block_count == _memory_size_class[iclass].block_count)
2310       memcpy(_memory_size_class + prevclass, _memory_size_class + iclass, sizeof(_memory_size_class[iclass]));
2311     else
2312       break;
2313   }
2314 }
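
// Editor's note (illustrative, not part of the upstream source), assuming the default
// 64KiB span size: after subtracting SPAN_HEADER_SIZE, two classes with block sizes
// of, for example, 11264 and 11776 bytes both fit 5 blocks into a span, so the loop
// above merges the smaller class into the larger one. A request landing in the
// 11264-byte class is then served from 11776-byte blocks without reducing the number
// of blocks carved from each span.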
2315 
2316 //! Initialize the allocator and setup global data
2317 extern inline int
2318 rpmalloc_initialize(void) {
2319   if (_rpmalloc_initialized) {
2320     rpmalloc_thread_initialize();
2321     return 0;
2322   }
2323   return rpmalloc_initialize_config(0);
2324 }
2325 
2326 int
2327 rpmalloc_initialize_config(const rpmalloc_config_t *config) {
2328   if (_rpmalloc_initialized) {
2329     rpmalloc_thread_initialize();
2330     return 0;
2331   }
2332   _rpmalloc_initialized = 1;
2333 
2334   if (config)
2335     memcpy(&_memory_config, config, sizeof(rpmalloc_config_t));
2336   else
2337     memset(&_memory_config, 0, sizeof(rpmalloc_config_t));
2338 
2339   if (!_memory_config.memory_map || !_memory_config.memory_unmap) {
2340     _memory_config.memory_map = _rpmalloc_mmap_os;
2341     _memory_config.memory_unmap = _rpmalloc_unmap_os;
2342   }
2343 
2344 #if RPMALLOC_CONFIGURABLE
2345   _memory_page_size = _memory_config.page_size;
2346 #else
2347   _memory_page_size = 0;
2348 #endif
2349   _memory_huge_pages = 0;
2350   _memory_map_granularity = _memory_page_size;
2351   if (!_memory_page_size) {
2352 #if PLATFORM_WINDOWS
2353     SYSTEM_INFO system_info;
2354     memset(&system_info, 0, sizeof(system_info));
2355     GetSystemInfo(&system_info);
2356     _memory_page_size = system_info.dwPageSize;
2357     _memory_map_granularity = system_info.dwAllocationGranularity;
2358 #else
2359     _memory_page_size = (size_t)sysconf(_SC_PAGESIZE);
2360     _memory_map_granularity = _memory_page_size;
2361     if (_memory_config.enable_huge_pages) {
2362 #if defined(__linux__)
2363       size_t huge_page_size = 0;
2364       FILE *meminfo = fopen("/proc/meminfo", "r");
2365       if (meminfo) {
2366         char line[128];
2367         while (!huge_page_size && fgets(line, sizeof(line) - 1, meminfo)) {
2368           line[sizeof(line) - 1] = 0;
2369           if (strstr(line, "Hugepagesize:"))
2370             huge_page_size = (size_t)strtol(line + 13, 0, 10) * 1024;
2371         }
2372         fclose(meminfo);
2373       }
2374       if (huge_page_size) {
2375         _memory_huge_pages = 1;
2376         _memory_page_size = huge_page_size;
2377         _memory_map_granularity = huge_page_size;
2378       }
2379 #elif defined(__FreeBSD__)
2380       int rc;
2381       size_t sz = sizeof(rc);
2382 
2383       if (sysctlbyname("vm.pmap.pg_ps_enabled", &rc, &sz, NULL, 0) == 0 && rc == 1) {
2384         _memory_huge_pages = 1;
2385         _memory_page_size = 2 * 1024 * 1024;
2386         _memory_map_granularity = _memory_page_size;
2387       }
2388 #elif defined(__APPLE__)
2389       _memory_huge_pages = 1;
2390       _memory_page_size = 2 * 1024 * 1024;
2391       _memory_map_granularity = _memory_page_size;
2392 #endif
2393     }
2394 #endif
2395   } else {
2396     if (_memory_config.enable_huge_pages)
2397       _memory_huge_pages = 1;
2398   }
2399 
2400 #if PLATFORM_WINDOWS
2401   if (_memory_config.enable_huge_pages) {
2402     HANDLE token = 0;
2403     size_t large_page_minimum = GetLargePageMinimum();
2404     if (large_page_minimum)
2405       OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token);
2406     if (token) {
2407       LUID luid;
2408       if (LookupPrivilegeValue(0, SE_LOCK_MEMORY_NAME, &luid)) {
2409         TOKEN_PRIVILEGES token_privileges;
2410         memset(&token_privileges, 0, sizeof(token_privileges));
2411         token_privileges.PrivilegeCount = 1;
2412         token_privileges.Privileges[0].Luid = luid;
2413         token_privileges.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
2414         if (AdjustTokenPrivileges(token, FALSE, &token_privileges, 0, 0, 0)) {
2415           DWORD err = GetLastError();
2416           if (err == ERROR_SUCCESS) {
2417             _memory_huge_pages = 1;
2418             if (large_page_minimum > _memory_page_size)
2419               _memory_page_size = large_page_minimum;
2420             if (large_page_minimum > _memory_map_granularity)
2421               _memory_map_granularity = large_page_minimum;
2422           }
2423         }
2424       }
2425       CloseHandle(token);
2426     }
2427   }
2428 #endif
2429 
2430   //The ABA counter in heap orphan list is tied to using HEAP_ORPHAN_ABA_SIZE
2431   size_t min_span_size = HEAP_ORPHAN_ABA_SIZE;
2432   size_t max_page_size;
2433 #if UINTPTR_MAX > 0xFFFFFFFF
2434   max_page_size = 4096ULL * 1024ULL * 1024ULL;
2435 #else
2436   max_page_size = 4 * 1024 * 1024;
2437 #endif
2438   if (_memory_page_size < min_span_size)
2439     _memory_page_size = min_span_size;
2440   if (_memory_page_size > max_page_size)
2441     _memory_page_size = max_page_size;
2442   _memory_page_size_shift = 0;
2443   size_t page_size_bit = _memory_page_size;
2444   while (page_size_bit != 1) {
2445     ++_memory_page_size_shift;
2446     page_size_bit >>= 1;
2447   }
2448   _memory_page_size = ((size_t)1 << _memory_page_size_shift);
2449 
2450 #if RPMALLOC_CONFIGURABLE
2451   size_t span_size = _memory_config.span_size;
2452   if (!span_size)
2453     span_size = (64 * 1024);
2454   if (span_size > (256 * 1024))
2455     span_size = (256 * 1024);
2456   _memory_span_size = 4096;
2457   _memory_span_size_shift = 12;
2458   while (_memory_span_size < span_size) {
2459     _memory_span_size <<= 1;
2460     ++_memory_span_size_shift;
2461   }
2462   _memory_span_mask = ~(uintptr_t)(_memory_span_size - 1);
2463 #endif
2464 
2465   _memory_span_map_count = (_memory_config.span_map_count ? _memory_config.span_map_count : DEFAULT_SPAN_MAP_COUNT);
2466   if ((_memory_span_size * _memory_span_map_count) < _memory_page_size)
2467     _memory_span_map_count = (_memory_page_size / _memory_span_size);
2468   if ((_memory_page_size >= _memory_span_size) && ((_memory_span_map_count * _memory_span_size) % _memory_page_size))
2469     _memory_span_map_count = (_memory_page_size / _memory_span_size);
2470 
2471   _memory_config.page_size = _memory_page_size;
2472   _memory_config.span_size = _memory_span_size;
2473   _memory_config.span_map_count = _memory_span_map_count;
2474   _memory_config.enable_huge_pages = _memory_huge_pages;
2475 
2476   _memory_span_release_count = (_memory_span_map_count > 4 ? ((_memory_span_map_count < 64) ? _memory_span_map_count : 64) : 4);
2477   _memory_span_release_count_large = (_memory_span_release_count > 8 ? (_memory_span_release_count / 4) : 2);
2478 
2479 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2480   if (pthread_key_create(&_memory_thread_heap, _rpmalloc_heap_release_raw))
2481     return -1;
2482 #endif
2483 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2484   fls_key = FlsAlloc(&_rpmalloc_thread_destructor);
2485 #endif
2486 
2487   //Setup all small and medium size classes
2488   size_t iclass = 0;
2489   _memory_size_class[iclass].block_size = SMALL_GRANULARITY;
2490   _rpmalloc_adjust_size_class(iclass);
2491   for (iclass = 1; iclass < SMALL_CLASS_COUNT; ++iclass) {
2492     size_t size = iclass * SMALL_GRANULARITY;
2493     _memory_size_class[iclass].block_size = (uint32_t)size;
2494     _rpmalloc_adjust_size_class(iclass);
2495   }
2496   //At least two blocks per span, then fall back to large allocations
2497   _memory_medium_size_limit = (_memory_span_size - SPAN_HEADER_SIZE) >> 1;
2498   if (_memory_medium_size_limit > MEDIUM_SIZE_LIMIT)
2499     _memory_medium_size_limit = MEDIUM_SIZE_LIMIT;
2500   for (iclass = 0; iclass < MEDIUM_CLASS_COUNT; ++iclass) {
2501     size_t size = SMALL_SIZE_LIMIT + ((iclass + 1) * MEDIUM_GRANULARITY);
2502     if (size > _memory_medium_size_limit)
2503       break;
2504     _memory_size_class[SMALL_CLASS_COUNT + iclass].block_size = (uint32_t)size;
2505     _rpmalloc_adjust_size_class(SMALL_CLASS_COUNT + iclass);
2506   }
2507 
2508   atomic_store_ptr(&_memory_orphan_heaps, 0);
2509 #if RPMALLOC_FIRST_CLASS_HEAPS
2510   atomic_store_ptr(&_memory_first_class_orphan_heaps, 0);
2511 #endif
2512   for (size_t ilist = 0, lsize = (sizeof(_memory_heaps) / sizeof(_memory_heaps[0])); ilist < lsize; ++ilist)
2513     atomic_store_ptr(&_memory_heaps[ilist], 0);
2514 
2515   //Initialize this thread
2516   rpmalloc_thread_initialize();
2517   return 0;
2518 }
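
// Editor's usage sketch (illustrative only, not compiled): bringing the allocator up
// with a custom configuration, using only fields referenced above. Fields left zero
// fall back to the defaults computed in rpmalloc_initialize_config(); note that span
// size overrides only take effect when built with RPMALLOC_CONFIGURABLE.
#if 0
static int
start_allocator(void) {
  rpmalloc_config_t config;
  memset(&config, 0, sizeof(config));
  config.span_map_count = 128;     //Map 128 spans per call instead of the default
  config.enable_huge_pages = 1;    //Request huge/large pages if the OS grants them
  if (rpmalloc_initialize_config(&config))
    return -1;                     //Non-zero return means initialization failed
  //... allocate with rpmalloc()/rpfree() from initialized threads ...
  rpmalloc_finalize();
  return 0;
}
#endif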
2519 
2520 //! Finalize the allocator
2521 void
2522 rpmalloc_finalize(void) {
2523   rpmalloc_thread_finalize();
2524   //rpmalloc_dump_statistics(stderr);
2525 
2526   //Free all thread caches and fully free spans
2527   for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
2528     heap_t *heap = (heap_t *)atomic_load_ptr(&_memory_heaps[list_idx]);
2529     while (heap) {
2530       heap_t *next_heap = heap->next_heap;
2531       heap->finalize = 1;
2532       _rpmalloc_heap_global_finalize(heap);
2533       heap = next_heap;
2534     }
2535   }
2536 
2537 #if ENABLE_GLOBAL_CACHE
2538   //Free global caches
2539   for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass)
2540     _rpmalloc_global_cache_finalize(&_memory_span_cache[iclass]);
2541 #endif
2542 
2543 #if (defined(__APPLE__) || defined(__HAIKU__)) && ENABLE_PRELOAD
2544   pthread_key_delete(_memory_thread_heap);
2545 #endif
2546 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2547   FlsFree(fls_key);
2548   fls_key = 0;
2549 #endif
2550 #if ENABLE_STATISTICS
2551   //If you hit these asserts you probably have memory leaks (perhaps global scope data doing dynamic allocations) or double frees in your code
2552   assert(!atomic_load32(&_mapped_pages));
2553   assert(!atomic_load32(&_reserved_spans));
2554   assert(!atomic_load32(&_mapped_pages_os));
2555 #endif
2556 
2557   _rpmalloc_initialized = 0;
2558 }
2559 
2560 //! Initialize thread, assign heap
2561 extern inline void
2562 rpmalloc_thread_initialize(void) {
2563   if (!get_thread_heap_raw()) {
2564     heap_t *heap = _rpmalloc_heap_allocate(0);
2565     if (heap) {
2566       _rpmalloc_stat_inc(&_memory_active_heaps);
2567       set_thread_heap(heap);
2568 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2569       FlsSetValue(fls_key, heap);
2570 #endif
2571     }
2572   }
2573 }
2574 
2575 //! Finalize thread, orphan heap
2576 void
2577 rpmalloc_thread_finalize(void) {
2578   heap_t *heap = get_thread_heap_raw();
2579   if (heap)
2580     _rpmalloc_heap_release_raw(heap);
2581   set_thread_heap(0);
2582 #if defined(_WIN32) && (!defined(BUILD_DYNAMIC_LINK) || !BUILD_DYNAMIC_LINK)
2583   FlsSetValue(fls_key, 0);
2584 #endif
2585 }
2586 
2587 int
2588 rpmalloc_is_thread_initialized(void) {
2589   return (get_thread_heap_raw() != 0) ? 1 : 0;
2590 }
2591 
2592 const rpmalloc_config_t *
2593 rpmalloc_config(void) {
2594   return &_memory_config;
2595 }
2596 
2597 // Extern interface
2598 
2599 extern inline RPMALLOC_ALLOCATOR void *
2600 rpmalloc(size_t size) {
2601 #if ENABLE_VALIDATE_ARGS
2602   if (size >= MAX_ALLOC_SIZE) {
2603     errno = EINVAL;
2604     return 0;
2605   }
2606 #endif
2607   heap_t *heap = get_thread_heap();
2608   return _rpmalloc_allocate(heap, size);
2609 }
2610 
2611 extern inline void
2612 rpfree(void *ptr) {
2613   _rpmalloc_deallocate(ptr);
2614 }
2615 
2616 extern inline RPMALLOC_ALLOCATOR void *
2617 rpcalloc(size_t num, size_t size) {
2618   size_t total;
2619 #if ENABLE_VALIDATE_ARGS
2620 #if PLATFORM_WINDOWS
2621   int err = SizeTMult(num, size, &total);
2622   if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2623     errno = EINVAL;
2624     return 0;
2625   }
2626 #else
2627   int err = __builtin_umull_overflow(num, size, &total);
2628   if (err || (total >= MAX_ALLOC_SIZE)) {
2629     errno = EINVAL;
2630     return 0;
2631   }
2632 #endif
2633 #else
2634   total = num * size;
2635 #endif
2636   heap_t *heap = get_thread_heap();
2637   void *block = _rpmalloc_allocate(heap, total);
2638   if (block)
2639     memset(block, 0, total);
2640   return block;
2641 }
2642 
2643 extern inline RPMALLOC_ALLOCATOR void *
2644 rprealloc(void *ptr, size_t size) {
2645 #if ENABLE_VALIDATE_ARGS
2646   if (size >= MAX_ALLOC_SIZE) {
2647     errno = EINVAL;
2648     return ptr;
2649   }
2650 #endif
2651   heap_t *heap = get_thread_heap();
2652   return _rpmalloc_reallocate(heap, ptr, size, 0, 0);
2653 }
2654 
2655 extern RPMALLOC_ALLOCATOR void *
2656 rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
2657                   unsigned int flags) {
2658 #if ENABLE_VALIDATE_ARGS
2659   if ((size + alignment < size) || (alignment > _memory_page_size)) {
2660     errno = EINVAL;
2661     return 0;
2662   }
2663 #endif
2664   heap_t *heap = get_thread_heap();
2665   return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, oldsize, flags);
2666 }
2667 
2668 extern RPMALLOC_ALLOCATOR void *
2669 rpaligned_alloc(size_t alignment, size_t size) {
2670   heap_t *heap = get_thread_heap();
2671   return _rpmalloc_aligned_allocate(heap, alignment, size);
2672 }
2673 
2674 extern inline RPMALLOC_ALLOCATOR void *
2675 rpaligned_calloc(size_t alignment, size_t num, size_t size) {
2676   size_t total;
2677 #if ENABLE_VALIDATE_ARGS
2678 #if PLATFORM_WINDOWS
2679   int err = SizeTMult(num, size, &total);
2680   if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
2681     errno = EINVAL;
2682     return 0;
2683   }
2684 #else
2685   int err = __builtin_umull_overflow(num, size, &total);
2686   if (err || (total >= MAX_ALLOC_SIZE)) {
2687     errno = EINVAL;
2688     return 0;
2689   }
2690 #endif
2691 #else
2692   total = num * size;
2693 #endif
2694   void *block = rpaligned_alloc(alignment, total);
2695   if (block)
2696     memset(block, 0, total);
2697   return block;
2698 }
2699 
2700 extern inline RPMALLOC_ALLOCATOR void *
2701 rpmemalign(size_t alignment, size_t size) {
2702   return rpaligned_alloc(alignment, size);
2703 }
2704 
2705 extern inline int
2706 rpposix_memalign(void **memptr, size_t alignment, size_t size) {
2707   if (memptr)
2708     *memptr = rpaligned_alloc(alignment, size);
2709   else
2710     return EINVAL;
2711   return *memptr ? 0 : ENOMEM;
2712 }
2713 
2714 extern inline size_t
2715 rpmalloc_usable_size(void *ptr) {
2716   return (ptr ? _rpmalloc_usable_size(ptr) : 0);
2717 }
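
// Editor's usage sketch (illustrative only, not compiled): typical per-thread use of
// the extern interface above. Threads that allocate should call
// rpmalloc_thread_initialize()/rpmalloc_thread_finalize() around their use unless the
// build is set up to do that automatically (e.g. preload/override builds).
#if 0
static void
worker(void) {
  rpmalloc_thread_initialize();
  void *buffer = rpmalloc(1024);
  size_t usable = rpmalloc_usable_size(buffer);  //At least 1024, rounded up to the block size
  (void)usable;
  buffer = rprealloc(buffer, 4096);
  rpfree(buffer);
  void *aligned = rpaligned_alloc(64, 256);      //64-byte aligned block
  rpfree(aligned);
  rpmalloc_thread_finalize();
}
#endif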
2718 
2719 extern inline void
2720 rpmalloc_thread_collect(void) {
2721 }
2722 
2723 void
2724 rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats) {
2725   memset(stats, 0, sizeof(rpmalloc_thread_statistics_t));
2726   heap_t *heap = get_thread_heap_raw();
2727   if (!heap)
2728     return;
2729 
2730   for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
2731     size_class_t *size_class = _memory_size_class + iclass;
2732     span_t *span = heap->partial_span[iclass];
2733     while (span) {
2734       size_t free_count = span->list_size;
2735       size_t block_count = size_class->block_count;
2736       if (span->free_list_limit < block_count)
2737         block_count = span->free_list_limit;
2738       free_count += (block_count - span->used_count);
2739       stats->sizecache += free_count * size_class->block_size;
      span = span->next;
    }
  }

#if ENABLE_THREAD_CACHE
  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
    if (heap->span_cache[iclass])
      stats->spancache += (size_t)heap->span_cache[iclass]->list_size * (iclass + 1) * _memory_span_size;
  }
#endif

  span_t *deferred = (span_t *)atomic_load_ptr(&heap->span_free_deferred);
  while (deferred) {
    if (deferred->size_class != SIZE_CLASS_HUGE)
      stats->spancache += (size_t)deferred->span_count * _memory_span_size;
    deferred = (span_t *)deferred->free_list;
  }

#if ENABLE_STATISTICS
  stats->thread_to_global = (size_t)atomic_load64(&heap->thread_to_global);
  stats->global_to_thread = (size_t)atomic_load64(&heap->global_to_thread);

  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
    stats->span_use[iclass].current = (size_t)atomic_load32(&heap->span_use[iclass].current);
    stats->span_use[iclass].peak = (size_t)atomic_load32(&heap->span_use[iclass].high);
    stats->span_use[iclass].to_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_global);
    stats->span_use[iclass].from_global = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_global);
    stats->span_use[iclass].to_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache);
    stats->span_use[iclass].from_cache = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache);
    stats->span_use[iclass].to_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved);
    stats->span_use[iclass].from_reserved = (size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved);
    stats->span_use[iclass].map_calls = (size_t)atomic_load32(&heap->span_use[iclass].spans_map_calls);
  }
  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
    stats->size_use[iclass].alloc_current = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_current);
    stats->size_use[iclass].alloc_peak = (size_t)heap->size_class_use[iclass].alloc_peak;
    stats->size_use[iclass].alloc_total = (size_t)atomic_load32(&heap->size_class_use[iclass].alloc_total);
    stats->size_use[iclass].free_total = (size_t)atomic_load32(&heap->size_class_use[iclass].free_total);
    stats->size_use[iclass].spans_to_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache);
    stats->size_use[iclass].spans_from_cache = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache);
    stats->size_use[iclass].spans_from_reserved = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved);
    stats->size_use[iclass].map_calls = (size_t)atomic_load32(&heap->size_class_use[iclass].spans_map_calls);
  }
#endif
}
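
/* Illustrative usage sketch (editorial addition, not part of the library): sampling the calling
   thread's cache statistics. sizecache and spancache are reported in bytes; the fields guarded
   by ENABLE_STATISTICS above stay zero unless that option is compiled in.

   #include <stdio.h>
   #include "rpmalloc.h"

   static void report_thread_cache(void) {
     rpmalloc_thread_statistics_t stats;
     rpmalloc_thread_statistics(&stats);
     printf("thread size cache: %zu bytes, span cache: %zu bytes\n",
            stats.sizecache, stats.spancache);
   }
*/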

void
rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats) {
  memset(stats, 0, sizeof(rpmalloc_global_statistics_t));
#if ENABLE_STATISTICS
  stats->mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
  stats->mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
  stats->mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
  stats->unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
  stats->huge_alloc = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
  stats->huge_alloc_peak = (size_t)_huge_pages_peak * _memory_page_size;
#endif
#if ENABLE_GLOBAL_CACHE
  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
    stats->cached += (size_t)atomic_load32(&_memory_span_cache[iclass].size) * (iclass + 1) * _memory_span_size;
  }
#endif
}
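
/* Illustrative usage sketch (editorial addition, not part of the library): reading the
   process-wide counters. All values are in bytes; mapped/peak/total require ENABLE_STATISTICS,
   while cached only requires ENABLE_GLOBAL_CACHE.

   #include <stdio.h>
   #include "rpmalloc.h"

   static void report_global_usage(void) {
     rpmalloc_global_statistics_t stats;
     rpmalloc_global_statistics(&stats);
     printf("mapped: %zu MiB, global span cache: %zu MiB\n",
            stats.mapped / (1024 * 1024), stats.cached / (1024 * 1024));
   }
*/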

#if ENABLE_STATISTICS

static void
_memory_heap_dump_statistics(heap_t *heap, void *file) {
  fprintf(file, "Heap %d stats:\n", heap->id);
  fprintf(file,
          "Class   CurAlloc  PeakAlloc   TotAlloc    TotFree  BlkSize BlkCount SpansCur SpansPeak  PeakAllocMiB  ToCacheMiB FromCacheMiB FromReserveMiB MmapCalls\n");
  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
    if (!atomic_load32(&heap->size_class_use[iclass].alloc_total))
      continue;
    fprintf(file, "%3u:  %10u %10u %10u %10u %8u %8u %8d %9d %13zu %11zu %12zu %14zu %9u\n", (uint32_t)iclass,
            atomic_load32(&heap->size_class_use[iclass].alloc_current),
            heap->size_class_use[iclass].alloc_peak,
            atomic_load32(&heap->size_class_use[iclass].alloc_total),
            atomic_load32(&heap->size_class_use[iclass].free_total),
            _memory_size_class[iclass].block_size,
            _memory_size_class[iclass].block_count,
            atomic_load32(&heap->size_class_use[iclass].spans_current),
            heap->size_class_use[iclass].spans_peak,
            ((size_t)heap->size_class_use[iclass].alloc_peak * (size_t)_memory_size_class[iclass].block_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_to_cache) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_cache) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->size_class_use[iclass].spans_from_reserved) * _memory_span_size) / (size_t)(1024 * 1024),
            atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
  }
  fprintf(file,
          "Spans  Current     Peak  PeakMiB  Cached  ToCacheMiB FromCacheMiB ToReserveMiB FromReserveMiB ToGlobalMiB FromGlobalMiB  MmapCalls\n");
  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
    if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
      continue;
    fprintf(file, "%4u: %8d %8u %8zu %7u %11zu %12zu %12zu %14zu %11zu %13zu %10u\n", (uint32_t)(iclass + 1),
            atomic_load32(&heap->span_use[iclass].current),
            atomic_load32(&heap->span_use[iclass].high),
            ((size_t)atomic_load32(&heap->span_use[iclass].high) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
#if ENABLE_THREAD_CACHE
            heap->span_cache[iclass] ? heap->span_cache[iclass]->list_size : 0,
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_cache) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
#else
            0, 0ULL, 0ULL,
#endif
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_reserved) * (iclass + 1) * _memory_span_size) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_to_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
            ((size_t)atomic_load32(&heap->span_use[iclass].spans_from_global) * (size_t)_memory_span_size * (iclass + 1)) / (size_t)(1024 * 1024),
            atomic_load32(&heap->span_use[iclass].spans_map_calls));
  }
  fprintf(file, "ThreadToGlobalMiB GlobalToThreadMiB\n");
  fprintf(file, "%17zu %17zu\n", (size_t)atomic_load64(&heap->thread_to_global) / (size_t)(1024 * 1024),
          (size_t)atomic_load64(&heap->global_to_thread) / (size_t)(1024 * 1024));
}

#endif

void
rpmalloc_dump_statistics(void *file) {
#if ENABLE_STATISTICS
  //If you hit this assert, you still have active threads or forgot to finalize some thread(s)
  assert(atomic_load32(&_memory_active_heaps) == 0);
  for (size_t list_idx = 0; list_idx < HEAP_ARRAY_SIZE; ++list_idx) {
    heap_t *heap = atomic_load_ptr(&_memory_heaps[list_idx]);
    while (heap) {
      int need_dump = 0;
      for (size_t iclass = 0; !need_dump && (iclass < SIZE_CLASS_COUNT); ++iclass) {
        if (!atomic_load32(&heap->size_class_use[iclass].alloc_total)) {
          assert(!atomic_load32(&heap->size_class_use[iclass].free_total));
          assert(!atomic_load32(&heap->size_class_use[iclass].spans_map_calls));
          continue;
        }
        need_dump = 1;
      }
      for (size_t iclass = 0; !need_dump && (iclass < LARGE_CLASS_COUNT); ++iclass) {
        if (!atomic_load32(&heap->span_use[iclass].high) && !atomic_load32(&heap->span_use[iclass].spans_map_calls))
          continue;
        need_dump = 1;
      }
      if (need_dump)
        _memory_heap_dump_statistics(heap, file);
      heap = heap->next_heap;
    }
  }
  fprintf(file, "Global stats:\n");
  size_t huge_current = (size_t)atomic_load32(&_huge_pages_current) * _memory_page_size;
  size_t huge_peak = (size_t)_huge_pages_peak * _memory_page_size;
  fprintf(file, "HugeCurrentMiB HugePeakMiB\n");
  fprintf(file, "%14zu %11zu\n", huge_current / (size_t)(1024 * 1024), huge_peak / (size_t)(1024 * 1024));

  size_t mapped = (size_t)atomic_load32(&_mapped_pages) * _memory_page_size;
  size_t mapped_os = (size_t)atomic_load32(&_mapped_pages_os) * _memory_page_size;
  size_t mapped_peak = (size_t)_mapped_pages_peak * _memory_page_size;
  size_t mapped_total = (size_t)atomic_load32(&_mapped_total) * _memory_page_size;
  size_t unmapped_total = (size_t)atomic_load32(&_unmapped_total) * _memory_page_size;
  size_t reserved_total = (size_t)atomic_load32(&_reserved_spans) * _memory_span_size;
  fprintf(file, "MappedMiB MappedOSMiB MappedPeakMiB MappedTotalMiB UnmappedTotalMiB ReservedTotalMiB\n");
  fprintf(file, "%9zu %11zu %13zu %14zu %16zu %16zu\n",
          mapped / (size_t)(1024 * 1024),
          mapped_os / (size_t)(1024 * 1024),
          mapped_peak / (size_t)(1024 * 1024),
          mapped_total / (size_t)(1024 * 1024),
          unmapped_total / (size_t)(1024 * 1024),
          reserved_total / (size_t)(1024 * 1024));

  fprintf(file, "\n");
#else
  (void)sizeof(file);
#endif
}
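
/* Illustrative usage sketch (editorial addition, not part of the library): dumping statistics
   near process exit. The file parameter is typed void* so the header avoids a <stdio.h>
   dependency, but a FILE* is expected. The function only produces output when ENABLE_STATISTICS
   is compiled in, and the assert above fires if any thread heaps are still active.

   #include <stdio.h>
   #include "rpmalloc.h"

   static void dump_stats_before_exit(void) {
     rpmalloc_dump_statistics(stderr);
   }
*/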

#if RPMALLOC_FIRST_CLASS_HEAPS

extern inline rpmalloc_heap_t *
rpmalloc_heap_acquire(void) {
  // Must be a pristine heap from newly mapped memory pages, or else memory blocks
  // could already be allocated from the heap which would (wrongly) be released when
  // heap is cleared with rpmalloc_heap_free_all()
  heap_t *heap = _rpmalloc_heap_allocate(1);
  _rpmalloc_stat_inc(&_memory_active_heaps);
  return heap;
}

extern inline void
rpmalloc_heap_release(rpmalloc_heap_t *heap) {
  if (heap)
    _rpmalloc_heap_release(heap, 1);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) {
#if ENABLE_VALIDATE_ARGS
  if (size >= MAX_ALLOC_SIZE) {
    errno = EINVAL;
    return 0;
  }
#endif
  return _rpmalloc_allocate(heap, size);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment, size_t size) {
#if ENABLE_VALIDATE_ARGS
  if (size >= MAX_ALLOC_SIZE) {
    errno = EINVAL;
    return 0;
  }
#endif
  return _rpmalloc_aligned_allocate(heap, alignment, size);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) {
  return rpmalloc_heap_aligned_calloc(heap, 0, num, size);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, size_t num, size_t size) {
  size_t total;
#if ENABLE_VALIDATE_ARGS
#if PLATFORM_WINDOWS
  int err = SizeTMult(num, size, &total);
  if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
    errno = EINVAL;
    return 0;
  }
#else
  int err = __builtin_umull_overflow(num, size, &total);
  if (err || (total >= MAX_ALLOC_SIZE)) {
    errno = EINVAL;
    return 0;
  }
#endif
#else
  total = num * size;
#endif
  void *block = _rpmalloc_aligned_allocate(heap, alignment, total);
  if (block)
    memset(block, 0, total);
  return block;
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
  if (size >= MAX_ALLOC_SIZE) {
    errno = EINVAL;
    return ptr;
  }
#endif
  return _rpmalloc_reallocate(heap, ptr, size, 0, flags);
}

extern inline RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size, unsigned int flags) {
#if ENABLE_VALIDATE_ARGS
  if ((size + alignment < size) || (alignment > _memory_page_size)) {
    errno = EINVAL;
    return 0;
  }
#endif
  return _rpmalloc_aligned_reallocate(heap, ptr, alignment, size, 0, flags);
}

extern inline void
rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr) {
  (void)sizeof(heap);
  _rpmalloc_deallocate(ptr);
}

extern inline void
rpmalloc_heap_free_all(rpmalloc_heap_t *heap) {
  span_t *span;
  span_t *next_span;

  _rpmalloc_heap_cache_adopt_deferred(heap, 0);

  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
    span = heap->partial_span[iclass];
    while (span) {
      next_span = span->next;
      _rpmalloc_heap_cache_insert(heap, span);
      span = next_span;
    }
    heap->partial_span[iclass] = 0;
    span = heap->full_span[iclass];
    while (span) {
      next_span = span->next;
      _rpmalloc_heap_cache_insert(heap, span);
      span = next_span;
    }
  }
  memset(heap->free_list, 0, sizeof(heap->free_list));
  memset(heap->partial_span, 0, sizeof(heap->partial_span));
  memset(heap->full_span, 0, sizeof(heap->full_span));

  span = heap->large_huge_span;
  while (span) {
    next_span = span->next;
    if (UNEXPECTED(span->size_class == SIZE_CLASS_HUGE))
      _rpmalloc_deallocate_huge(span);
    else
      _rpmalloc_heap_cache_insert(heap, span);
    span = next_span;
  }
  heap->large_huge_span = 0;
  heap->full_span_count = 0;

#if ENABLE_THREAD_CACHE
  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
    span = heap->span_cache[iclass];
#if ENABLE_GLOBAL_CACHE
    while (span) {
      assert(span->span_count == (iclass + 1));
      size_t release_count = (!iclass ? _memory_span_release_count : _memory_span_release_count_large);
      next_span = _rpmalloc_span_list_split(span, (uint32_t)release_count);
      _rpmalloc_stat_add64(&heap->thread_to_global, (size_t)span->list_size * span->span_count * _memory_span_size);
      _rpmalloc_stat_add(&heap->span_use[iclass].spans_to_global, span->list_size);
      _rpmalloc_global_cache_insert_span_list(span);
      span = next_span;
    }
#else
    if (span)
      _rpmalloc_span_list_unmap_all(span);
#endif
    heap->span_cache[iclass] = 0;
  }
#endif

#if ENABLE_STATISTICS
  for (size_t iclass = 0; iclass < SIZE_CLASS_COUNT; ++iclass) {
    atomic_store32(&heap->size_class_use[iclass].alloc_current, 0);
    atomic_store32(&heap->size_class_use[iclass].spans_current, 0);
  }
  for (size_t iclass = 0; iclass < LARGE_CLASS_COUNT; ++iclass) {
    atomic_store32(&heap->span_use[iclass].current, 0);
  }
#endif
}

extern inline void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap) {
  heap_t *prev_heap = get_thread_heap_raw();
  if (prev_heap != heap) {
    set_thread_heap(heap);
    if (prev_heap)
      rpmalloc_heap_release(prev_heap);
  }
}
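
/* Illustrative usage sketch (editorial addition, not part of the library): the first-class heap
   API used as an arena - a subsystem allocates from a private heap and releases everything in
   two calls, without touching the thread-local heap. Assumes RPMALLOC_FIRST_CLASS_HEAPS is
   enabled and these entry points are declared in rpmalloc.h.

   #include "rpmalloc.h"

   static void arena_style_usage(void) {
     rpmalloc_heap_t *heap = rpmalloc_heap_acquire();
     void *a = rpmalloc_heap_alloc(heap, 128);
     void *b = rpmalloc_heap_aligned_alloc(heap, 64, 4096);
     (void)a; (void)b;
     rpmalloc_heap_free_all(heap);   // releases every block allocated from this heap
     rpmalloc_heap_release(heap);    // returns the heap itself to the allocator
   }
*/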

#endif

#if ENABLE_PRELOAD || ENABLE_OVERRIDE

#include "malloc.c"

#endif