/* rpmalloc.h  -  Memory allocator  -  Public Domain  -  2016 Mattias Jansson

   This library provides a cross-platform lock free thread caching malloc implementation in C11.
   The latest source code is always available at

   https://github.com/mjansson/rpmalloc

   This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.

*/

#pragma once

#include <stddef.h>

#ifdef __cplusplus
extern "C" {
#endif

// Compiler specific decorations used on the declarations below:
//   RPMALLOC_EXPORT              - symbol visibility
//   RPMALLOC_ALLOCATOR           - return value decoration for allocating functions
//   RPMALLOC_ATTRIB_MALLOC       - returned pointer does not alias any other live pointer
//   RPMALLOC_ATTRIB_ALLOC_SIZE   - 1-based index of the parameter holding the block size
//   RPMALLOC_ATTRIB_ALLOC_SIZE2  - 1-based indices of the (count, size) parameter pair
//   RPMALLOC_CDECL               - calling convention
#if defined(__clang__) || defined(__GNUC__)
# define RPMALLOC_EXPORT __attribute__((visibility("default")))
# define RPMALLOC_ALLOCATOR
# if (defined(__clang_major__) && (__clang_major__ < 4)) || (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD)
#  define RPMALLOC_ATTRIB_MALLOC
#  define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
#  define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
# else
#  define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
#  define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
#  define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size) __attribute__((alloc_size(count, size)))
# endif
# define RPMALLOC_CDECL
#elif defined(_MSC_VER)
# define RPMALLOC_EXPORT
# define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
# define RPMALLOC_ATTRIB_MALLOC
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
# define RPMALLOC_CDECL __cdecl
#else
# define RPMALLOC_EXPORT
# define RPMALLOC_ALLOCATOR
# define RPMALLOC_ATTRIB_MALLOC
# define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
# define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
# define RPMALLOC_CDECL
#endif

//! Define RPMALLOC_CONFIGURABLE to enable configuring sizes. Will introduce
//  a very small overhead due to some size calculations not being compile time constants
#ifndef RPMALLOC_CONFIGURABLE
#define RPMALLOC_CONFIGURABLE 0
#endif

//! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_* functions).
//  Will introduce a very small overhead to track fully allocated spans in heaps
#ifndef RPMALLOC_FIRST_CLASS_HEAPS
#define RPMALLOC_FIRST_CLASS_HEAPS 0
#endif

//! Flag to rpaligned_realloc to not preserve content in reallocation
#define RPMALLOC_NO_PRESERVE    1
//! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be done in-place,
//  in which case the original pointer is still valid (just like a call to realloc which fails to allocate
//  a new block).
#define RPMALLOC_GROW_OR_FAIL   2

typedef struct rpmalloc_global_statistics_t {
    //! Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
    size_t mapped;
    //! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
    size_t mapped_peak;
    //! Current amount of memory in global caches for small and medium sizes (<32KiB)
    size_t cached;
    //! Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
    size_t huge_alloc;
    //! Peak amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
    size_t huge_alloc_peak;
    //! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
    size_t mapped_total;
    //! Total amount of memory unmapped since initialization (only if ENABLE_STATISTICS=1)
    size_t unmapped_total;
} rpmalloc_global_statistics_t;

typedef struct rpmalloc_thread_statistics_t {
    //! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
    size_t sizecache;
    //! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
    size_t spancache;
    //! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
    size_t thread_to_global;
    //! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
    size_t global_to_thread;
    //! Per span count statistics (only if ENABLE_STATISTICS=1)
    struct {
        //! Currently used number of spans
        size_t current;
        //! High water mark of spans used
        size_t peak;
        //! Number of spans transitioned to global cache
        size_t to_global;
        //! Number of spans transitioned from global cache
        size_t from_global;
        //! Number of spans transitioned to thread cache
        size_t to_cache;
        //! Number of spans transitioned from thread cache
        size_t from_cache;
        //! Number of spans transitioned to reserved state
        size_t to_reserved;
        //! Number of spans transitioned from reserved state
        size_t from_reserved;
        //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
        size_t map_calls;
    } span_use[32];
    //! Per size class statistics (only if ENABLE_STATISTICS=1)
    struct {
        //! Current number of allocations
        size_t alloc_current;
        //! Peak number of allocations
        size_t alloc_peak;
        //! Total number of allocations
        size_t alloc_total;
        //! Total number of frees
        size_t free_total;
        //! Number of spans transitioned to cache
        size_t spans_to_cache;
        //! Number of spans transitioned from cache
        size_t spans_from_cache;
        //! Number of spans transitioned from reserved state
        size_t spans_from_reserved;
        //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
        size_t map_calls;
    } size_use[128];
} rpmalloc_thread_statistics_t;

typedef struct rpmalloc_config_t {
    //! Map memory pages for the given number of bytes. The returned address MUST be
    //  aligned to the rpmalloc span size, which will always be a power of two.
    //  Optionally the function can store an alignment offset in the offset variable
    //  in case it performs alignment and the returned pointer is offset from the
    //  actual start of the memory region due to this alignment. The alignment offset
    //  will be passed to the memory unmap function. The alignment offset MUST NOT be
    //  larger than 65535 (storable in an uint16_t), if it is you must use natural
    //  alignment to shift it into 16 bits. If you set a memory_map function, you
    //  must also set a memory_unmap function or else the default implementation will
    //  be used for both.
    void *(*memory_map)(size_t size, size_t *offset);
    //! Unmap the memory pages starting at address and spanning the given number of bytes.
    //  If release is set to non-zero, the unmap is for an entire span range as returned by
    //  a previous call to memory_map and that the entire range should be released. The
    //  release argument holds the size of the entire span range. If release is set to 0,
    //  the unmap is a partial decommit of a subset of the mapped memory range.
    //  If you set a memory_unmap function, you must also set a memory_map function or
    //  else the default implementation will be used for both.
    void (*memory_unmap)(void *address, size_t size, size_t offset, size_t release);
    //! Size of memory pages. The page size MUST be a power of two. All memory mapping
    //  requests to memory_map will be made with size set to a multiple of the page size.
    //  Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used.
    size_t page_size;
    //! Size of a span of memory blocks. MUST be a power of two, and in [4096,262144]
    //  range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE
    //  is defined to 1.
    size_t span_size;
    //! Number of spans to map at each request to map new virtual memory blocks. This can
    //  be used to minimize the system call overhead at the cost of virtual memory address
    //  space. The extra mapped pages will not be written until actually used, so physical
    //  committed memory should not be affected in the default implementation. Will be
    //  aligned to a multiple of spans that match memory page size in case of huge pages.
    size_t span_map_count;
    //! Enable use of large/huge pages. If this flag is set to non-zero and page size is
    //  zero, the allocator will try to enable huge pages and auto detect the configuration.
    //  If this is set to non-zero and page_size is also non-zero, the allocator will
    //  assume huge pages have been configured and enabled prior to initializing the
    //  allocator.
    //  For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
    //  For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
    int enable_huge_pages;
} rpmalloc_config_t;

//! Initialize allocator with default configuration
RPMALLOC_EXPORT int
rpmalloc_initialize(void);

//! Initialize allocator with given configuration
RPMALLOC_EXPORT int
rpmalloc_initialize_config(const rpmalloc_config_t *config);

//! Get allocator configuration
RPMALLOC_EXPORT const rpmalloc_config_t *
rpmalloc_config(void);

//! Finalize allocator
RPMALLOC_EXPORT void
rpmalloc_finalize(void);

//! Initialize allocator for calling thread
RPMALLOC_EXPORT void
rpmalloc_thread_initialize(void);

//! Finalize allocator for calling thread
RPMALLOC_EXPORT void
rpmalloc_thread_finalize(void);

//! Perform deferred deallocations pending for the calling thread heap
RPMALLOC_EXPORT void
rpmalloc_thread_collect(void);

//! Query if allocator is initialized for calling thread
RPMALLOC_EXPORT int
rpmalloc_is_thread_initialized(void);

//! Get per-thread statistics
RPMALLOC_EXPORT void
rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats);

//! Get global statistics
RPMALLOC_EXPORT void
rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats);

//! Dump all statistics in human readable format to file (should be a FILE*)
RPMALLOC_EXPORT void
rpmalloc_dump_statistics(void *file);

//! Allocate a memory block of at least the given size
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);

//! Free the given memory block
RPMALLOC_EXPORT void
rpfree(void *ptr);

//! Allocate a memory block of at least the given size and zero initialize it
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);

//! Reallocate the given block to at least the given size
//  Not decorated with RPMALLOC_ATTRIB_MALLOC: a reallocated block can carry over
//  the old content (including pointers to live objects), which the GCC/Clang
//  malloc attribute does not permit.
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rprealloc(void *ptr, size_t size) RPMALLOC_ATTRIB_ALLOC_SIZE(2);

//! Reallocate the given block to at least the given size and alignment,
//  with optional control flags (see RPMALLOC_NO_PRESERVE).
//  Alignment must be a power of two and a multiple of sizeof(void*),
//  and should ideally be less than memory page size. A caveat of rpmalloc
//  internals is that this must also be strictly less than the span size (default 64KiB)
//  Not decorated with RPMALLOC_ATTRIB_MALLOC, since the returned block can carry
//  over the old content.
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
                  unsigned int flags) RPMALLOC_ATTRIB_ALLOC_SIZE(3);

//! Allocate a memory block of at least the given size and alignment.
//  Alignment must be a power of two and a multiple of sizeof(void*),
//  and should ideally be less than memory page size. A caveat of rpmalloc
//  internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);

//! Allocate a memory block of at least the given size and alignment, and zero initialize it.
//  Alignment must be a power of two and a multiple of sizeof(void*),
//  and should ideally be less than memory page size. A caveat of rpmalloc
//  internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpaligned_calloc(size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);

//! Allocate a memory block of at least the given size and alignment.
//  Alignment must be a power of two and a multiple of sizeof(void*),
//  and should ideally be less than memory page size. A caveat of rpmalloc
//  internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);

//! Allocate a memory block of at least the given size and alignment.
//  Alignment must be a power of two and a multiple of sizeof(void*),
//  and should ideally be less than memory page size. A caveat of rpmalloc
//  internals is that this must also be strictly less than the span size (default 64KiB)
RPMALLOC_EXPORT int
rpposix_memalign(void **memptr, size_t alignment, size_t size);

//! Query the usable size of the given memory block (from given pointer to the end of block)
RPMALLOC_EXPORT size_t
rpmalloc_usable_size(void *ptr);

#if RPMALLOC_FIRST_CLASS_HEAPS

//! Heap type
typedef struct heap_t rpmalloc_heap_t;

//! Acquire a new heap. Will reuse existing released heaps or allocate memory for a new heap
//  if none available. Heap API is implemented with the strict assumption that only one single
//  thread will call heap functions for a given heap at any given time, no functions are thread safe.
RPMALLOC_EXPORT rpmalloc_heap_t *
rpmalloc_heap_acquire(void);

//! Release a heap (does NOT free the memory allocated by the heap, use rpmalloc_heap_free_all before destroying the heap).
//  Releasing a heap will enable it to be reused by other threads. Safe to pass a null pointer.
RPMALLOC_EXPORT void
rpmalloc_heap_release(rpmalloc_heap_t *heap);

//! Allocate a memory block of at least the given size using the given heap.
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);

//! Allocate a memory block of at least the given size using the given heap. The returned
//  block will have the requested alignment. Alignment must be a power of two and a multiple of sizeof(void*),
//  and should ideally be less than memory page size. A caveat of rpmalloc
//  internals is that this must also be strictly less than the span size (default 64KiB).
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
                            size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);

//! Allocate a memory block of at least the given size using the given heap and zero initialize it.
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);

//! Allocate a memory block of at least the given size using the given heap and zero initialize it. The returned
//  block will have the requested alignment. Alignment must either be zero, or a power of two and a multiple of sizeof(void*),
//  and should ideally be less than memory page size. A caveat of rpmalloc
//  internals is that this must also be strictly less than the span size (default 64KiB).
//  The allocation size is num * size, i.e. parameters 3 and 4 (parameter 2 is the alignment).
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, size_t num,
                             size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(3, 4);

//! Reallocate the given block to at least the given size. The memory block MUST be allocated
//  by the same heap given to this function.
//  Not decorated with RPMALLOC_ATTRIB_MALLOC, since the returned block can carry
//  over the old content.
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
                      unsigned int flags) RPMALLOC_ATTRIB_ALLOC_SIZE(3);

//! Reallocate the given block to at least the given size. The memory block MUST be allocated
//  by the same heap given to this function. The returned block will have the requested alignment.
//  Alignment must be either zero, or a power of two and a multiple of sizeof(void*), and should ideally be
//  less than memory page size. A caveat of rpmalloc internals is that this must also be strictly less than
//  the span size (default 64KiB).
//  The block size is parameter 4 (parameter 3 is the alignment). Not decorated with
//  RPMALLOC_ATTRIB_MALLOC, since the returned block can carry over the old content.
RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size,
                              unsigned int flags) RPMALLOC_ATTRIB_ALLOC_SIZE(4);

//! Free the given memory block from the given heap. The memory block MUST be allocated
//  by the same heap given to this function.
RPMALLOC_EXPORT void
rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr);

//! Free all memory allocated by the heap
RPMALLOC_EXPORT void
rpmalloc_heap_free_all(rpmalloc_heap_t *heap);

//! Set the given heap as the current heap for the calling thread. A heap MUST only be current heap
//  for a single thread, a heap can never be shared between multiple threads. The previous
//  current heap for the calling thread is released to be reused by other threads.
RPMALLOC_EXPORT void
rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap);

#endif

#ifdef __cplusplus
}
#endif