1 /* rpmalloc.h  -  Memory allocator  -  Public Domain  -  2016 Mattias Jansson
2 
3    This library provides a cross-platform lock free thread caching malloc implementation in C11.
4    The latest source code is always available at
5 
6    https://github.com/mjansson/rpmalloc
7 
8    This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
9 
10 */
11 
12 #pragma once
13 
14 #include <stddef.h>
15 
16 #ifdef __cplusplus
17 extern "C" {
18 #endif
19 
20 #if defined(__clang__) || defined(__GNUC__)
21 # define RPMALLOC_EXPORT __attribute__((visibility("default")))
22 # define RPMALLOC_ALLOCATOR
23 # if (defined(__clang_major__) && (__clang_major__ < 4)) || (defined(__GNUC__) && defined(ENABLE_PRELOAD) && ENABLE_PRELOAD)
24 # define RPMALLOC_ATTRIB_MALLOC
25 # define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
26 # define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)
27 # else
28 # define RPMALLOC_ATTRIB_MALLOC __attribute__((__malloc__))
29 # define RPMALLOC_ATTRIB_ALLOC_SIZE(size) __attribute__((alloc_size(size)))
30 # define RPMALLOC_ATTRIB_ALLOC_SIZE2(count, size)  __attribute__((alloc_size(count, size)))
31 # endif
32 # define RPMALLOC_CDECL
33 #elif defined(_MSC_VER)
34 # define RPMALLOC_EXPORT
35 # define RPMALLOC_ALLOCATOR __declspec(allocator) __declspec(restrict)
36 # define RPMALLOC_ATTRIB_MALLOC
37 # define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
38 # define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
39 # define RPMALLOC_CDECL __cdecl
40 #else
41 # define RPMALLOC_EXPORT
42 # define RPMALLOC_ALLOCATOR
43 # define RPMALLOC_ATTRIB_MALLOC
44 # define RPMALLOC_ATTRIB_ALLOC_SIZE(size)
45 # define RPMALLOC_ATTRIB_ALLOC_SIZE2(count,size)
46 # define RPMALLOC_CDECL
47 #endif
48 
49 //! Define RPMALLOC_CONFIGURABLE to enable configuring sizes. Will introduce
50 //  a very small overhead due to some size calculations not being compile time constants
51 #ifndef RPMALLOC_CONFIGURABLE
52 #define RPMALLOC_CONFIGURABLE 0
53 #endif
54 
55 //! Define RPMALLOC_FIRST_CLASS_HEAPS to enable heap based API (rpmalloc_heap_* functions).
56 //  Will introduce a very small overhead to track fully allocated spans in heaps
57 #ifndef RPMALLOC_FIRST_CLASS_HEAPS
58 #define RPMALLOC_FIRST_CLASS_HEAPS 0
59 #endif
60 
61 //! Flag to rpaligned_realloc to not preserve content in reallocation
62 #define RPMALLOC_NO_PRESERVE    1
63 //! Flag to rpaligned_realloc to fail and return null pointer if grow cannot be done in-place,
64 //  in which case the original pointer is still valid (just like a call to realloc which failes to allocate
65 //  a new block).
66 #define RPMALLOC_GROW_OR_FAIL   2
67 
68 typedef struct rpmalloc_global_statistics_t {
69   //! Current amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
70   size_t mapped;
71   //! Peak amount of virtual memory mapped, all of which might not have been committed (only if ENABLE_STATISTICS=1)
72   size_t mapped_peak;
73   //! Current amount of memory in global caches for small and medium sizes (<32KiB)
74   size_t cached;
75   //! Current amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
76   size_t huge_alloc;
77   //! Peak amount of memory allocated in huge allocations, i.e larger than LARGE_SIZE_LIMIT which is 2MiB by default (only if ENABLE_STATISTICS=1)
78   size_t huge_alloc_peak;
79   //! Total amount of memory mapped since initialization (only if ENABLE_STATISTICS=1)
80   size_t mapped_total;
81   //! Total amount of memory unmapped since initialization  (only if ENABLE_STATISTICS=1)
82   size_t unmapped_total;
83 } rpmalloc_global_statistics_t;
84 
85 typedef struct rpmalloc_thread_statistics_t {
86   //! Current number of bytes available in thread size class caches for small and medium sizes (<32KiB)
87   size_t sizecache;
88   //! Current number of bytes available in thread span caches for small and medium sizes (<32KiB)
89   size_t spancache;
90   //! Total number of bytes transitioned from thread cache to global cache (only if ENABLE_STATISTICS=1)
91   size_t thread_to_global;
92   //! Total number of bytes transitioned from global cache to thread cache (only if ENABLE_STATISTICS=1)
93   size_t global_to_thread;
94   //! Per span count statistics (only if ENABLE_STATISTICS=1)
95   struct {
96     //! Currently used number of spans
97     size_t current;
98     //! High water mark of spans used
99     size_t peak;
100     //! Number of spans transitioned to global cache
101     size_t to_global;
102     //! Number of spans transitioned from global cache
103     size_t from_global;
104     //! Number of spans transitioned to thread cache
105     size_t to_cache;
106     //! Number of spans transitioned from thread cache
107     size_t from_cache;
108     //! Number of spans transitioned to reserved state
109     size_t to_reserved;
110     //! Number of spans transitioned from reserved state
111     size_t from_reserved;
112     //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
113     size_t map_calls;
114   } span_use[32];
115   //! Per size class statistics (only if ENABLE_STATISTICS=1)
116   struct {
117     //! Current number of allocations
118     size_t alloc_current;
119     //! Peak number of allocations
120     size_t alloc_peak;
121     //! Total number of allocations
122     size_t alloc_total;
123     //! Total number of frees
124     size_t free_total;
125     //! Number of spans transitioned to cache
126     size_t spans_to_cache;
127     //! Number of spans transitioned from cache
128     size_t spans_from_cache;
129     //! Number of spans transitioned from reserved state
130     size_t spans_from_reserved;
131     //! Number of raw memory map calls (not hitting the reserve spans but resulting in actual OS mmap calls)
132     size_t map_calls;
133   } size_use[128];
134 } rpmalloc_thread_statistics_t;
135 
136 typedef struct rpmalloc_config_t {
137   //! Map memory pages for the given number of bytes. The returned address MUST be
138   //  aligned to the rpmalloc span size, which will always be a power of two.
139   //  Optionally the function can store an alignment offset in the offset variable
140   //  in case it performs alignment and the returned pointer is offset from the
141   //  actual start of the memory region due to this alignment. The alignment offset
142   //  will be passed to the memory unmap function. The alignment offset MUST NOT be
143   //  larger than 65535 (storable in an uint16_t), if it is you must use natural
144   //  alignment to shift it into 16 bits. If you set a memory_map function, you
145   //  must also set a memory_unmap function or else the default implementation will
146   //  be used for both.
147   void *(*memory_map)(size_t size, size_t *offset);
148   //! Unmap the memory pages starting at address and spanning the given number of bytes.
149   //  If release is set to non-zero, the unmap is for an entire span range as returned by
150   //  a previous call to memory_map and that the entire range should be released. The
151   //  release argument holds the size of the entire span range. If release is set to 0,
152   //  the unmap is a partial decommit of a subset of the mapped memory range.
153   //  If you set a memory_unmap function, you must also set a memory_map function or
154   //  else the default implementation will be used for both.
155   void (*memory_unmap)(void *address, size_t size, size_t offset, size_t release);
156   //! Size of memory pages. The page size MUST be a power of two. All memory mapping
157   //  requests to memory_map will be made with size set to a multiple of the page size.
158   //  Used if RPMALLOC_CONFIGURABLE is defined to 1, otherwise system page size is used.
159   size_t page_size;
160   //! Size of a span of memory blocks. MUST be a power of two, and in [4096,262144]
161   //  range (unless 0 - set to 0 to use the default span size). Used if RPMALLOC_CONFIGURABLE
162   //  is defined to 1.
163   size_t span_size;
164   //! Number of spans to map at each request to map new virtual memory blocks. This can
165   //  be used to minimize the system call overhead at the cost of virtual memory address
166   //  space. The extra mapped pages will not be written until actually used, so physical
167   //  committed memory should not be affected in the default implementation. Will be
168   //  aligned to a multiple of spans that match memory page size in case of huge pages.
169   size_t span_map_count;
170   //! Enable use of large/huge pages. If this flag is set to non-zero and page size is
171   //  zero, the allocator will try to enable huge pages and auto detect the configuration.
172   //  If this is set to non-zero and page_size is also non-zero, the allocator will
173   //  assume huge pages have been configured and enabled prior to initializing the
174   //  allocator.
175   //  For Windows, see https://docs.microsoft.com/en-us/windows/desktop/memory/large-page-support
176   //  For Linux, see https://www.kernel.org/doc/Documentation/vm/hugetlbpage.txt
177   int enable_huge_pages;
178 } rpmalloc_config_t;
179 
180 //! Initialize allocator with default configuration
181 RPMALLOC_EXPORT int
182 rpmalloc_initialize(void);
183 
184 //! Initialize allocator with given configuration
185 RPMALLOC_EXPORT int
186 rpmalloc_initialize_config(const rpmalloc_config_t *config);
187 
188 //! Get allocator configuration
189 RPMALLOC_EXPORT const rpmalloc_config_t *
190 rpmalloc_config(void);
191 
192 //! Finalize allocator
193 RPMALLOC_EXPORT void
194 rpmalloc_finalize(void);
195 
196 //! Initialize allocator for calling thread
197 RPMALLOC_EXPORT void
198 rpmalloc_thread_initialize(void);
199 
200 //! Finalize allocator for calling thread
201 RPMALLOC_EXPORT void
202 rpmalloc_thread_finalize(void);
203 
204 //! Perform deferred deallocations pending for the calling thread heap
205 RPMALLOC_EXPORT void
206 rpmalloc_thread_collect(void);
207 
208 //! Query if allocator is initialized for calling thread
209 RPMALLOC_EXPORT int
210 rpmalloc_is_thread_initialized(void);
211 
212 //! Get per-thread statistics
213 RPMALLOC_EXPORT void
214 rpmalloc_thread_statistics(rpmalloc_thread_statistics_t *stats);
215 
216 //! Get global statistics
217 RPMALLOC_EXPORT void
218 rpmalloc_global_statistics(rpmalloc_global_statistics_t *stats);
219 
220 //! Dump all statistics in human readable format to file (should be a FILE*)
221 RPMALLOC_EXPORT void
222 rpmalloc_dump_statistics(void *file);
223 
224 //! Allocate a memory block of at least the given size
225 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
226 rpmalloc(size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(1);
227 
228 //! Free the given memory block
229 RPMALLOC_EXPORT void
230 rpfree(void *ptr);
231 
232 //! Allocate a memory block of at least the given size and zero initialize it
233 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
234 rpcalloc(size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(1, 2);
235 
236 //! Reallocate the given block to at least the given size
237 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
238 rprealloc(void *ptr, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
239 
240 //! Reallocate the given block to at least the given size and alignment,
241 //  with optional control flags (see RPMALLOC_NO_PRESERVE).
242 //  Alignment must be a power of two and a multiple of sizeof(void*),
243 //  and should ideally be less than memory page size. A caveat of rpmalloc
244 //  internals is that this must also be strictly less than the span size (default 64KiB)
245 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
246 rpaligned_realloc(void *ptr, size_t alignment, size_t size, size_t oldsize,
247                   unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
248 
249 //! Allocate a memory block of at least the given size and alignment.
250 //  Alignment must be a power of two and a multiple of sizeof(void*),
251 //  and should ideally be less than memory page size. A caveat of rpmalloc
252 //  internals is that this must also be strictly less than the span size (default 64KiB)
253 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
254 rpaligned_alloc(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
255 
256 //! Allocate a memory block of at least the given size and alignment, and zero initialize it.
257 //  Alignment must be a power of two and a multiple of sizeof(void*),
258 //  and should ideally be less than memory page size. A caveat of rpmalloc
259 //  internals is that this must also be strictly less than the span size (default 64KiB)
260 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
261 rpaligned_calloc(size_t alignment, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
262 
263 //! Allocate a memory block of at least the given size and alignment.
264 //  Alignment must be a power of two and a multiple of sizeof(void*),
265 //  and should ideally be less than memory page size. A caveat of rpmalloc
266 //  internals is that this must also be strictly less than the span size (default 64KiB)
267 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
268 rpmemalign(size_t alignment, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
269 
270 //! Allocate a memory block of at least the given size and alignment.
271 //  Alignment must be a power of two and a multiple of sizeof(void*),
272 //  and should ideally be less than memory page size. A caveat of rpmalloc
273 //  internals is that this must also be strictly less than the span size (default 64KiB)
274 RPMALLOC_EXPORT int
275 rpposix_memalign(void **memptr, size_t alignment, size_t size);
276 
277 //! Query the usable size of the given memory block (from given pointer to the end of block)
278 RPMALLOC_EXPORT size_t
279 rpmalloc_usable_size(void *ptr);
280 
281 #if RPMALLOC_FIRST_CLASS_HEAPS
282 
283 //! Heap type
284 typedef struct heap_t rpmalloc_heap_t;
285 
286 //! Acquire a new heap. Will reuse existing released heaps or allocate memory for a new heap
287 //  if none available. Heap API is imlemented with the strict assumption that only one single
288 //  thread will call heap functions for a given heap at any given time, no functions are thread safe.
289 RPMALLOC_EXPORT rpmalloc_heap_t *
290 rpmalloc_heap_acquire(void);
291 
292 //! Release a heap (does NOT free the memory allocated by the heap, use rpmalloc_heap_free_all before destroying the heap).
293 //  Releasing a heap will enable it to be reused by other threads. Safe to pass a null pointer.
294 RPMALLOC_EXPORT void
295 rpmalloc_heap_release(rpmalloc_heap_t *heap);
296 
297 //! Allocate a memory block of at least the given size using the given heap.
298 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
299 rpmalloc_heap_alloc(rpmalloc_heap_t *heap, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(2);
300 
301 //! Allocate a memory block of at least the given size using the given heap. The returned
302 //  block will have the requested alignment. Alignment must be a power of two and a multiple of sizeof(void*),
303 //  and should ideally be less than memory page size. A caveat of rpmalloc
304 //  internals is that this must also be strictly less than the span size (default 64KiB).
305 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
306 rpmalloc_heap_aligned_alloc(rpmalloc_heap_t *heap, size_t alignment,
307                             size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
308 
309 //! Allocate a memory block of at least the given size using the given heap and zero initialize it.
310 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
311 rpmalloc_heap_calloc(rpmalloc_heap_t *heap, size_t num, size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
312 
313 //! Allocate a memory block of at least the given size using the given heap and zero initialize it. The returned
314 //  block will have the requested alignment. Alignment must either be zero, or a power of two and a multiple of sizeof(void*),
315 //  and should ideally be less than memory page size. A caveat of rpmalloc
316 //  internals is that this must also be strictly less than the span size (default 64KiB).
317 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
318 rpmalloc_heap_aligned_calloc(rpmalloc_heap_t *heap, size_t alignment, size_t num,
319                              size_t size) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE2(2, 3);
320 
321 //! Reallocate the given block to at least the given size. The memory block MUST be allocated
322 //  by the same heap given to this function.
323 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
324 rpmalloc_heap_realloc(rpmalloc_heap_t *heap, void *ptr, size_t size,
325                       unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
326 
327 //! Reallocate the given block to at least the given size. The memory block MUST be allocated
328 //  by the same heap given to this function. The returned block will have the requested alignment.
329 //  Alignment must be either zero, or a power of two and a multiple of sizeof(void*), and should ideally be
330 //  less than memory page size. A caveat of rpmalloc internals is that this must also be strictly less than
331 //  the span size (default 64KiB).
332 RPMALLOC_EXPORT RPMALLOC_ALLOCATOR void *
333 rpmalloc_heap_aligned_realloc(rpmalloc_heap_t *heap, void *ptr, size_t alignment, size_t size,
334                               unsigned int flags) RPMALLOC_ATTRIB_MALLOC RPMALLOC_ATTRIB_ALLOC_SIZE(3);
335 
336 //! Free the given memory block from the given heap. The memory block MUST be allocated
337 //  by the same heap given to this function.
338 RPMALLOC_EXPORT void
339 rpmalloc_heap_free(rpmalloc_heap_t *heap, void *ptr);
340 
341 //! Free all memory allocated by the heap
342 RPMALLOC_EXPORT void
343 rpmalloc_heap_free_all(rpmalloc_heap_t *heap);
344 
345 //! Set the given heap as the current heap for the calling thread. A heap MUST only be current heap
346 //  for a single thread, a heap can never be shared between multiple threads. The previous
347 //  current heap for the calling thread is released to be reused by other threads.
348 RPMALLOC_EXPORT void
349 rpmalloc_heap_thread_set_current(rpmalloc_heap_t *heap);
350 
351 #endif
352 
353 #ifdef __cplusplus
354 }
355 #endif
356