1 /* malloc.c - Memory allocator - Public Domain - 2016 Mattias Jansson
2 *
3 * This library provides a cross-platform lock free thread caching malloc implementation in C11.
4 * The latest source code is always available at
5 *
6 * https://github.com/mjansson/rpmalloc
7 *
8 * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
9 *
10 */
11
12 //
13 // This file provides overrides for the standard library malloc entry points for C and new/delete operators for C++
14 // It also provides automatic initialization/finalization of process and threads
15 //
16
// Detect the pointer width unless the build already supplied ARCH_64BIT, and
// verify at compile time that size_t and void* match the detected width.
#ifndef ARCH_64BIT
#  if defined(_WIN64) || defined(__LP64__) || defined(__LLP64__)
#    define ARCH_64BIT 1
#  else
#    define ARCH_64BIT 0
#  endif
_Static_assert(sizeof(size_t) == (ARCH_64BIT ? 8 : 4), "Data type size mismatch");
_Static_assert(sizeof(void*) == (ARCH_64BIT ? 8 : 4), "Data type size mismatch");
#endif
28
29 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
30 #pragma GCC visibility push(default)
31 #endif
32
33 #if ENABLE_OVERRIDE
34
// Select how the standard entry points are replaced:
//   USE_INTERPOSE - interpose table (Apple targets when preloading)
//   USE_ALIAS     - symbol aliases (every non-Windows target that does not interpose)
//   USE_IMPLEMENT - wrapper function definitions (all remaining configurations)
#if defined(__APPLE__) && ENABLE_PRELOAD
#define USE_INTERPOSE 1
#else
#define USE_INTERPOSE 0
#endif

#if !defined(_WIN32) && !USE_INTERPOSE
#define USE_IMPLEMENT 0
#define USE_ALIAS 1
#else
#define USE_IMPLEMENT 1
#define USE_ALIAS 0
#endif
50
51 #ifdef _MSC_VER
52 #pragma warning (disable : 4100)
53 #undef malloc
54 #undef free
55 #undef calloc
56 #endif
57
58 #if USE_IMPLEMENT
59
malloc(size_t size)60 extern inline void* RPMALLOC_CDECL malloc(size_t size) { return rpmalloc(size); }
calloc(size_t count,size_t size)61 extern inline void* RPMALLOC_CDECL calloc(size_t count, size_t size) { return rpcalloc(count, size); }
realloc(void * ptr,size_t size)62 extern inline void* RPMALLOC_CDECL realloc(void* ptr, size_t size) { return rprealloc(ptr, size); }
reallocf(void * ptr,size_t size)63 extern inline void* RPMALLOC_CDECL reallocf(void* ptr, size_t size) { return rprealloc(ptr, size); }
aligned_alloc(size_t alignment,size_t size)64 extern inline void* RPMALLOC_CDECL aligned_alloc(size_t alignment, size_t size) { return rpaligned_alloc(alignment, size); }
memalign(size_t alignment,size_t size)65 extern inline void* RPMALLOC_CDECL memalign(size_t alignment, size_t size) { return rpmemalign(alignment, size); }
posix_memalign(void ** memptr,size_t alignment,size_t size)66 extern inline int RPMALLOC_CDECL posix_memalign(void** memptr, size_t alignment, size_t size) { return rpposix_memalign(memptr, alignment, size); }
free(void * ptr)67 extern inline void RPMALLOC_CDECL free(void* ptr) { rpfree(ptr); }
cfree(void * ptr)68 extern inline void RPMALLOC_CDECL cfree(void* ptr) { rpfree(ptr); }
malloc_usable_size(void * ptr)69 extern inline size_t RPMALLOC_CDECL malloc_usable_size(void* ptr) { return rpmalloc_usable_size(ptr); }
malloc_size(void * ptr)70 extern inline size_t RPMALLOC_CDECL malloc_size(void* ptr) { return rpmalloc_usable_size(ptr); }
71
72 // Overload the C++ operators using the mangled names (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
73 // operators delete and delete[]
// Each operator is declared before it is defined so that the definition has a
// visible prototype.

// operators delete and delete[]
extern void _ZdlPv(void* p); void _ZdlPv(void* p) { rpfree(p); }
extern void _ZdaPv(void* p); void _ZdaPv(void* p) { rpfree(p); }
#if ARCH_64BIT
// 64-bit operators new and new[], normal and aligned ('m' mangles unsigned long)
extern void* _Znwm(uint64_t size); void* _Znwm(uint64_t size) { return rpmalloc(size); }
extern void* _Znam(uint64_t size); void* _Znam(uint64_t size) { return rpmalloc(size); }
extern void* _Znwmm(uint64_t size, uint64_t align); void* _Znwmm(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
extern void* _Znamm(uint64_t size, uint64_t align); void* _Znamm(uint64_t size, uint64_t align) { return rpaligned_alloc(align, size); }
#else
// 32-bit operators new and new[], normal and aligned ('j' mangles unsigned int).
// Both parameters of the aligned variants are 32-bit, the same as in the
// alias-based definitions of these symbols; declaring them as 64-bit would
// make the callee read its arguments with the wrong width.
extern void* _Znwj(uint32_t size); void* _Znwj(uint32_t size) { return rpmalloc(size); }
extern void* _Znaj(uint32_t size); void* _Znaj(uint32_t size) { return rpmalloc(size); }
extern void* _Znwjj(uint32_t size, uint32_t align); void* _Znwjj(uint32_t size, uint32_t align) { return rpaligned_alloc(align, size); }
extern void* _Znajj(uint32_t size, uint32_t align); void* _Znajj(uint32_t size, uint32_t align) { return rpaligned_alloc(align, size); }
#endif
89
90 #endif
91
92 #if USE_INTERPOSE
93
// Replacement/original function pair, laid out as stored in the
// "__DATA, __interpose" section.
struct interpose_t {
	void* new_func;
	void* orig_func;
};
typedef struct interpose_t interpose_t;

// Brace initializer for one interpose_t entry
#define MAC_INTERPOSE_PAIR(newf, oldf) { (void*)newf, (void*)oldf }

// Emit a single stand-alone interpose entry named after both functions
#define MAC_INTERPOSE_SINGLE(newf, oldf) \
	static const interpose_t macinterpose##newf##oldf \
	__attribute__((used, section("__DATA, __interpose"))) = MAC_INTERPOSE_PAIR(newf, oldf)
103
104 __attribute__((used)) static const interpose_t macinterpose_malloc[]
105 __attribute__ ((section("__DATA, __interpose"))) = {
106 //new and new[]
107 MAC_INTERPOSE_PAIR(rpmalloc, _Znwm),
108 MAC_INTERPOSE_PAIR(rpmalloc, _Znam),
109 //delete and delete[]
110 MAC_INTERPOSE_PAIR(rpfree, _ZdlPv),
111 MAC_INTERPOSE_PAIR(rpfree, _ZdaPv),
112 MAC_INTERPOSE_PAIR(rpmalloc, malloc),
113 MAC_INTERPOSE_PAIR(rpmalloc, calloc),
114 MAC_INTERPOSE_PAIR(rprealloc, realloc),
115 MAC_INTERPOSE_PAIR(rprealloc, reallocf),
116 MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc),
117 MAC_INTERPOSE_PAIR(rpmemalign, memalign),
118 MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign),
119 MAC_INTERPOSE_PAIR(rpfree, free),
120 MAC_INTERPOSE_PAIR(rpfree, cfree),
121 MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size),
122 MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)
123 };
124
125 #endif
126
127 #if USE_ALIAS
128
// Turn the preceding prototype into an exported alias of `fn`.
// The expansion already ends the declaration with a semicolon, so uses of the
// macro are written without one.
#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default")));

// C++ operators, provided under their mangled names
// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)

// delete[] and delete
void _ZdaPv(void* p) RPALIAS(rpfree)
void _ZdlPv(void* p) RPALIAS(rpfree)

#if !ARCH_64BIT
// 32-bit new[] and new, plain variants aliased, aligned variants wrapped
void* _Znaj(uint32_t size) RPALIAS(rpmalloc)
void* _Znwj(uint32_t size) RPALIAS(rpmalloc)
extern inline void*
_Znajj(uint32_t size, uint32_t align) {
	return rpaligned_alloc(align, size);
}
extern inline void*
_Znwjj(uint32_t size, uint32_t align) {
	return rpaligned_alloc(align, size);
}
#else
// 64-bit new[] and new, plain variants aliased, aligned variants wrapped
void* _Znam(uint64_t size) RPALIAS(rpmalloc)
void* _Znwm(uint64_t size) RPALIAS(rpmalloc)
extern inline void*
_Znamm(uint64_t size, uint64_t align) {
	return rpaligned_alloc(align, size);
}
extern inline void*
_Znwmm(uint64_t size, uint64_t align) {
	return rpaligned_alloc(align, size);
}
#endif
150
malloc(size_t size)151 void* malloc(size_t size) RPALIAS(rpmalloc)
152 void* calloc(size_t count, size_t size) RPALIAS(rpcalloc)
153 void* realloc(void* ptr, size_t size) RPALIAS(rprealloc)
154 void* reallocf(void* ptr, size_t size) RPALIAS(rprealloc)
155 void* aligned_alloc(size_t alignment, size_t size) RPALIAS(rpaligned_alloc)
156 void* memalign(size_t alignment, size_t size) RPALIAS(rpmemalign)
157 int posix_memalign(void** memptr, size_t alignment, size_t size) RPALIAS(rpposix_memalign)
158 void free(void* ptr) RPALIAS(rpfree)
159 void cfree(void* ptr) RPALIAS(rpfree)
160 size_t malloc_usable_size(void* ptr) RPALIAS(rpmalloc_usable_size)
161 size_t malloc_size(void* ptr) RPALIAS(rpmalloc_usable_size)
162
163 #endif
164
165 extern inline void* RPMALLOC_CDECL
166 reallocarray(void* ptr, size_t count, size_t size) {
167 size_t total;
168 #if ENABLE_VALIDATE_ARGS
169 #ifdef _MSC_VER
170 int err = SizeTMult(count, size, &total);
171 if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
172 errno = EINVAL;
173 return 0;
174 }
175 #else
176 int err = __builtin_umull_overflow(count, size, &total);
177 if (err || (total >= MAX_ALLOC_SIZE)) {
178 errno = EINVAL;
179 return 0;
180 }
181 #endif
182 #else
183 total = count * size;
184 #endif
185 return realloc(ptr, total);
186 }
187
188 extern inline void* RPMALLOC_CDECL
valloc(size_t size)189 valloc(size_t size) {
190 get_thread_heap();
191 if (!size)
192 size = _memory_page_size;
193 size_t total_size = size + _memory_page_size;
194 #if ENABLE_VALIDATE_ARGS
195 if (total_size < size) {
196 errno = EINVAL;
197 return 0;
198 }
199 #endif
200 void* buffer = rpmalloc(total_size);
201 if ((uintptr_t)buffer & (_memory_page_size - 1))
202 return (void*)(((uintptr_t)buffer & ~(_memory_page_size - 1)) + _memory_page_size);
203 return buffer;
204 }
205
206 extern inline void* RPMALLOC_CDECL
pvalloc(size_t size)207 pvalloc(size_t size) {
208 get_thread_heap();
209 size_t aligned_size = size;
210 if (aligned_size % _memory_page_size)
211 aligned_size = (1 + (aligned_size / _memory_page_size)) * _memory_page_size;
212 #if ENABLE_VALIDATE_ARGS
213 if (aligned_size < size) {
214 errno = EINVAL;
215 return 0;
216 }
217 #endif
218 return valloc(size);
219 }
220
221 #endif // ENABLE_OVERRIDE
222
223 #if ENABLE_PRELOAD
224
225 #ifdef _WIN32
226
227 #if defined(BUILD_DYNAMIC_LINK) && BUILD_DYNAMIC_LINK
228
229 __declspec(dllexport) BOOL WINAPI
DllMain(HINSTANCE instance,DWORD reason,LPVOID reserved)230 DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved) {
231 (void)sizeof(reserved);
232 (void)sizeof(instance);
233 if (reason == DLL_PROCESS_ATTACH)
234 rpmalloc_initialize();
235 else if (reason == DLL_PROCESS_DETACH)
236 rpmalloc_finalize();
237 else if (reason == DLL_THREAD_ATTACH)
238 rpmalloc_thread_initialize();
239 else if (reason == DLL_THREAD_DETACH)
240 rpmalloc_thread_finalize();
241 return TRUE;
242 }
243
244 #endif
245
246 #else
247
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
252
253 static pthread_key_t destructor_key;
254
255 static void
256 thread_destructor(void*);
257
258 static void __attribute__((constructor))
initializer(void)259 initializer(void) {
260 rpmalloc_initialize();
261 pthread_key_create(&destructor_key, thread_destructor);
262 }
263
264 static void __attribute__((destructor))
finalizer(void)265 finalizer(void) {
266 rpmalloc_finalize();
267 }
268
269 typedef struct {
270 void* (*real_start)(void*);
271 void* real_arg;
272 } thread_starter_arg;
273
274 static void*
thread_starter(void * argptr)275 thread_starter(void* argptr) {
276 thread_starter_arg* arg = argptr;
277 void* (*real_start)(void*) = arg->real_start;
278 void* real_arg = arg->real_arg;
279 rpmalloc_thread_initialize();
280 rpfree(argptr);
281 pthread_setspecific(destructor_key, (void*)1);
282 return (*real_start)(real_arg);
283 }
284
// Destructor registered for destructor_key: finalizes the exiting thread's
// rpmalloc state. The stored key value itself is not used.
static void
thread_destructor(void* value) {
	(void)value;
	rpmalloc_thread_finalize();
}
290
291 #ifdef __APPLE__
292
293 static int
pthread_create_proxy(pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)294 pthread_create_proxy(pthread_t* thread,
295 const pthread_attr_t* attr,
296 void* (*start_routine)(void*),
297 void* arg) {
298 rpmalloc_initialize();
299 thread_starter_arg* starter_arg = rpmalloc(sizeof(thread_starter_arg));
300 starter_arg->real_start = start_routine;
301 starter_arg->real_arg = arg;
302 return pthread_create(thread, attr, thread_starter, starter_arg);
303 }
304
305 MAC_INTERPOSE_SINGLE(pthread_create_proxy, pthread_create);
306
307 #else
308
309 #include <dlfcn.h>
310
311 int
pthread_create(pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)312 pthread_create(pthread_t* thread,
313 const pthread_attr_t* attr,
314 void* (*start_routine)(void*),
315 void* arg) {
316 #if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__DragonFly__)
317 char fname[] = "pthread_create";
318 #else
319 char fname[] = "_pthread_create";
320 #endif
321 void* real_pthread_create = dlsym(RTLD_NEXT, fname);
322 rpmalloc_thread_initialize();
323 thread_starter_arg* starter_arg = rpmalloc(sizeof(thread_starter_arg));
324 starter_arg->real_start = start_routine;
325 starter_arg->real_arg = arg;
326 return (*(int (*)(pthread_t*, const pthread_attr_t*, void* (*)(void*), void*))real_pthread_create)(thread, attr, thread_starter, starter_arg);
327 }
328
329 #endif
330
331 #endif
332
333 #endif
334
335 #if ENABLE_OVERRIDE
336
337 #if defined(__GLIBC__) && defined(__linux__)
338
339 void* __libc_malloc(size_t size) RPALIAS(rpmalloc)
340 void* __libc_calloc(size_t count, size_t size) RPALIAS(rpcalloc)
341 void* __libc_realloc(void* p, size_t size) RPALIAS(rprealloc)
342 void __libc_free(void* p) RPALIAS(rpfree)
343 void __libc_cfree(void* p) RPALIAS(rpfree)
344 void* __libc_memalign(size_t align, size_t size) RPALIAS(rpmemalign)
345 int __posix_memalign(void** p, size_t align, size_t size) RPALIAS(rpposix_memalign)
346
347 extern void* __libc_valloc(size_t size);
348 extern void* __libc_pvalloc(size_t size);
349
350 void*
__libc_valloc(size_t size)351 __libc_valloc(size_t size) {
352 return valloc(size);
353 }
354
355 void*
__libc_pvalloc(size_t size)356 __libc_pvalloc(size_t size) {
357 return pvalloc(size);
358 }
359
360 #endif
361
362 #endif
363
364 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
365 #pragma GCC visibility pop
366 #endif
367