1 /* malloc.c  -  Memory allocator  -  Public Domain  -  2016 Mattias Jansson
2  *
3  * This library provides a cross-platform lock free thread caching malloc implementation in C11.
4  * The latest source code is always available at
5  *
6  * https://github.com/mjansson/rpmalloc
7  *
8  * This library is put in the public domain; you can redistribute it and/or modify it without any restrictions.
9  *
10  */
11 
12 //
13 // This file provides overrides for the standard library malloc entry points for C and new/delete operators for C++
14 // It also provides automatic initialization/finalization of process and threads
15 //
16 
// Pick the pointer width from the compiler's data-model macros unless the build
// already supplied ARCH_64BIT, then verify at compile time that size_t and
// pointers really have the matching size.
#if !defined(ARCH_64BIT)
#  if defined(_WIN64) || defined(__LP64__) || defined(__LLP64__)
#    define ARCH_64BIT 1
#  else
#    define ARCH_64BIT 0
#  endif
_Static_assert(sizeof(size_t) == (ARCH_64BIT ? 8 : 4), "Data type size mismatch");
_Static_assert(sizeof(void*) == (ARCH_64BIT ? 8 : 4), "Data type size mismatch");
#endif
28 
29 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
30 #pragma GCC visibility push(default)
31 #endif
32 
33 #if ENABLE_OVERRIDE
34 
// Select how the standard entry points are overridden:
//   USE_INTERPOSE - table in the __DATA,__interpose section (Apple with preloading)
//   USE_IMPLEMENT - define the entry points as real functions (Windows, or together with interposing)
//   USE_ALIAS     - export the entry points as symbol aliases (every other platform)
#if defined(__APPLE__) && ENABLE_PRELOAD
#  define USE_INTERPOSE 1
#else
#  define USE_INTERPOSE 0
#endif

#if defined(_WIN32) || USE_INTERPOSE
#  define USE_IMPLEMENT 1
#  define USE_ALIAS 0
#else
#  define USE_IMPLEMENT 0
#  define USE_ALIAS 1
#endif

#ifdef _MSC_VER
// C4100: unreferenced formal parameter
#pragma warning (disable : 4100)
// Drop any macro versions of the names that get defined as functions below
#undef malloc
#undef free
#undef calloc
#endif
57 
58 #if USE_IMPLEMENT
59 
malloc(size_t size)60 extern inline void* RPMALLOC_CDECL malloc(size_t size) { return rpmalloc(size); }
calloc(size_t count,size_t size)61 extern inline void* RPMALLOC_CDECL calloc(size_t count, size_t size) { return rpcalloc(count, size); }
realloc(void * ptr,size_t size)62 extern inline void* RPMALLOC_CDECL realloc(void* ptr, size_t size) { return rprealloc(ptr, size); }
reallocf(void * ptr,size_t size)63 extern inline void* RPMALLOC_CDECL reallocf(void* ptr, size_t size) { return rprealloc(ptr, size); }
aligned_alloc(size_t alignment,size_t size)64 extern inline void* RPMALLOC_CDECL aligned_alloc(size_t alignment, size_t size) { return rpaligned_alloc(alignment, size); }
memalign(size_t alignment,size_t size)65 extern inline void* RPMALLOC_CDECL memalign(size_t alignment, size_t size) { return rpmemalign(alignment, size); }
posix_memalign(void ** memptr,size_t alignment,size_t size)66 extern inline int RPMALLOC_CDECL posix_memalign(void** memptr, size_t alignment, size_t size) { return rpposix_memalign(memptr, alignment, size); }
free(void * ptr)67 extern inline void RPMALLOC_CDECL free(void* ptr) { rpfree(ptr); }
cfree(void * ptr)68 extern inline void RPMALLOC_CDECL cfree(void* ptr) { rpfree(ptr); }
malloc_usable_size(void * ptr)69 extern inline size_t RPMALLOC_CDECL malloc_usable_size(void* ptr) { return rpmalloc_usable_size(ptr); }
malloc_size(void * ptr)70 extern inline size_t RPMALLOC_CDECL malloc_size(void* ptr) { return rpmalloc_usable_size(ptr); }
71 
72 // Overload the C++ operators using the mangled names (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)
73 // operators delete and delete[]
// Each symbol is declared first and then defined, so the definition has a
// visible prototype. Both delete forms release the block through rpfree.
extern void _ZdlPv(void* p);
void
_ZdlPv(void* p) {
	rpfree(p);
}

extern void _ZdaPv(void* p);
void
_ZdaPv(void* p) {
	rpfree(p);
}

#if ARCH_64BIT
// 64-bit operators new and new[]: one-argument (size) and two-argument (size, align) forms.
// The 'm' in the mangled name is unsigned long, 64 bits wide here.
extern void* _Znwm(uint64_t size);
void*
_Znwm(uint64_t size) {
	return rpmalloc(size);
}

extern void* _Znam(uint64_t size);
void*
_Znam(uint64_t size) {
	return rpmalloc(size);
}

extern void* _Znwmm(uint64_t size, uint64_t align);
void*
_Znwmm(uint64_t size, uint64_t align) {
	// rpaligned_alloc takes the alignment first
	return rpaligned_alloc(align, size);
}

extern void* _Znamm(uint64_t size, uint64_t align);
void*
_Znamm(uint64_t size, uint64_t align) {
	return rpaligned_alloc(align, size);
}
#else
// 32-bit operators new and new[]: one-argument (size) and two-argument (size, align) forms.
// The 'j' in the mangled name is unsigned int, so every parameter is 32 bits
// wide. Declaring them as 64-bit values would make the functions read their
// arguments from the wrong place. This also matches the aliased variants of
// the same symbols further down in this file.
extern void* _Znwj(uint32_t size);
void*
_Znwj(uint32_t size) {
	return rpmalloc(size);
}

extern void* _Znaj(uint32_t size);
void*
_Znaj(uint32_t size) {
	return rpmalloc(size);
}

extern void* _Znwjj(uint32_t size, uint32_t align);
void*
_Znwjj(uint32_t size, uint32_t align) {
	// rpaligned_alloc takes the alignment first
	return rpaligned_alloc(align, size);
}

extern void* _Znajj(uint32_t size, uint32_t align);
void*
_Znajj(uint32_t size, uint32_t align) {
	return rpaligned_alloc(align, size);
}
#endif
89 
90 #endif
91 
92 #if USE_INTERPOSE
93 
// One record of the interpose table: a replacement function paired with the
// function it stands in for. Both are stored as untyped pointers.
struct interpose_t {
	void* new_func;
	void* orig_func;
};
typedef struct interpose_t interpose_t;

// Brace initializer for a single interpose_t record
#define MAC_INTERPOSE_PAIR(newf, oldf) \
	{ (void*)newf, (void*)oldf }

// Defines a standalone record, placed in the __DATA,__interpose section and
// marked used so that it is kept even though nothing references it
#define MAC_INTERPOSE_SINGLE(newf, oldf) \
__attribute__((used)) static const interpose_t macinterpose##newf##oldf \
__attribute__ ((section("__DATA, __interpose"))) = \
	MAC_INTERPOSE_PAIR(newf, oldf)
103 
104 __attribute__((used)) static const interpose_t macinterpose_malloc[]
105 __attribute__ ((section("__DATA, __interpose"))) = {
106 	//new and new[]
107 	MAC_INTERPOSE_PAIR(rpmalloc, _Znwm),
108 	MAC_INTERPOSE_PAIR(rpmalloc, _Znam),
109 	//delete and delete[]
110 	MAC_INTERPOSE_PAIR(rpfree, _ZdlPv),
111 	MAC_INTERPOSE_PAIR(rpfree, _ZdaPv),
112 	MAC_INTERPOSE_PAIR(rpmalloc, malloc),
113 	MAC_INTERPOSE_PAIR(rpmalloc, calloc),
114 	MAC_INTERPOSE_PAIR(rprealloc, realloc),
115 	MAC_INTERPOSE_PAIR(rprealloc, reallocf),
116 	MAC_INTERPOSE_PAIR(rpaligned_alloc, aligned_alloc),
117 	MAC_INTERPOSE_PAIR(rpmemalign, memalign),
118 	MAC_INTERPOSE_PAIR(rpposix_memalign, posix_memalign),
119 	MAC_INTERPOSE_PAIR(rpfree, free),
120 	MAC_INTERPOSE_PAIR(rpfree, cfree),
121 	MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_usable_size),
122 	MAC_INTERPOSE_PAIR(rpmalloc_usable_size, malloc_size)
123 };
124 
125 #endif
126 
127 #if USE_ALIAS
128 
// Appended to a function declaration to export it, with default visibility, as
// an alias of the named function. The expansion already ends with the
// terminating semicolon, so the declarations below carry none of their own.
#define RPALIAS(fn) __attribute__((alias(#fn), used, visibility("default")));

// The C++ operators are aliased through their mangled names
// (https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling)

// delete and delete[]
void
_ZdlPv(void* p) RPALIAS(rpfree)

void
_ZdaPv(void* p) RPALIAS(rpfree)

#if ARCH_64BIT
// new and new[] with 64-bit arguments. The one-argument forms are plain
// aliases; the two-argument forms need a body to swap the argument order.
void*
_Znwm(uint64_t size) RPALIAS(rpmalloc)

void*
_Znam(uint64_t size) RPALIAS(rpmalloc)

extern inline void*
_Znwmm(uint64_t size, uint64_t align) {
	return rpaligned_alloc(align, size);
}

extern inline void*
_Znamm(uint64_t size, uint64_t align) {
	return rpaligned_alloc(align, size);
}
#else
// new and new[] with 32-bit arguments. The one-argument forms are plain
// aliases; the two-argument forms need a body to swap the argument order.
void*
_Znwj(uint32_t size) RPALIAS(rpmalloc)

void*
_Znaj(uint32_t size) RPALIAS(rpmalloc)

extern inline void*
_Znwjj(uint32_t size, uint32_t align) {
	return rpaligned_alloc(align, size);
}

extern inline void*
_Znajj(uint32_t size, uint32_t align) {
	return rpaligned_alloc(align, size);
}
#endif
150 
malloc(size_t size)151 void* malloc(size_t size) RPALIAS(rpmalloc)
152 void* calloc(size_t count, size_t size) RPALIAS(rpcalloc)
153 void* realloc(void* ptr, size_t size) RPALIAS(rprealloc)
154 void* reallocf(void* ptr, size_t size) RPALIAS(rprealloc)
155 void* aligned_alloc(size_t alignment, size_t size) RPALIAS(rpaligned_alloc)
156 void* memalign(size_t alignment, size_t size) RPALIAS(rpmemalign)
157 int posix_memalign(void** memptr, size_t alignment, size_t size) RPALIAS(rpposix_memalign)
158 void free(void* ptr) RPALIAS(rpfree)
159 void cfree(void* ptr) RPALIAS(rpfree)
160 size_t malloc_usable_size(void* ptr) RPALIAS(rpmalloc_usable_size)
161 size_t malloc_size(void* ptr) RPALIAS(rpmalloc_usable_size)
162 
163 #endif
164 
165 extern inline void* RPMALLOC_CDECL
166 reallocarray(void* ptr, size_t count, size_t size) {
167 	size_t total;
168 #if ENABLE_VALIDATE_ARGS
169 #ifdef _MSC_VER
170 	int err = SizeTMult(count, size, &total);
171 	if ((err != S_OK) || (total >= MAX_ALLOC_SIZE)) {
172 		errno = EINVAL;
173 		return 0;
174 	}
175 #else
176 	int err = __builtin_umull_overflow(count, size, &total);
177 	if (err || (total >= MAX_ALLOC_SIZE)) {
178 		errno = EINVAL;
179 		return 0;
180 	}
181 #endif
182 #else
183 	total = count * size;
184 #endif
185 	return realloc(ptr, total);
186 }
187 
188 extern inline void* RPMALLOC_CDECL
valloc(size_t size)189 valloc(size_t size) {
190 	get_thread_heap();
191 	if (!size)
192 		size = _memory_page_size;
193 	size_t total_size = size + _memory_page_size;
194 #if ENABLE_VALIDATE_ARGS
195 	if (total_size < size) {
196 		errno = EINVAL;
197 		return 0;
198 	}
199 #endif
200 	void* buffer = rpmalloc(total_size);
201 	if ((uintptr_t)buffer & (_memory_page_size - 1))
202 		return (void*)(((uintptr_t)buffer & ~(_memory_page_size - 1)) + _memory_page_size);
203 	return buffer;
204 }
205 
206 extern inline void* RPMALLOC_CDECL
pvalloc(size_t size)207 pvalloc(size_t size) {
208 	get_thread_heap();
209 	size_t aligned_size = size;
210 	if (aligned_size % _memory_page_size)
211 		aligned_size = (1 + (aligned_size / _memory_page_size)) * _memory_page_size;
212 #if ENABLE_VALIDATE_ARGS
213 	if (aligned_size < size) {
214 		errno = EINVAL;
215 		return 0;
216 	}
217 #endif
218 	return valloc(size);
219 }
220 
221 #endif // ENABLE_OVERRIDE
222 
223 #if ENABLE_PRELOAD
224 
225 #ifdef _WIN32
226 
227 #if defined(BUILD_DYNAMIC_LINK) && BUILD_DYNAMIC_LINK
228 
229 __declspec(dllexport) BOOL WINAPI
DllMain(HINSTANCE instance,DWORD reason,LPVOID reserved)230 DllMain(HINSTANCE instance, DWORD reason, LPVOID reserved) {
231 	(void)sizeof(reserved);
232 	(void)sizeof(instance);
233 	if (reason == DLL_PROCESS_ATTACH)
234 		rpmalloc_initialize();
235 	else if (reason == DLL_PROCESS_DETACH)
236 		rpmalloc_finalize();
237 	else if (reason == DLL_THREAD_ATTACH)
238 		rpmalloc_thread_initialize();
239 	else if (reason == DLL_THREAD_DETACH)
240 		rpmalloc_thread_finalize();
241 	return TRUE;
242 }
243 
244 #endif
245 
246 #else
247 
#include <errno.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <unistd.h>
252 
// Thread-specific key created with thread_destructor as its destructor callback
static pthread_key_t destructor_key;

static void
thread_destructor(void* value);

// Load-time constructor: initializes rpmalloc, then creates the key
__attribute__((constructor)) static void
initializer(void) {
	rpmalloc_initialize();
	pthread_key_create(&destructor_key, thread_destructor);
}

// Unload-time destructor: finalizes rpmalloc
__attribute__((destructor)) static void
finalizer(void) {
	rpmalloc_finalize();
}
268 
269 typedef struct {
270 	void* (*real_start)(void*);
271 	void* real_arg;
272 } thread_starter_arg;
273 
274 static void*
thread_starter(void * argptr)275 thread_starter(void* argptr) {
276 	thread_starter_arg* arg = argptr;
277 	void* (*real_start)(void*) = arg->real_start;
278 	void* real_arg = arg->real_arg;
279 	rpmalloc_thread_initialize();
280 	rpfree(argptr);
281 	pthread_setspecific(destructor_key, (void*)1);
282 	return (*real_start)(real_arg);
283 }
284 
// Destructor callback registered for destructor_key: finalizes rpmalloc for
// the calling thread. The stored value itself is not used.
static void
thread_destructor(void* value) {
	(void)value;
	rpmalloc_thread_finalize();
}
290 
291 #ifdef __APPLE__
292 
293 static int
pthread_create_proxy(pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)294 pthread_create_proxy(pthread_t* thread,
295                      const pthread_attr_t* attr,
296                      void* (*start_routine)(void*),
297                      void* arg) {
298 	rpmalloc_initialize();
299 	thread_starter_arg* starter_arg = rpmalloc(sizeof(thread_starter_arg));
300 	starter_arg->real_start = start_routine;
301 	starter_arg->real_arg = arg;
302 	return pthread_create(thread, attr, thread_starter, starter_arg);
303 }
304 
305 MAC_INTERPOSE_SINGLE(pthread_create_proxy, pthread_create);
306 
307 #else
308 
309 #include <dlfcn.h>
310 
311 int
pthread_create(pthread_t * thread,const pthread_attr_t * attr,void * (* start_routine)(void *),void * arg)312 pthread_create(pthread_t* thread,
313                const pthread_attr_t* attr,
314                void* (*start_routine)(void*),
315                void* arg) {
316 #if defined(__linux__) || defined(__FreeBSD__) || defined(__OpenBSD__) || defined(__APPLE__) || defined(__HAIKU__) || defined(__DragonFly__)
317 	char fname[] = "pthread_create";
318 #else
319 	char fname[] = "_pthread_create";
320 #endif
321 	void* real_pthread_create = dlsym(RTLD_NEXT, fname);
322 	rpmalloc_thread_initialize();
323 	thread_starter_arg* starter_arg = rpmalloc(sizeof(thread_starter_arg));
324 	starter_arg->real_start = start_routine;
325 	starter_arg->real_arg = arg;
326 	return (*(int (*)(pthread_t*, const pthread_attr_t*, void* (*)(void*), void*))real_pthread_create)(thread, attr, thread_starter, starter_arg);
327 }
328 
329 #endif
330 
331 #endif
332 
333 #endif
334 
335 #if ENABLE_OVERRIDE
336 
337 #if defined(__GLIBC__) && defined(__linux__)
338 
339 void* __libc_malloc(size_t size) RPALIAS(rpmalloc)
340 void* __libc_calloc(size_t count, size_t size) RPALIAS(rpcalloc)
341 void* __libc_realloc(void* p, size_t size) RPALIAS(rprealloc)
342 void __libc_free(void* p) RPALIAS(rpfree)
343 void __libc_cfree(void* p) RPALIAS(rpfree)
344 void* __libc_memalign(size_t align, size_t size) RPALIAS(rpmemalign)
345 int __posix_memalign(void** p, size_t align, size_t size) RPALIAS(rpposix_memalign)
346 
// The page-aligned variants have no direct rpmalloc counterpart to alias, so
// they are real functions that forward to valloc and pvalloc from this file.
// Each one is declared first so its definition has a visible prototype.

extern void* __libc_valloc(size_t size);
void*
__libc_valloc(size_t size) {
	void* block = valloc(size);
	return block;
}

extern void* __libc_pvalloc(size_t size);
void*
__libc_pvalloc(size_t size) {
	void* block = pvalloc(size);
	return block;
}
359 
360 #endif
361 
362 #endif
363 
364 #if (defined(__GNUC__) || defined(__clang__)) && !defined(__MACH__)
365 #pragma GCC visibility pop
366 #endif
367