1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license.
5 -----------------------------------------------------------------------------*/
6
7 /* This is a stress test for the allocator, using multiple threads and
8 transferring objects between threads. It tries to reflect real-world workloads:
9 - allocation size is distributed linearly in powers of two
10 - with some fraction extra large (and some extra extra large)
11 - the allocations are initialized and read again at free
12 - pointers transfer between threads
13 - threads are terminated and recreated with some objects surviving in between
14 - uses deterministic "randomness", but execution can still depend on
15 (random) thread scheduling. Do not use this test as a benchmark!
16 */
17
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdint.h>
21 #include <stdbool.h>
22 #include <string.h>
23
24 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
25 //
26 // argument defaults
27 static int THREADS = 32; // more repeatable if THREADS <= #processors
28 static int SCALE = 10; // scaling factor
29 static int ITER = 50; // N full iterations destructing and re-creating all threads
30
31 // static int THREADS = 8; // more repeatable if THREADS <= #processors
32 // static int SCALE = 100; // scaling factor
33
34 #define STRESS // undefine for leak test
35
36 static bool allow_large_objects = true; // allow very large objects?
37 static size_t use_one_size = 0; // use single object size of `N * sizeof(uintptr_t)`?
38
39
40 // #define USE_STD_MALLOC
41 #ifdef USE_STD_MALLOC
42 #define custom_calloc(n,s) calloc(n,s)
43 #define custom_realloc(p,s) realloc(p,s)
44 #define custom_free(p) free(p)
45 #else
46 #include <mimalloc.h>
47 #define custom_calloc(n,s) mi_calloc(n,s)
48 #define custom_realloc(p,s) mi_realloc(p,s)
49 #define custom_free(p) mi_free(p)
50 #endif
51
52 // transfer pointer between threads
53 #define TRANSFERS (1000)
54 static volatile void* transfer[TRANSFERS];
55
56
57 #if (UINTPTR_MAX != UINT32_MAX)
58 const uintptr_t cookie = 0xbf58476d1ce4e5b9UL;
59 #else
60 const uintptr_t cookie = 0x1ce4e5b9UL;
61 #endif
62
63 static void* atomic_exchange_ptr(volatile void** p, void* newval);
64
65 typedef uintptr_t* random_t;
66
pick(random_t r)67 static uintptr_t pick(random_t r) {
68 uintptr_t x = *r;
69 #if (UINTPTR_MAX > UINT32_MAX)
70 // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
71 x ^= x >> 30;
72 x *= 0xbf58476d1ce4e5b9UL;
73 x ^= x >> 27;
74 x *= 0x94d049bb133111ebUL;
75 x ^= x >> 31;
76 #else
77 // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
78 x ^= x >> 16;
79 x *= 0x7feb352dUL;
80 x ^= x >> 15;
81 x *= 0x846ca68bUL;
82 x ^= x >> 16;
83 #endif
84 *r = x;
85 return x;
86 }
87
chance(size_t perc,random_t r)88 static bool chance(size_t perc, random_t r) {
89 return (pick(r) % 100 <= perc);
90 }
91
alloc_items(size_t items,random_t r)92 static void* alloc_items(size_t items, random_t r) {
93 if (chance(1, r)) {
94 if (chance(1, r) && allow_large_objects) items *= 10000; // 0.01% giant
95 else if (chance(10, r) && allow_large_objects) items *= 1000; // 0.1% huge
96 else items *= 100; // 1% large objects;
97 }
98 if (items == 40) items++; // pthreads uses that size for stack increases
99 if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
100 if (items==0) items = 1;
101 uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
102 if (p != NULL) {
103 for (uintptr_t i = 0; i < items; i++) {
104 p[i] = (items - i) ^ cookie;
105 }
106 }
107 return p;
108 }
109
free_items(void * p)110 static void free_items(void* p) {
111 if (p != NULL) {
112 uintptr_t* q = (uintptr_t*)p;
113 uintptr_t items = (q[0] ^ cookie);
114 for (uintptr_t i = 0; i < items; i++) {
115 if ((q[i] ^ cookie) != items - i) {
116 fprintf(stderr, "memory corruption at block %p at %zu\n", p, i);
117 abort();
118 }
119 }
120 }
121 custom_free(p);
122 }
123
124
stress(intptr_t tid)125 static void stress(intptr_t tid) {
126 //bench_start_thread();
127 uintptr_t r = ((tid + 1) * 43); // rand();
128 const size_t max_item_shift = 5; // 128
129 const size_t max_item_retained_shift = max_item_shift + 2;
130 size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
131 size_t retain = allocs / 2;
132 void** data = NULL;
133 size_t data_size = 0;
134 size_t data_top = 0;
135 void** retained = (void**)custom_calloc(retain,sizeof(void*));
136 size_t retain_top = 0;
137
138 while (allocs > 0 || retain > 0) {
139 if (retain == 0 || (chance(50, &r) && allocs > 0)) {
140 // 50%+ alloc
141 allocs--;
142 if (data_top >= data_size) {
143 data_size += 100000;
144 data = (void**)custom_realloc(data, data_size * sizeof(void*));
145 }
146 data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
147 }
148 else {
149 // 25% retain
150 retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r);
151 retain--;
152 }
153 if (chance(66, &r) && data_top > 0) {
154 // 66% free previous alloc
155 size_t idx = pick(&r) % data_top;
156 free_items(data[idx]);
157 data[idx] = NULL;
158 }
159 if (chance(25, &r) && data_top > 0) {
160 // 25% exchange a local pointer with the (shared) transfer buffer.
161 size_t data_idx = pick(&r) % data_top;
162 size_t transfer_idx = pick(&r) % TRANSFERS;
163 void* p = data[data_idx];
164 void* q = atomic_exchange_ptr(&transfer[transfer_idx], p);
165 data[data_idx] = q;
166 }
167 }
168 // free everything that is left
169 for (size_t i = 0; i < retain_top; i++) {
170 free_items(retained[i]);
171 }
172 for (size_t i = 0; i < data_top; i++) {
173 free_items(data[i]);
174 }
175 custom_free(retained);
176 custom_free(data);
177 //bench_end_thread();
178 }
179
180 static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
181
test_stress(void)182 static void test_stress(void) {
183 uintptr_t r = rand();
184 for (int n = 0; n < ITER; n++) {
185 run_os_threads(THREADS, &stress);
186 for (int i = 0; i < TRANSFERS; i++) {
187 if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
188 void* p = atomic_exchange_ptr(&transfer[i], NULL);
189 free_items(p);
190 }
191 }
192 // mi_collect(false);
193 #if !defined(NDEBUG) || defined(MI_TSAN)
194 if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
195 #endif
196 }
197 }
198
199 #ifndef STRESS
leak(intptr_t tid)200 static void leak(intptr_t tid) {
201 uintptr_t r = rand();
202 void* p = alloc_items(1 /*pick(&r)%128*/, &r);
203 if (chance(50, &r)) {
204 intptr_t i = (pick(&r) % TRANSFERS);
205 void* q = atomic_exchange_ptr(&transfer[i], p);
206 free_items(q);
207 }
208 }
209
test_leak(void)210 static void test_leak(void) {
211 for (int n = 0; n < ITER; n++) {
212 run_os_threads(THREADS, &leak);
213 mi_collect(false);
214 #ifndef NDEBUG
215 if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
216 #endif
217 }
218 }
219 #endif
220
main(int argc,char ** argv)221 int main(int argc, char** argv) {
222 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
223 if (argc >= 2) {
224 char* end;
225 long n = strtol(argv[1], &end, 10);
226 if (n > 0) THREADS = n;
227 }
228 if (argc >= 3) {
229 char* end;
230 long n = (strtol(argv[2], &end, 10));
231 if (n > 0) SCALE = n;
232 }
233 if (argc >= 4) {
234 char* end;
235 long n = (strtol(argv[3], &end, 10));
236 if (n > 0) ITER = n;
237 }
238 printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
239 //mi_reserve_os_memory(1024*1024*1024ULL, false, true);
240 //int res = mi_reserve_huge_os_pages(4,1);
241 //printf("(reserve huge: %i\n)", res);
242
243 //bench_start_program();
244
245 // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
246 srand(0x7feb352d);
247 // mi_stats_reset();
248 #ifdef STRESS
249 test_stress();
250 #else
251 test_leak();
252 #endif
253
254 #ifndef USE_STD_MALLOC
255 #ifndef NDEBUG
256 mi_collect(true);
257 #endif
258 mi_stats_print(NULL);
259 #endif
260 //bench_end_program();
261 return 0;
262 }
263
264
265 static void (*thread_entry_fun)(intptr_t) = &stress;
266
267 #ifdef _WIN32
268
269 #include <Windows.h>
270
thread_entry(LPVOID param)271 static DWORD WINAPI thread_entry(LPVOID param) {
272 thread_entry_fun((intptr_t)param);
273 return 0;
274 }
275
run_os_threads(size_t nthreads,void (* fun)(intptr_t))276 static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
277 thread_entry_fun = fun;
278 DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
279 HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
280 for (uintptr_t i = 0; i < nthreads; i++) {
281 thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
282 }
283 for (size_t i = 0; i < nthreads; i++) {
284 WaitForSingleObject(thandles[i], INFINITE);
285 }
286 for (size_t i = 0; i < nthreads; i++) {
287 CloseHandle(thandles[i]);
288 }
289 custom_free(tids);
290 custom_free(thandles);
291 }
292
atomic_exchange_ptr(volatile void ** p,void * newval)293 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
294 #if (INTPTR_MAX == INT32_MAX)
295 return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
296 #else
297 return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
298 #endif
299 }
300 #else
301
302 #include <pthread.h>
303
thread_entry(void * param)304 static void* thread_entry(void* param) {
305 thread_entry_fun((uintptr_t)param);
306 return NULL;
307 }
308
run_os_threads(size_t nthreads,void (* fun)(intptr_t))309 static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
310 thread_entry_fun = fun;
311 pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
312 memset(threads, 0, sizeof(pthread_t) * nthreads);
313 //pthread_setconcurrency(nthreads);
314 for (size_t i = 0; i < nthreads; i++) {
315 pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
316 }
317 for (size_t i = 0; i < nthreads; i++) {
318 pthread_join(threads[i], NULL);
319 }
320 custom_free(threads);
321 }
322
323 #ifdef __cplusplus
324 #include <atomic>
atomic_exchange_ptr(volatile void ** p,void * newval)325 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
326 return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
327 }
328 #else
329 #include <stdatomic.h>
atomic_exchange_ptr(volatile void ** p,void * newval)330 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
331 return atomic_exchange((volatile _Atomic(void*)*)p, newval);
332 }
333 #endif
334
335 #endif
336