1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018-2020 Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license.
5 -----------------------------------------------------------------------------*/
6 
7 /* This is a stress test for the allocator, using multiple threads and
8    transferring objects between threads. It tries to reflect real-world workloads:
9    - allocation size is distributed linearly in powers of two
10    - with some fraction extra large (and some extra extra large)
11    - the allocations are initialized and read again at free
12    - pointers transfer between threads
13    - threads are terminated and recreated with some objects surviving in between
14    - uses deterministic "randomness", but execution can still depend on
15      (random) thread scheduling. Do not use this test as a benchmark!
16 */
17 
18 #include <stdio.h>
19 #include <stdlib.h>
20 #include <stdint.h>
21 #include <stdbool.h>
22 #include <string.h>
23 
24 // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
25 //
26 // argument defaults
27 static int THREADS = 32;      // more repeatable if THREADS <= #processors
28 static int SCALE   = 10;      // scaling factor
29 static int ITER    = 50;      // N full iterations destructing and re-creating all threads
30 
31 // static int THREADS = 8;    // more repeatable if THREADS <= #processors
32 // static int SCALE   = 100;  // scaling factor
33 
34 #define STRESS   // undefine for leak test
35 
36 static bool   allow_large_objects = true;    // allow very large objects?
37 static size_t use_one_size = 0;              // use single object size of `N * sizeof(uintptr_t)`?
38 
39 
40 // #define USE_STD_MALLOC
41 #ifdef USE_STD_MALLOC
42 #define custom_calloc(n,s)    calloc(n,s)
43 #define custom_realloc(p,s)   realloc(p,s)
44 #define custom_free(p)        free(p)
45 #else
46 #include <mimalloc.h>
47 #define custom_calloc(n,s)    mi_calloc(n,s)
48 #define custom_realloc(p,s)   mi_realloc(p,s)
49 #define custom_free(p)        mi_free(p)
50 #endif
51 
52 // transfer pointer between threads
53 #define TRANSFERS     (1000)
54 static volatile void* transfer[TRANSFERS];
55 
56 
57 #if (UINTPTR_MAX != UINT32_MAX)
58 const uintptr_t cookie = 0xbf58476d1ce4e5b9UL;
59 #else
60 const uintptr_t cookie = 0x1ce4e5b9UL;
61 #endif
62 
63 static void* atomic_exchange_ptr(volatile void** p, void* newval);
64 
65 typedef uintptr_t* random_t;
66 
pick(random_t r)67 static uintptr_t pick(random_t r) {
68   uintptr_t x = *r;
69 #if (UINTPTR_MAX > UINT32_MAX)
70   // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
71   x ^= x >> 30;
72   x *= 0xbf58476d1ce4e5b9UL;
73   x ^= x >> 27;
74   x *= 0x94d049bb133111ebUL;
75   x ^= x >> 31;
76 #else
77   // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
78   x ^= x >> 16;
79   x *= 0x7feb352dUL;
80   x ^= x >> 15;
81   x *= 0x846ca68bUL;
82   x ^= x >> 16;
83 #endif
84   *r = x;
85   return x;
86 }
87 
chance(size_t perc,random_t r)88 static bool chance(size_t perc, random_t r) {
89   return (pick(r) % 100 <= perc);
90 }
91 
alloc_items(size_t items,random_t r)92 static void* alloc_items(size_t items, random_t r) {
93   if (chance(1, r)) {
94     if (chance(1, r) && allow_large_objects) items *= 10000;       // 0.01% giant
95     else if (chance(10, r) && allow_large_objects) items *= 1000;  // 0.1% huge
96     else items *= 100;                                             // 1% large objects;
97   }
98   if (items == 40) items++;              // pthreads uses that size for stack increases
99   if (use_one_size > 0) items = (use_one_size / sizeof(uintptr_t));
100   if (items==0) items = 1;
101   uintptr_t* p = (uintptr_t*)custom_calloc(items,sizeof(uintptr_t));
102   if (p != NULL) {
103     for (uintptr_t i = 0; i < items; i++) {
104       p[i] = (items - i) ^ cookie;
105     }
106   }
107   return p;
108 }
109 
free_items(void * p)110 static void free_items(void* p) {
111   if (p != NULL) {
112     uintptr_t* q = (uintptr_t*)p;
113     uintptr_t items = (q[0] ^ cookie);
114     for (uintptr_t i = 0; i < items; i++) {
115       if ((q[i] ^ cookie) != items - i) {
116         fprintf(stderr, "memory corruption at block %p at %zu\n", p, i);
117         abort();
118       }
119     }
120   }
121   custom_free(p);
122 }
123 
124 
stress(intptr_t tid)125 static void stress(intptr_t tid) {
126   //bench_start_thread();
127   uintptr_t r = ((tid + 1) * 43); // rand();
128   const size_t max_item_shift = 5; // 128
129   const size_t max_item_retained_shift = max_item_shift + 2;
130   size_t allocs = 100 * ((size_t)SCALE) * (tid % 8 + 1); // some threads do more
131   size_t retain = allocs / 2;
132   void** data = NULL;
133   size_t data_size = 0;
134   size_t data_top = 0;
135   void** retained = (void**)custom_calloc(retain,sizeof(void*));
136   size_t retain_top = 0;
137 
138   while (allocs > 0 || retain > 0) {
139     if (retain == 0 || (chance(50, &r) && allocs > 0)) {
140       // 50%+ alloc
141       allocs--;
142       if (data_top >= data_size) {
143         data_size += 100000;
144         data = (void**)custom_realloc(data, data_size * sizeof(void*));
145       }
146       data[data_top++] = alloc_items(1ULL << (pick(&r) % max_item_shift), &r);
147     }
148     else {
149       // 25% retain
150       retained[retain_top++] = alloc_items( 1ULL << (pick(&r) % max_item_retained_shift), &r);
151       retain--;
152     }
153     if (chance(66, &r) && data_top > 0) {
154       // 66% free previous alloc
155       size_t idx = pick(&r) % data_top;
156       free_items(data[idx]);
157       data[idx] = NULL;
158     }
159     if (chance(25, &r) && data_top > 0) {
160       // 25% exchange a local pointer with the (shared) transfer buffer.
161       size_t data_idx = pick(&r) % data_top;
162       size_t transfer_idx = pick(&r) % TRANSFERS;
163       void* p = data[data_idx];
164       void* q = atomic_exchange_ptr(&transfer[transfer_idx], p);
165       data[data_idx] = q;
166     }
167   }
168   // free everything that is left
169   for (size_t i = 0; i < retain_top; i++) {
170     free_items(retained[i]);
171   }
172   for (size_t i = 0; i < data_top; i++) {
173     free_items(data[i]);
174   }
175   custom_free(retained);
176   custom_free(data);
177   //bench_end_thread();
178 }
179 
180 static void run_os_threads(size_t nthreads, void (*entry)(intptr_t tid));
181 
test_stress(void)182 static void test_stress(void) {
183   uintptr_t r = rand();
184   for (int n = 0; n < ITER; n++) {
185     run_os_threads(THREADS, &stress);
186     for (int i = 0; i < TRANSFERS; i++) {
187       if (chance(50, &r) || n + 1 == ITER) { // free all on last run, otherwise free half of the transfers
188         void* p = atomic_exchange_ptr(&transfer[i], NULL);
189         free_items(p);
190       }
191     }
192     // mi_collect(false);
193 #if !defined(NDEBUG) || defined(MI_TSAN)
194     if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
195 #endif
196   }
197 }
198 
199 #ifndef STRESS
leak(intptr_t tid)200 static void leak(intptr_t tid) {
201   uintptr_t r = rand();
202   void* p = alloc_items(1 /*pick(&r)%128*/, &r);
203   if (chance(50, &r)) {
204     intptr_t i = (pick(&r) % TRANSFERS);
205     void* q = atomic_exchange_ptr(&transfer[i], p);
206     free_items(q);
207   }
208 }
209 
test_leak(void)210 static void test_leak(void) {
211   for (int n = 0; n < ITER; n++) {
212     run_os_threads(THREADS, &leak);
213     mi_collect(false);
214 #ifndef NDEBUG
215     if ((n + 1) % 10 == 0) { printf("- iterations left: %3d\n", ITER - (n + 1)); }
216 #endif
217   }
218 }
219 #endif
220 
main(int argc,char ** argv)221 int main(int argc, char** argv) {
222   // > mimalloc-test-stress [THREADS] [SCALE] [ITER]
223   if (argc >= 2) {
224     char* end;
225     long n = strtol(argv[1], &end, 10);
226     if (n > 0) THREADS = n;
227   }
228   if (argc >= 3) {
229     char* end;
230     long n = (strtol(argv[2], &end, 10));
231     if (n > 0) SCALE = n;
232   }
233   if (argc >= 4) {
234     char* end;
235     long n = (strtol(argv[3], &end, 10));
236     if (n > 0) ITER = n;
237   }
238   printf("Using %d threads with a %d%% load-per-thread and %d iterations\n", THREADS, SCALE, ITER);
239   //mi_reserve_os_memory(1024*1024*1024ULL, false, true);
240   //int res = mi_reserve_huge_os_pages(4,1);
241   //printf("(reserve huge: %i\n)", res);
242 
243   //bench_start_program();
244 
245   // Run ITER full iterations where half the objects in the transfer buffer survive to the next round.
246   srand(0x7feb352d);
247   // mi_stats_reset();
248 #ifdef STRESS
249     test_stress();
250 #else
251     test_leak();
252 #endif
253 
254 #ifndef USE_STD_MALLOC
255   #ifndef NDEBUG
256   mi_collect(true);
257   #endif
258   mi_stats_print(NULL);
259 #endif
260   //bench_end_program();
261   return 0;
262 }
263 
264 
265 static void (*thread_entry_fun)(intptr_t) = &stress;
266 
267 #ifdef _WIN32
268 
269 #include <Windows.h>
270 
thread_entry(LPVOID param)271 static DWORD WINAPI thread_entry(LPVOID param) {
272   thread_entry_fun((intptr_t)param);
273   return 0;
274 }
275 
run_os_threads(size_t nthreads,void (* fun)(intptr_t))276 static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
277   thread_entry_fun = fun;
278   DWORD* tids = (DWORD*)custom_calloc(nthreads,sizeof(DWORD));
279   HANDLE* thandles = (HANDLE*)custom_calloc(nthreads,sizeof(HANDLE));
280   for (uintptr_t i = 0; i < nthreads; i++) {
281     thandles[i] = CreateThread(0, 8*1024, &thread_entry, (void*)(i), 0, &tids[i]);
282   }
283   for (size_t i = 0; i < nthreads; i++) {
284     WaitForSingleObject(thandles[i], INFINITE);
285   }
286   for (size_t i = 0; i < nthreads; i++) {
287     CloseHandle(thandles[i]);
288   }
289   custom_free(tids);
290   custom_free(thandles);
291 }
292 
atomic_exchange_ptr(volatile void ** p,void * newval)293 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
294 #if (INTPTR_MAX == INT32_MAX)
295   return (void*)InterlockedExchange((volatile LONG*)p, (LONG)newval);
296 #else
297   return (void*)InterlockedExchange64((volatile LONG64*)p, (LONG64)newval);
298 #endif
299 }
300 #else
301 
302 #include <pthread.h>
303 
thread_entry(void * param)304 static void* thread_entry(void* param) {
305   thread_entry_fun((uintptr_t)param);
306   return NULL;
307 }
308 
run_os_threads(size_t nthreads,void (* fun)(intptr_t))309 static void run_os_threads(size_t nthreads, void (*fun)(intptr_t)) {
310   thread_entry_fun = fun;
311   pthread_t* threads = (pthread_t*)custom_calloc(nthreads,sizeof(pthread_t));
312   memset(threads, 0, sizeof(pthread_t) * nthreads);
313   //pthread_setconcurrency(nthreads);
314   for (size_t i = 0; i < nthreads; i++) {
315     pthread_create(&threads[i], NULL, &thread_entry, (void*)i);
316   }
317   for (size_t i = 0; i < nthreads; i++) {
318     pthread_join(threads[i], NULL);
319   }
320   custom_free(threads);
321 }
322 
323 #ifdef __cplusplus
324 #include <atomic>
atomic_exchange_ptr(volatile void ** p,void * newval)325 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
326   return std::atomic_exchange((volatile std::atomic<void*>*)p, newval);
327 }
328 #else
329 #include <stdatomic.h>
atomic_exchange_ptr(volatile void ** p,void * newval)330 static void* atomic_exchange_ptr(volatile void** p, void* newval) {
331   return atomic_exchange((volatile _Atomic(void*)*)p, newval);
332 }
333 #endif
334 
335 #endif
336