1 // This file is part of the GNU ISO C++ Library. This library is free
2 // software; you can redistribute it and/or modify it under the
3 // terms of the GNU General Public License as published by the
4 // Free Software Foundation; either version 3, or (at your option)
5 // any later version.
6
7 // This library is distributed in the hope that it will be useful,
8 // but WITHOUT ANY WARRANTY; without even the implied warranty of
9 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 // GNU General Public License for more details.
11
12 // You should have received a copy of the GNU General Public License along
13 // with this library; see the file COPYING3. If not see
14 // <http://www.gnu.org/licenses/>.
15
16 // Override the -std flag in the check_performance script: STD=gnu++17
17
18 // Run the test as both single- and multi-threaded: TEST_B
19
#include <memory_resource>
#include <list>
#include <string>
#include <vector>
#include <testsuite_performance.h>
24
25 const int iterations = 100;
26
27 // Insert and remove elements of various sizes in std::list containers.
28 // If timers!=nullptr the function will pause the timer while the lists
29 // are cleared and deallocated, so that only insertions/removals are timed.
30 // Otherwise, the time taken to deallocate the lists is also counted.
31 void
populate_lists(std::pmr::memory_resource * r,__gnu_test::time_counter * timers,int kmax=iterations)32 populate_lists(std::pmr::memory_resource* r, __gnu_test::time_counter* timers,
33 int kmax = iterations)
34 {
35 struct size16 { char c[16]; };
36 struct size32 { char c[32]; };
37 struct size64 { char c[64]; };
38 struct size128 { char c[128]; };
39
40 std::pmr::list<int> l4(r);
41 std::pmr::list<size16> l16(r);
42 std::pmr::list<size32> l32(r);
43 std::pmr::list<size64> l64(r);
44 std::pmr::list<size128> l128(r);
45
46 const int imax = 1000;
47 const int jmax = 100;
48 for (int k = 0; k < kmax; ++k)
49 {
50 for (int i = 0; i < imax; ++i)
51 {
52 for (int j = 0; j < jmax; ++j)
53 {
54 l4.emplace_back();
55 l16.emplace_back();
56 l32.emplace_back();
57 l64.emplace_back();
58 l128.emplace_back();
59 }
60 l4.pop_front();
61 l16.pop_front();
62 l32.pop_front();
63 l64.pop_front();
64 l128.pop_front();
65 }
66
67 if (timers)
68 timers->stop();
69
70 // Deallocate everything:
71 l4.clear();
72 l16.clear();
73 l32.clear();
74 l64.clear();
75 l128.clear();
76
77 if (timers)
78 timers->restart();
79 }
80 }
81
82 // Test allocations and deallocations of node-based containers (std::list).
83 // In this test pmr::unsynchronized_pool_resource should be faster than
84 // pmr::new_delete_resource().
test_lists_single_thread()85 void test_lists_single_thread()
86 {
87 std::pmr::memory_resource* newdel = std::pmr::new_delete_resource();
88 std::pmr::unsynchronized_pool_resource pool;
89 #ifndef NOTHREAD
90 std::pmr::synchronized_pool_resource syncpool;
91 #endif
92
93 auto run_test = [](auto* memres, std::string name, bool time_dtors) {
94 name += " std::list push/pop";
95 if (time_dtors)
96 name += "/destroy";
97 __gnu_test::time_counter time;
98 __gnu_test::resource_counter resource;
99 start_counters(time, resource);
100 populate_lists(memres, time_dtors ? nullptr : &time);
101 stop_counters(time, resource);
102 report_performance(__FILE__, name, time, resource);
103 };
104
105 for (auto time_dtors : {false, true})
106 {
107 run_test(newdel, "new-delete-1 ", time_dtors);
108 run_test(newdel, "new-delete-2 ", time_dtors);
109 run_test(newdel, "new-delete-3 ", time_dtors);
110
111 // Start with an empty set of pools:
112 pool.release();
113 run_test(&pool, "unsync-pool-1", time_dtors);
114 // Destroy pools and start fresh:
115 pool.release();
116 run_test(&pool, "unsync-pool-2", time_dtors);
117 // Do not destroy pools, reuse allocated memory:
118 run_test(&pool, "unsync-pool-3", time_dtors);
119
120 #ifndef NOTHREAD
121 syncpool.release();
122 run_test(&syncpool, "sync-pool-1 ", time_dtors);
123 // Destroy pools and start fresh:
124 syncpool.release();
125 run_test(&syncpool, "sync-pool-2 ", time_dtors);
126 // Do not destroy pools, reuse allocated memory:
127 run_test(&syncpool, "sync-pool-3 ", time_dtors);
128 #endif
129 }
130 }
131
132 // TODO test non-pooled large allocations from (un)synchronized_pool_resource
133
134 #ifndef NOTHREAD
135 # include <thread>
136 # include <mutex>
137 # include <cassert>
138
139 // Multithreaded std::list test with each thread having its own resource.
140 // (pmr::new_delete vs pmr::unsynchronized_pool vs pmr::synchronized_pool)
141 //
142 // In this test both pmr::unsynchronized_pool_resource and
143 // pmr::synchronized_pool_resource should be faster than
144 // pmr::new_delete_resource().
void test_lists_resource_per_thread()
{
  // Gate mutex: the main thread holds it while spawning workers so that
  // they all begin the timed work at (approximately) the same time.
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);

  // Everything owned by one worker thread.
  struct state
  {
    std::thread thread;

    // Per-thread pool resources:
    std::pmr::unsynchronized_pool_resource unsync;
    std::pmr::synchronized_pool_resource sync;

    // Candidate resources, indexed by `which` below (same order as the
    // resnames array):
    std::pmr::memory_resource* memres[3] = {
      std::pmr::new_delete_resource(), &unsync, &sync
    };
  };

  state states[4];

  const std::string resnames[] = {"new-delete ", "unsync-pool", "sync-pool "};

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
			 __gnu_test::time_counter* timers)
  {
    // NOTE: a *temporary* lock_guard — it blocks until the main thread
    // releases the gate, then unlocks again at the end of the full
    // expression, so the workers run concurrently instead of serializing
    // on the mutex for the duration of populate_lists.
    std::lock_guard<std::mutex>{mx}; // block until the mutex can be locked
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    // Pass no timer when list destruction should be included in the
    // measured time (populate_lists only pauses a non-null timer):
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& s : states)
      s.thread = std::thread{ run_test, s.memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& s : states)
      s.thread.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
      std::string testname = " resource-per-thread std::list push/pop";
      if (time_dtors)
	testname += "/destroy";
      for (int which : {0, 1, 2})
	time_threads(testname, time_dtors, which);
    }
}
198
199 // A naive memory_resource that adds a mutex to unsynchronized_pool_resource
struct locking_pool_resource : std::pmr::unsynchronized_pool_resource
{
  // Serialize allocation with every other allocation/deallocation.
  void* do_allocate(std::size_t bytes, std::size_t alignment) override
  {
    std::lock_guard<std::mutex> lock(mtx);
    return unsynchronized_pool_resource::do_allocate(bytes, alignment);
  }

  // Serialize deallocation likewise.
  void do_deallocate(void* p, std::size_t bytes, std::size_t alignment) override
  {
    std::lock_guard<std::mutex> lock(mtx);
    unsynchronized_pool_resource::do_deallocate(p, bytes, alignment);
  }

  // Single mutex guarding the whole (unsynchronized) pool resource.
  std::mutex mtx;
};
216
217 // Multithreaded std::list test with all threads sharing the same resource.
218 // (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
219 //
220 // pmr::synchronized_pool_resource is not expected to be anywhere near
221 // as fast as pmr::new_delete_resource() here, but should perform much
222 // better than the naive locking_pool_resource type.
void test_lists_shared_resource()
{
  // Gate mutex: held by the main thread while spawning workers so that
  // they all begin the timed work at (approximately) the same time.
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);

  // One instance of each resource, shared by all four threads.
  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;

  // Candidate resources, indexed by `which` below (same order as resnames):
  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };

  std::thread threads[4];

  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
			 __gnu_test::time_counter* timers)
  {
    // NOTE: a *temporary* lock_guard — it blocks until the gate opens,
    // then releases the mutex immediately so the workers run concurrently.
    std::lock_guard<std::mutex>{mx}; // block until the mutex can be locked
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    // Pass no timer when list destruction should be measured too:
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& t : threads)
      t = std::thread{ run_test, memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
    {
      std::string testname = " shared-resource std::list push/pop";
      if (time_dtors)
	testname += "/destroy";
      for (int which : {0, 1, 2})
	time_threads(testname, time_dtors, which);
    }
}
270
271 // TODO threaded test just doing loads of allocations, no deallocs
272 // both with per-thread resource (unsync vs sync vs newdel)
273 // and shared resource (locked vs sync vs newdel)
274
275 // TODO threaded test just doing loads of deallocations, no allocs
276 // both with per-thread resource (unsync vs sync vs newdel)
277 // and shared resource (locked vs sync vs newdel)
278
279 // Multithreaded test where deallocations happen on different threads.
280 // (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
281 //
282 // This hits the slow path for pmr::synchronized_pool_resource, where
283 // an exclusive lock must be taken to access other threads' pools.
284 // pmr::synchronized_pool_resource is not expected to be anywhere near
285 // as fast as pmr::new_delete_resource() here, but should perform much
286 // better than the naive locking_pool_resource type.
void test_cross_thread_dealloc()
{
  const int num_threads = 4;

  // Record of a single allocation, kept so the memory can be freed later
  // (possibly by a different thread than the one that allocated it).
  struct X {
    void* ptr;
    unsigned size;
  };

  // A buffer for each thread, and extra buffers for half of the threads:
  std::vector<X> allocs[num_threads * 3 / 2];
  for (auto& v : allocs)
    v.resize(1000 * iterations);

  // Use a few different pools
  const std::size_t sizes[] = { 8, 16, 8, 16, 32, 64, 8, 16, 32, 64 };

  // Gate mutex, held by time_threads while the workers are spawned.
  std::mutex mx;

  // Body for worker thread i. Every thread first fills allocs[i]; then:
  //  - with_exit && i == 0: the thread returns, leaving its allocations
  //    to be freed cross-thread by thread num_threads/2;
  //  - i < num_threads/2: keeps allocating into allocs[num_threads + i];
  //  - otherwise: frees its own buffer and the buffer filled by thread
  //    (i - num_threads/2), i.e. memory allocated on another thread.
  auto run_test =
  [&, num_threads] (std::pmr::memory_resource* memres, int i, bool with_exit)
  {
    std::size_t counter = 0;
    // NOTE: a *temporary* lock_guard — blocks until the gate opens, then
    // releases the mutex so all workers proceed concurrently.
    std::lock_guard<std::mutex>{mx};
    // Fill this thread's buffer with allocations:
    for (X& x : allocs[i])
      {
	x.size = sizes[counter++ % 10];
	x.ptr = memres->allocate(x.size, 1);
      }

    if (with_exit && i == 0)
      {
	// One of the threads exits, so that its pools transfer to the
	// non-thread-specific list of pools.
	return;
      }
    else if (i < num_threads / 2)
      {
	// Other threads continue allocating, into the extra buffers:
	for (X& x : allocs[num_threads + i])
	  {
	    x.size = sizes[counter++ % 10];
	    x.ptr = memres->allocate(x.size, 1);
	  }
      }
    else
      {
	// Half of the threads start deallocating their own memory and the
	// memory belonging to another pool
	const int other = i - num_threads / 2;
	for (unsigned n = 0; n < allocs[i].size(); ++n)
	  {
	    // Deallocate memory allocated in this thread:
	    X& x1 = allocs[i][n];
	    memres->deallocate(x1.ptr, x1.size, 1);
	    x1 = {};
	    // Deallocate memory allocated in another thread:
	    X& x2 = allocs[other][n];
	    memres->deallocate(x2.ptr, x2.size, 1);
	    x2 = {};
	  }
      }
  };

  std::thread threads[num_threads];

  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;

  // Candidate resources, indexed by `which` (same order as resnames):
  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };
  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto time_threads = [&] (std::string name, int which, bool with_exit)
  {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    // Hold the gate while the workers are created ...
    std::unique_lock<std::mutex> gate(mx);
    for (auto& t : threads)
      t = std::thread{ run_test, memres[which], &t - threads, with_exit };
    start_counters(time, resource);
    gate.unlock(); // ... then open it so the timed work starts together.
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + name, time, resource);

    // Clean up: the first num_threads buffers were freed by the workers
    // (run_test zeroes each entry after deallocating); the extra buffers
    // still hold live allocations that must be freed here.
    for (auto& a : allocs)
      {
	const int i = (&a - allocs);
	if (i < num_threads) // These allocations were freed
	  for (auto& x : a)
	    {
	      assert(x.ptr == nullptr);
	    }
	else if (with_exit && i == num_threads)
	  // Thread 0 exited before filling allocs[num_threads], so the
	  // entries are still zero and there is nothing to deallocate.
	  ;
	else
	  for (auto& x : a)
	    {
	      memres[which]->deallocate(x.ptr, x.size, 1);
	      x = {};
	    }
      }
  };

  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc", which, false);
  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc w/exit", which, true);
}
401 #endif
402
int main()
{
  // Always run the single-threaded benchmarks.
  test_lists_single_thread();
#ifndef NOTHREAD
  // Multi-threaded benchmarks; the TEST_B marker near the top of the file
  // makes the harness build the test both with and without NOTHREAD.
  test_lists_resource_per_thread();
  test_lists_shared_resource();
  test_cross_thread_dealloc();
#endif
}
412