// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// Override the -std flag in the check_performance script: STD=gnu++17

// Run the test as both single- and multi-threaded: TEST_B

#include <memory_resource>
#include <list>
#include <string>
#include <testsuite_performance.h>

const int iterations = 100;

// Insert and remove elements of various sizes in std::list containers.
// If timers != nullptr the function will pause the timer while the lists
// are cleared and deallocated, so that only insertions/removals are timed.
// Otherwise, the time taken to deallocate the lists is also counted.
void
populate_lists(std::pmr::memory_resource* r, __gnu_test::time_counter* timers,
               int kmax = iterations)
{
  struct size16 { char c[16]; };
  struct size32 { char c[32]; };
  struct size64 { char c[64]; };
  struct size128 { char c[128]; };

  std::pmr::list<int> l4(r);
  std::pmr::list<size16> l16(r);
  std::pmr::list<size32> l32(r);
  std::pmr::list<size64> l64(r);
  std::pmr::list<size128> l128(r);

  const int imax = 1000;
  const int jmax = 100;
  for (int k = 0; k < kmax; ++k)
  {
    for (int i = 0; i < imax; ++i)
    {
      for (int j = 0; j < jmax; ++j)
      {
        l4.emplace_back();
        l16.emplace_back();
        l32.emplace_back();
        l64.emplace_back();
        l128.emplace_back();
      }
      l4.pop_front();
      l16.pop_front();
      l32.pop_front();
      l64.pop_front();
      l128.pop_front();
    }

    if (timers)
      timers->stop();

    // Deallocate everything:
    l4.clear();
    l16.clear();
    l32.clear();
    l64.clear();
    l128.clear();

    if (timers)
      timers->restart();
  }
}

// Test allocations and deallocations of node-based containers (std::list).
// In this test pmr::unsynchronized_pool_resource should be faster than
// pmr::new_delete_resource().
void test_lists_single_thread()
{
  std::pmr::memory_resource* newdel = std::pmr::new_delete_resource();
  std::pmr::unsynchronized_pool_resource pool;
#ifndef NOTHREAD
  std::pmr::synchronized_pool_resource syncpool;
#endif

  auto run_test = [](auto* memres, std::string name, bool time_dtors) {
    name += " std::list push/pop";
    if (time_dtors)
      name += "/destroy";
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    start_counters(time, resource);
    populate_lists(memres, time_dtors ? nullptr : &time);
    stop_counters(time, resource);
    report_performance(__FILE__, name, time, resource);
  };

  for (auto time_dtors : {false, true})
  {
    run_test(newdel, "new-delete-1 ", time_dtors);
    run_test(newdel, "new-delete-2 ", time_dtors);
    run_test(newdel, "new-delete-3 ", time_dtors);

    // Start with an empty set of pools:
    pool.release();
    run_test(&pool, "unsync-pool-1", time_dtors);
    // Destroy pools and start fresh:
    pool.release();
    run_test(&pool, "unsync-pool-2", time_dtors);
    // Do not destroy pools, reuse allocated memory:
    run_test(&pool, "unsync-pool-3", time_dtors);

#ifndef NOTHREAD
    syncpool.release();
    run_test(&syncpool, "sync-pool-1  ", time_dtors);
    // Destroy pools and start fresh:
    syncpool.release();
    run_test(&syncpool, "sync-pool-2  ", time_dtors);
    // Do not destroy pools, reuse allocated memory:
    run_test(&syncpool, "sync-pool-3  ", time_dtors);
#endif
  }
}

// TODO test non-pooled large allocations from (un)synchronized_pool_resource;
// a sketch of what that might look like follows.

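// A minimal sketch of what that TODO might look like (illustrative only,
// not called from main()). Requests larger than
// pool_options::largest_required_pool_block bypass the pools and go
// straight to the upstream resource; the 256-byte threshold and the
// 4096-byte request size below are assumptions, not values used elsewhere
// in this test.
void sketch_large_alloc_test()
{
  std::pmr::pool_options opts;
  opts.largest_required_pool_block = 256; // anything bigger is non-pooled
  std::pmr::unsynchronized_pool_resource pool(opts);

  __gnu_test::time_counter time;
  __gnu_test::resource_counter resource;
  start_counters(time, resource);
  for (int i = 0; i < 1000 * iterations; ++i)
  {
    // 4096 > largest_required_pool_block, so these requests exercise the
    // non-pooled path through the upstream resource:
    void* p = pool.allocate(4096);
    pool.deallocate(p, 4096);
  }
  stop_counters(time, resource);
  report_performance(__FILE__, "unsync-pool large allocs", time, resource);
}
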
#ifndef NOTHREAD
# include <thread>
# include <mutex>
# include <vector>
# include <cassert>

// Multithreaded std::list test with each thread having its own resource.
// (pmr::new_delete vs pmr::unsynchronized_pool vs pmr::synchronized_pool)
//
// In this test both pmr::unsynchronized_pool_resource and
// pmr::synchronized_pool_resource should be faster than
// pmr::new_delete_resource().
void test_lists_resource_per_thread()
{
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);
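  // The main thread locks 'gate' before spawning the worker threads; each
  // worker briefly locks (and immediately releases) 'mx' before doing any
  // real work, so no thread starts populating lists until the timers are
  // running and the gate has been unlocked.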

  struct state
  {
    std::thread thread;

    // Per-thread pool resources:
    std::pmr::unsynchronized_pool_resource unsync;
    std::pmr::synchronized_pool_resource sync;

    std::pmr::memory_resource* memres[3] = {
      std::pmr::new_delete_resource(), &unsync, &sync
    };
  };

  state states[4];

  const std::string resnames[] = {"new-delete ", "unsync-pool", "sync-pool  "};

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
                         __gnu_test::time_counter* timers)
  {
    std::lock_guard<std::mutex>{mx};  // block until the mutex can be locked
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& s : states)
      s.thread = std::thread{ run_test, s.memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& s : states)
      s.thread.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
  {
    std::string testname = " resource-per-thread std::list push/pop";
    if (time_dtors)
      testname += "/destroy";
    for (int which : {0, 1, 2})
      time_threads(testname, time_dtors, which);
  }
}

// A naive memory_resource that adds a mutex to unsynchronized_pool_resource
struct locking_pool_resource : std::pmr::unsynchronized_pool_resource
{
  void* do_allocate(std::size_t b, std::size_t a) override
  {
    std::lock_guard<std::mutex> l(m);
    return unsynchronized_pool_resource::do_allocate(b, a);
  }

  void do_deallocate(void* p, std::size_t b, std::size_t a) override
  {
    std::lock_guard<std::mutex> l(m);
    return unsynchronized_pool_resource::do_deallocate(p, b, a);
  }

  std::mutex m;
};
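// (The single mutex serializes every allocation and deallocation across
// all threads, whereas pmr::synchronized_pool_resource keeps per-thread
// pools and only takes an exclusive lock on its slower cross-thread paths.)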

// Multithreaded std::list test with all threads sharing the same resource.
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
//
// pmr::synchronized_pool_resource is not expected to be anywhere near
// as fast as pmr::new_delete_resource() here, but should perform much
// better than the naive locking_pool_resource type.
void test_lists_shared_resource()
{
  std::mutex mx;
  std::unique_lock<std::mutex> gate(mx, std::defer_lock);

  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;

  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };

  std::thread threads[4];

  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto run_test = [&mx] (std::pmr::memory_resource* memres,
                         __gnu_test::time_counter* timers)
  {
    std::lock_guard<std::mutex>{mx};  // block until the mutex can be locked
    populate_lists(memres, timers);
  };

  auto time_threads = [&] (std::string testname, bool time_dtors, int which) {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    gate.lock();
    auto* time_ptr = time_dtors ? nullptr : &time;
    for (auto& t : threads)
      t = std::thread{ run_test, memres[which], time_ptr };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + testname, time, resource);
  };

  for (auto time_dtors : {false, true})
  {
    std::string testname = " shared-resource std::list push/pop";
    if (time_dtors)
      testname += "/destroy";
    for (int which : {0, 1, 2})
      time_threads(testname, time_dtors, which);
  }
}

// TODO threaded test just doing loads of allocations, no deallocs,
// both with per-thread resource (unsync vs sync vs newdel)
// and shared resource (locked vs sync vs newdel);
// see the sketch after these TODOs.

// TODO threaded test just doing loads of deallocations, no allocs,
// both with per-thread resource (unsync vs sync vs newdel)
// and shared resource (locked vs sync vs newdel)

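// A minimal sketch of the alloc-only, resource-per-thread case from the
// first TODO above (illustrative only, not called from main()). Each
// thread hammers allocate() on its own pool; the 32-byte block size and
// the per-thread allocation count are assumptions. Deallocation happens
// outside the timed region via release().
void sketch_alloc_only_per_thread()
{
  const int num_threads = 4;
  std::pmr::unsynchronized_pool_resource pools[num_threads];
  std::vector<void*> ptrs[num_threads];
  for (auto& v : ptrs)
    v.resize(1000 * iterations);

  __gnu_test::time_counter time;
  __gnu_test::resource_counter resource;
  start_counters(time, resource);
  std::thread threads[num_threads];
  for (int i = 0; i < num_threads; ++i)
    threads[i] = std::thread{ [&, i] {
      for (auto& p : ptrs[i])
        p = pools[i].allocate(32, 8); // allocations only, no deallocs
    } };
  for (auto& t : threads)
    t.join();
  stop_counters(time, resource);
  report_performance(__FILE__, "unsync-pool alloc-only", time, resource);

  // Untimed clean-up: release() returns all memory to upstream at once.
  for (auto& pool : pools)
    pool.release();
}
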
// Multithreaded test where deallocations happen on different threads.
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool)
//
// This hits the slow path for pmr::synchronized_pool_resource, where
// an exclusive lock must be taken to access other threads' pools.
// pmr::synchronized_pool_resource is not expected to be anywhere near
// as fast as pmr::new_delete_resource() here, but should perform much
// better than the naive locking_pool_resource type.
void test_cross_thread_dealloc()
{
  const int num_threads = 4;

  struct X {
    void* ptr;
    unsigned size;
  };

  // A buffer for each thread, and extra buffers for half of the threads:
  std::vector<X> allocs[num_threads * 3 / 2];
  for (auto& v : allocs)
    v.resize(1000 * iterations);

  // Use a few different sizes, so that several pools are exercised:
  const std::size_t sizes[] = { 8, 16, 8, 16, 32, 64, 8, 16, 32, 64 };

  std::mutex mx;

  auto run_test =
  [&, num_threads] (std::pmr::memory_resource* memres, int i, bool with_exit)
  {
    std::size_t counter = 0;
    std::lock_guard<std::mutex>{mx};  // block until the mutex can be locked
    // Fill this thread's buffer with allocations:
    for (X& x : allocs[i])
    {
      x.size = sizes[counter++ % 10];
      x.ptr = memres->allocate(x.size, 1);
    }

    if (with_exit && i == 0)
    {
      // One of the threads exits, so that its pools transfer to the
      // non-thread-specific list of pools.
      return;
    }
    else if (i < num_threads / 2)
    {
      // Other threads continue allocating, into the extra buffers:
      for (X& x : allocs[num_threads + i])
      {
        x.size = sizes[counter++ % 10];
        x.ptr = memres->allocate(x.size, 1);
      }
    }
    else
    {
      // Half of the threads start deallocating their own memory and the
      // memory allocated by another thread:
      const int other = i - num_threads / 2;
      for (unsigned n = 0; n < allocs[i].size(); ++n)
      {
        // Deallocate memory allocated in this thread:
        X& x1 = allocs[i][n];
        memres->deallocate(x1.ptr, x1.size, 1);
        x1 = {};
        // Deallocate memory allocated in another thread:
        X& x2 = allocs[other][n];
        memres->deallocate(x2.ptr, x2.size, 1);
        x2 = {};
      }
    }
  };

  std::thread threads[num_threads];

  locking_pool_resource unsync;
  std::pmr::synchronized_pool_resource sync;

  std::pmr::memory_resource* memres[3] = {
    std::pmr::new_delete_resource(), &unsync, &sync
  };
  const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " };

  auto time_threads = [&] (std::string name, int which, bool with_exit)
  {
    __gnu_test::time_counter time;
    __gnu_test::resource_counter resource;
    std::unique_lock<std::mutex> gate(mx);
    for (auto& t : threads)
      t = std::thread{ run_test, memres[which], &t - threads, with_exit };
    start_counters(time, resource);
    gate.unlock(); // let the threads run
    for (auto& t : threads)
      t.join();
    stop_counters(time, resource);
    report_performance(__FILE__, resnames[which] + name, time, resource);

    // Clean up:
    for (auto& a : allocs)
    {
      const int i = (&a - allocs);
      if (i < num_threads) // These allocations were freed by the threads
        for (auto& x : a)
        {
          assert(x.ptr == nullptr);
        }
      else if (with_exit && i == num_threads)
        ; // Thread 0 exited early, so its extra buffer was never filled
      else
        for (auto& x : a)
        {
          memres[which]->deallocate(x.ptr, x.size, 1);
          x = {};
        }
    }
  };

  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc", which, false);
  for (int which : {0, 1, 2})
    time_threads(" cross-thread dealloc w/exit", which, true);
}
#endif

int main()
{
  test_lists_single_thread();
#ifndef NOTHREAD
  test_lists_resource_per_thread();
  test_lists_shared_resource();
  test_cross_thread_dealloc();
#endif
}