// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// You should have received a copy of the GNU General Public License along
// with this library; see the file COPYING3.  If not see
// <http://www.gnu.org/licenses/>.

// Override the -std flag in the check_performance script: STD=gnu++17

// Run the test as both single- and multi-threaded: TEST_B

#include <memory_resource>
#include <list>
#include <string>
#include <testsuite_performance.h>

const int iterations = 100;

// Insert and remove elements of various sizes in std::list containers.
// If timers!=nullptr the function will pause the timer while the lists
// are cleared and deallocated, so that only insertions/removals are timed.
// Otherwise, the time taken to deallocate the lists is also counted.
void populate_lists(std::pmr::memory_resource* r, __gnu_test::time_counter* timers, int kmax = iterations) { struct size16 { char c[16]; }; struct size32 { char c[32]; }; struct size64 { char c[64]; }; struct size128 { char c[128]; }; std::pmr::list l4(r); std::pmr::list l16(r); std::pmr::list l32(r); std::pmr::list l64(r); std::pmr::list l128(r); const int imax = 1000; const int jmax = 100; for (int k = 0; k < kmax; ++k) { for (int i = 0; i < imax; ++i) { for (int j = 0; j < jmax; ++j) { l4.emplace_back(); l16.emplace_back(); l32.emplace_back(); l64.emplace_back(); l128.emplace_back(); } l4.pop_front(); l16.pop_front(); l32.pop_front(); l64.pop_front(); l128.pop_front(); } if (timers) timers->stop(); // Deallocate everything: l4.clear(); l16.clear(); l32.clear(); l64.clear(); l128.clear(); if (timers) timers->restart(); } } // Test allocations and deallocations of node-based containers (std::list). // In this test pmr::unsynchronized_pool_resource should be faster than // pmr::new_delete_resource(). void test_lists_single_thread() { std::pmr::memory_resource* newdel = std::pmr::new_delete_resource(); std::pmr::unsynchronized_pool_resource pool; #ifndef NOTHREAD std::pmr::synchronized_pool_resource syncpool; #endif auto run_test = [](auto* memres, std::string name, bool time_dtors) { name += " std::list push/pop"; if (time_dtors) name += "/destroy"; __gnu_test::time_counter time; __gnu_test::resource_counter resource; start_counters(time, resource); populate_lists(memres, time_dtors ? 
nullptr : &time); stop_counters(time, resource); report_performance(__FILE__, name, time, resource); }; for (auto time_dtors : {false, true}) { run_test(newdel, "new-delete-1 ", time_dtors); run_test(newdel, "new-delete-2 ", time_dtors); run_test(newdel, "new-delete-3 ", time_dtors); // Start with an empty set of pools: pool.release(); run_test(&pool, "unsync-pool-1", time_dtors); // Destroy pools and start fresh: pool.release(); run_test(&pool, "unsync-pool-2", time_dtors); // Do not destroy pools, reuse allocated memory: run_test(&pool, "unsync-pool-3", time_dtors); #ifndef NOTHREAD syncpool.release(); run_test(&syncpool, "sync-pool-1 ", time_dtors); // Destroy pools and start fresh: syncpool.release(); run_test(&syncpool, "sync-pool-2 ", time_dtors); // Do not destroy pools, reuse allocated memory: run_test(&syncpool, "sync-pool-3 ", time_dtors); #endif } } // TODO test non-pooled large allocations from (un)synchronized_pool_resource #ifndef NOTHREAD # include # include # include // Multithreaded std::list test with each thread having its own resource. // (pmr::new_delete vs pmr::unsynchronized_pool vs pmr::synchronized_pool) // // In this test both pmr::unsynchronized_pool_resource and // pmr::synchronized_pool_resource should be faster than // pmr::new_delete_resource(). 
void test_lists_resource_per_thread() { std::mutex mx; std::unique_lock gate(mx, std::defer_lock); struct state { std::thread thread; // Per-thread pool resources: std::pmr::unsynchronized_pool_resource unsync; std::pmr::synchronized_pool_resource sync; std::pmr::memory_resource* memres[3] = { std::pmr::new_delete_resource(), &unsync, &sync }; }; state states[4]; const std::string resnames[] = {"new-delete ", "unsync-pool", "sync-pool "}; auto run_test = [&mx] (std::pmr::memory_resource* memres, __gnu_test::time_counter* timers) { std::lock_guard{mx}; // block until the mutex can be locked populate_lists(memres, timers); }; auto time_threads = [&] (std::string testname, bool time_dtors, int which) { __gnu_test::time_counter time; __gnu_test::resource_counter resource; gate.lock(); auto* time_ptr = time_dtors ? nullptr : &time; for (auto& s : states) s.thread = std::thread{ run_test, s.memres[which], time_ptr }; start_counters(time, resource); gate.unlock(); // let the threads run for (auto& s : states) s.thread.join(); stop_counters(time, resource); report_performance(__FILE__, resnames[which] + testname, time, resource); }; for (auto time_dtors : {false, true}) { std::string testname = " resource-per-thread std::list push/pop"; if (time_dtors) testname += "/destroy"; for (int which : {0, 1, 2}) time_threads(testname, time_dtors, which); } } // A naive memory_resource that adds a mutex to unsynchronized_pool_resource struct locking_pool_resource : std::pmr::unsynchronized_pool_resource { void* do_allocate(std::size_t b, std::size_t a) override { std::lock_guard l(m); return unsynchronized_pool_resource::do_allocate(b, a); } void do_deallocate(void* p, std::size_t b, std::size_t a) override { std::lock_guard l(m); return unsynchronized_pool_resource::do_deallocate(p, b, a); } std::mutex m; }; // Multithreaded std::list test with all threads sharing the same resource. 
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool) // // pmr::synchronized_pool_resource is not expected to be anywhere near // as fast as pmr::new_delete_resource() here, but should perform much // better than the naive locking_pool_resource type. void test_lists_shared_resource() { std::mutex mx; std::unique_lock gate(mx, std::defer_lock); locking_pool_resource unsync; std::pmr::synchronized_pool_resource sync; std::pmr::memory_resource* memres[3] = { std::pmr::new_delete_resource(), &unsync, &sync }; std::thread threads[4]; const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " }; auto run_test = [&mx] (std::pmr::memory_resource* memres, __gnu_test::time_counter* timers) { std::lock_guard{mx}; // block until the mutex can be locked populate_lists(memres, timers); }; auto time_threads = [&] (std::string testname, bool time_dtors, int which) { __gnu_test::time_counter time; __gnu_test::resource_counter resource; gate.lock(); auto* time_ptr = time_dtors ? nullptr : &time; for (auto& t : threads) t = std::thread{ run_test, memres[which], time_ptr }; start_counters(time, resource); gate.unlock(); // let the threads run for (auto& t : threads) t.join(); stop_counters(time, resource); report_performance(__FILE__, resnames[which] + testname, time, resource); }; for (auto time_dtors : {false, true}) { std::string testname = " shared-resource std::list push/pop"; if (time_dtors) testname += "/destroy"; for (int which : {0, 1, 2}) time_threads(testname, time_dtors, which); } } // TODO threaded test just doing loads of allocations, no deallocs // both with per-thread resource (unsync vs sync vs newdel) // and shared resource (locked vs sync vs newdel) // TODO threaded test just doing loads of deallocations, no allocs // both with per-thread resource (unsync vs sync vs newdel) // and shared resource (locked vs sync vs newdel) // Multithreaded test where deallocations happen on different threads. 
// (new_delete vs unsynchronized_pool+mutex vs synchronized_pool) // // This hits the slow path for pmr::synchronized_pool_resource, where // an exclusive lock must be taken to access other threads' pools. // pmr::synchronized_pool_resource is not expected to be anywhere near // as fast as pmr::new_delete_resource() here, but should perform much // better than the naive locking_pool_resource type. void test_cross_thread_dealloc() { const int num_threads = 4; struct X { void* ptr; unsigned size; }; // A buffer for each thread, and extra buffers for half of the threads: std::vector allocs[num_threads * 3 / 2]; for (auto& v : allocs) v.resize(1000 * iterations); // Use a few different pools const std::size_t sizes[] = { 8, 16, 8, 16, 32, 64, 8, 16, 32, 64 }; std::mutex mx; auto run_test = [&, num_threads] (std::pmr::memory_resource* memres, int i, bool with_exit) { std::size_t counter = 0; std::lock_guard{mx}; // Fill this thread's buffer with allocations: for (X& x : allocs[i]) { x.size = sizes[counter++ % 10]; x.ptr = memres->allocate(x.size, 1); } if (with_exit && i == 0) { // One of the threads exits, so that its pools transfer to the // non-thread-specific list of pools. 
return; } else if (i < num_threads / 2) { // Other threads continue allocating, into the extra buffers: for (X& x : allocs[num_threads + i]) { x.size = sizes[counter++ % 10]; x.ptr = memres->allocate(x.size, 1); } } else { // Half of the threads start deallocating their own memory and the // memory belonging to another pool const int other = i - num_threads / 2; for (unsigned n = 0; n < allocs[i].size(); ++n) { // Deallocate memory allocated in this thread: X& x1 = allocs[i][n]; memres->deallocate(x1.ptr, x1.size, 1); x1 = {}; // Deallocate memory allocated in another thread: X& x2 = allocs[other][n]; memres->deallocate(x2.ptr, x2.size, 1); x2 = {}; } } }; std::thread threads[num_threads]; locking_pool_resource unsync; std::pmr::synchronized_pool_resource sync; std::pmr::memory_resource* memres[3] = { std::pmr::new_delete_resource(), &unsync, &sync }; const std::string resnames[3] = { "new-delete", "mutex-pool", "sync-pool " }; auto time_threads = [&] (std::string name, int which, bool with_exit) { __gnu_test::time_counter time; __gnu_test::resource_counter resource; std::unique_lock gate(mx); for (auto& t : threads) t = std::thread{ run_test, memres[which], &t - threads, with_exit }; start_counters(time, resource); gate.unlock(); for (auto& t : threads) t.join(); stop_counters(time, resource); report_performance(__FILE__, resnames[which] + name, time, resource); // Clean up: for (auto& a : allocs) { const int i = (&a - allocs); if (i < num_threads) // These allocations were freed for (auto& x : a) { assert(x.ptr == nullptr); } else if (with_exit && i == num_threads) ; else for (auto& x : a) { memres[which]->deallocate(x.ptr, x.size, 1); x = {}; } } }; for (int which : {0, 1, 2}) time_threads(" cross-thread dealloc", which, false); for (int which : {0, 1, 2}) time_threads(" cross-thread dealloc w/exit", which, true); } #endif int main() { test_lists_single_thread(); #ifndef NOTHREAD test_lists_resource_per_thread(); test_lists_shared_resource(); 
test_cross_thread_dealloc(); #endif }