1 /*
2 * Copyright (c) Facebook, Inc. and its affiliates.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include <folly/portability/Config.h>
18
19 // AtomicSharedPtr-detail.h only works with libstdc++, so skip these tests for
20 // other vendors
21 #if defined(__GLIBCXX__)
22
23 #include <atomic>
24 #include <memory>
25 #include <thread>
26 #include <vector>
27
28 #include <folly/Benchmark.h>
29 #include <folly/Portability.h>
30 #include <folly/concurrency/AtomicSharedPtr.h>
31 #include <folly/concurrency/CoreCachedSharedPtr.h>
32 #include <folly/experimental/ReadMostlySharedPtr.h>
33 #include <folly/portability/GTest.h>
34
35 namespace {
36
// Runs op(threadIndex) concurrently on numThreads threads and joins them all
// before returning. Defaults to one thread per hardware thread.
template <class Operation>
void parallelRun(
    Operation op, size_t numThreads = std::thread::hardware_concurrency()) {
  std::vector<std::thread> threads;
  threads.reserve(numThreads);
  for (size_t t = 0; t < numThreads; ++t) {
    threads.emplace_back([&, t] { op(t); });
  }
  for (auto& t : threads) {
    t.join();
  }
}
48
49 } // namespace
50
// Basic sanity: CoreCachedSharedPtr::get() returns a shared_ptr to the same
// object from every thread, and ownership semantics (expiration) match those
// of a plain shared_ptr.
TEST(CoreCachedSharedPtr, Basic) {
  auto p = std::make_shared<int>(1);
  std::weak_ptr<int> wp(p);

  folly::CoreCachedSharedPtr<int> cached(p);
  folly::CoreCachedWeakPtr<int> wcached(cached);

  std::shared_ptr<int> p2 = cached.get();
  std::weak_ptr<int> wp2 = wcached.get();
  ASSERT_TRUE(p2 != nullptr);
  ASSERT_EQ(*p2, 1);
  ASSERT_FALSE(wp2.expired());

  // Check that other cores get the correct shared_ptr too.
  parallelRun([&](size_t) {
    EXPECT_TRUE(cached.get().get() == p.get());
    EXPECT_EQ(*cached.get(), 1);
    EXPECT_EQ(*wcached.lock(), 1);
  });

  p.reset();
  cached.reset();
  // p2 should survive.
  ASSERT_FALSE(wp.expired());
  // Here we don't know anything about wp2: could be expired even if
  // there is a living reference to the main object.

  p2.reset();
  ASSERT_TRUE(wp.expired());
  ASSERT_TRUE(wp2.expired());
}
82
// Stress + consistency test for AtomicCoreCachedSharedPtr.
TEST(CoreCachedSharedPtr, AtomicCoreCachedSharedPtr) {
  constexpr size_t kIters = 2000;
  {
    // All threads both reset and read the pointer concurrently; this is
    // primarily a stress test for data races under ASAN/TSAN.
    folly::AtomicCoreCachedSharedPtr<size_t> p;
    parallelRun([&](size_t) {
      for (size_t i = 0; i < kIters; ++i) {
        p.reset(std::make_shared<size_t>(i));
        EXPECT_TRUE(p.get());
        // Just read the value, and ensure that ASAN/TSAN do not complain.
        EXPECT_GE(*p.get(), 0);
      }
    });
  }
  {
    // One writer thread, all other readers, verify consistency: the value
    // read through p must never go backwards relative to what any reader
    // has already observed.
    std::atomic<size_t> largestValueObserved{0};
    folly::AtomicCoreCachedSharedPtr<size_t> p{std::make_shared<size_t>(0)};
    parallelRun([&](size_t t) {
      if (t == 0) {
        // Writer: publish monotonically increasing values 1..kIters.
        for (size_t i = 0; i < kIters; ++i) {
          p.reset(std::make_shared<size_t>(i + 1));
        }
      } else {
        while (true) {
          auto exp = largestValueObserved.load();
          auto value = *p.get();
          EXPECT_GE(value, exp);
          // Maintain the maximum value observed so far. As soon as one thread
          // observes an update, they all should observe it.
          while (value > exp &&
                 !largestValueObserved.compare_exchange_weak(exp, value)) {
          }
          if (exp == kIters) {
            break;
          }
        }
      }
    });
  }
}
123
124 namespace {
125
126 template <class Holder>
testAliasingCornerCases()127 void testAliasingCornerCases() {
128 {
129 Holder h;
130 auto p1 = std::make_shared<int>(1);
131 std::weak_ptr<int> w1 = p1;
132 // Aliasing constructor, p2 is nullptr but still manages the object in p1.
133 std::shared_ptr<int> p2(p1, nullptr);
134 // And now it's the only reference left.
135 p1.reset();
136 EXPECT_FALSE(w1.expired());
137
138 // Pass the ownership to the Holder.
139 h.reset(p2);
140 p2.reset();
141
142 // Object should still be alive.
143 EXPECT_FALSE(w1.expired());
144
145 // And resetting will destroy it.
146 h.reset();
147 folly::hazptr_cleanup();
148 EXPECT_TRUE(w1.expired());
149 }
150
151 {
152 Holder h;
153 int x = 1;
154 // p points to x, but has no managed object.
155 std::shared_ptr<int> p(std::shared_ptr<int>{}, &x);
156 h.reset(p);
157 EXPECT_TRUE(h.get().get() == &x);
158 }
159 }
160
161 } // namespace
162
// Aliasing-constructor corner cases for the non-atomic holder.
TEST(CoreCachedSharedPtr, AliasingCornerCases) {
  testAliasingCornerCases<folly::CoreCachedSharedPtr<int>>();
}
166
// Aliasing-constructor corner cases for the atomic holder.
TEST(CoreCachedSharedPtr, AliasingCornerCasesAtomic) {
  testAliasingCornerCases<folly::AtomicCoreCachedSharedPtr<int>>();
}
170
171 namespace {
172
173 template <class Operation>
benchmarkParallelRun(Operation op,size_t numThreads)174 size_t benchmarkParallelRun(Operation op, size_t numThreads) {
175 constexpr size_t kIters = 2 * 1000 * 1000;
176
177 std::vector<std::thread> threads;
178
179 // Prevent the compiler from hoisting code out of the loop.
180 auto opNoinline = [&]() FOLLY_NOINLINE { op(); };
181
182 std::atomic<size_t> ready = 0;
183 std::atomic<bool> done = false;
184 std::atomic<size_t> iters = 0;
185
186 for (size_t t = 0; t < numThreads; ++t) {
187 threads.emplace_back([&] {
188 ++ready;
189 while (ready.load() < numThreads) {
190 }
191 size_t i = 0;
192 // Interrupt all workers as soon as the first one is done, so the overall
193 // wall time is close to the time spent in each thread.
194 for (; !done.load(std::memory_order_acquire) && i < kIters; ++i) {
195 opNoinline();
196 }
197 done = true;
198 iters += i;
199 });
200 }
201
202 for (auto& t : threads) {
203 t.join();
204 }
205
206 return iters / numThreads;
207 }
208
benchmarkSharedPtrAcquire(size_t numThreads)209 size_t benchmarkSharedPtrAcquire(size_t numThreads) {
210 auto p = std::make_shared<int>(1);
211 return benchmarkParallelRun([&] { return p; }, numThreads);
212 }
213
benchmarkWeakPtrLock(size_t numThreads)214 size_t benchmarkWeakPtrLock(size_t numThreads) {
215 auto p = std::make_shared<int>(1);
216 std::weak_ptr<int> wp = p;
217 return benchmarkParallelRun([&] { return wp.lock(); }, numThreads);
218 }
219
benchmarkAtomicSharedPtrAcquire(size_t numThreads)220 size_t benchmarkAtomicSharedPtrAcquire(size_t numThreads) {
221 auto s = std::make_shared<int>(1);
222 folly::atomic_shared_ptr<int> p;
223 p.store(s);
224 return benchmarkParallelRun([&] { return p.load(); }, numThreads);
225 }
226
benchmarkCoreCachedSharedPtrAcquire(size_t numThreads)227 size_t benchmarkCoreCachedSharedPtrAcquire(size_t numThreads) {
228 folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
229 return benchmarkParallelRun([&] { return p.get(); }, numThreads);
230 }
231
benchmarkCoreCachedWeakPtrLock(size_t numThreads)232 size_t benchmarkCoreCachedWeakPtrLock(size_t numThreads) {
233 folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
234 folly::CoreCachedWeakPtr<int> wp(p);
235 return benchmarkParallelRun([&] { return wp.lock(); }, numThreads);
236 }
237
benchmarkAtomicCoreCachedSharedPtrAcquire(size_t numThreads)238 size_t benchmarkAtomicCoreCachedSharedPtrAcquire(size_t numThreads) {
239 folly::AtomicCoreCachedSharedPtr<int> p(std::make_shared<int>(1));
240 return benchmarkParallelRun([&] { return p.get(); }, numThreads);
241 }
242
benchmarkReadMostlySharedPtrAcquire(size_t numThreads)243 size_t benchmarkReadMostlySharedPtrAcquire(size_t numThreads) {
244 folly::ReadMostlyMainPtr<int> p{std::make_shared<int>(1)};
245 return benchmarkParallelRun([&] { return p.getShared(); }, numThreads);
246 }
247
benchmarkReadMostlyWeakPtrLock(size_t numThreads)248 size_t benchmarkReadMostlyWeakPtrLock(size_t numThreads) {
249 folly::ReadMostlyMainPtr<int> p{std::make_shared<int>(1)};
250 folly::ReadMostlyWeakPtr<int> w{p};
251 return benchmarkParallelRun([&] { return w.lock(); }, numThreads);
252 }
253
254 } // namespace
255
// For a given thread count, registers one benchmark per acquire/lock strategy
// so all strategies can be compared at the same concurrency level.
// NOTE: "Aquire" (sic) in CoreCachedSharedPtrAquire is kept to match the
// benchmark names in the recorded results below.
#define BENCHMARK_THREADS(THREADS)                                     \
  BENCHMARK_MULTI(SharedPtrAcquire_##THREADS##Threads) {               \
    return benchmarkSharedPtrAcquire(THREADS);                         \
  }                                                                    \
  BENCHMARK_MULTI(WeakPtrLock_##THREADS##Threads) {                    \
    return benchmarkWeakPtrLock(THREADS);                              \
  }                                                                    \
  BENCHMARK_MULTI(AtomicSharedPtrAcquire_##THREADS##Threads) {         \
    return benchmarkAtomicSharedPtrAcquire(THREADS);                   \
  }                                                                    \
  BENCHMARK_MULTI(CoreCachedSharedPtrAquire_##THREADS##Threads) {      \
    return benchmarkCoreCachedSharedPtrAcquire(THREADS);               \
  }                                                                    \
  BENCHMARK_MULTI(CoreCachedWeakPtrLock_##THREADS##Threads) {          \
    return benchmarkCoreCachedWeakPtrLock(THREADS);                    \
  }                                                                    \
  BENCHMARK_MULTI(AtomicCoreCachedSharedPtrAcquire_##THREADS##Threads) { \
    return benchmarkAtomicCoreCachedSharedPtrAcquire(THREADS);         \
  }                                                                    \
  BENCHMARK_MULTI(ReadMostlySharedPtrAcquire_##THREADS##Threads) {     \
    return benchmarkReadMostlySharedPtrAcquire(THREADS);               \
  }                                                                    \
  BENCHMARK_MULTI(ReadMostlyWeakPtrLock_##THREADS##Threads) {          \
    return benchmarkReadMostlyWeakPtrLock(THREADS);                    \
  }                                                                    \
  BENCHMARK_DRAW_LINE();
282
283 BENCHMARK_THREADS(1)
284 BENCHMARK_THREADS(4)
285 BENCHMARK_THREADS(8)
286 BENCHMARK_THREADS(16)
287 BENCHMARK_THREADS(32)
288 BENCHMARK_THREADS(64)
289
// Single-threaded reset cost baseline for plain shared_ptr.
BENCHMARK_MULTI(SharedPtrSingleThreadReset) {
  auto p = std::make_shared<int>(1);
  return benchmarkParallelRun([&] { p = std::make_shared<int>(1); }, 1);
}
// Single-threaded reset cost for folly::atomic_shared_ptr.
BENCHMARK_MULTI(AtomicSharedPtrSingleThreadReset) {
  auto s = std::make_shared<int>(1);
  folly::atomic_shared_ptr<int> p;
  p.store(s);
  return benchmarkParallelRun([&] { p.store(std::make_shared<int>(1)); }, 1);
}
// Single-threaded reset cost for CoreCachedSharedPtr (expected to be much
// more expensive than shared_ptr: reads are optimized at the cost of resets).
BENCHMARK_MULTI(CoreCachedSharedPtrSingleThreadReset) {
  folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
  return benchmarkParallelRun([&] { p.reset(std::make_shared<int>(1)); }, 1);
}
// Single-threaded reset cost for AtomicCoreCachedSharedPtr.
BENCHMARK_MULTI(AtomicCoreCachedSharedPtrSingleThreadReset) {
  folly::AtomicCoreCachedSharedPtr<int> p(std::make_shared<int>(1));
  return benchmarkParallelRun([&] { p.reset(std::make_shared<int>(1)); }, 1);
}
308
main(int argc,char ** argv)309 int main(int argc, char** argv) {
310 testing::InitGoogleTest(&argc, argv);
311 gflags::ParseCommandLineFlags(&argc, &argv, true);
312
313 auto ret = RUN_ALL_TESTS();
314 if (ret == 0 && FLAGS_benchmark) {
315 folly::runBenchmarks();
316 }
317
318 return ret;
319 }
320
321 #endif // defined(__GLIBCXX__)
322
323 #if 0
324 // On Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz, 2 sockets, 28 cores, 56 threads.
325 $ buck-out/gen/folly/concurrency/test/core_cached_shared_ptr_test --benchmark --bm_min_usec 500000
326
327 ============================================================================
328 folly/concurrency/test/CoreCachedSharedPtrTest.cpprelative time/iter iters/s
329 ============================================================================
330 SharedPtrAcquire_1Threads 18.31ns 54.62M
331 WeakPtrLock_1Threads 20.07ns 49.83M
332 AtomicSharedPtrAcquire_1Threads 26.31ns 38.01M
333 CoreCachedSharedPtrAquire_1Threads 20.17ns 49.57M
334 CoreCachedWeakPtrLock_1Threads 22.41ns 44.62M
335 AtomicCoreCachedSharedPtrAcquire_1Threads 23.59ns 42.40M
336 ReadMostlySharedPtrAcquire_1Threads 20.16ns 49.61M
337 ReadMostlyWeakPtrLock_1Threads 20.46ns 48.87M
338 ----------------------------------------------------------------------------
339 SharedPtrAcquire_4Threads 193.62ns 5.16M
340 WeakPtrLock_4Threads 216.29ns 4.62M
341 AtomicSharedPtrAcquire_4Threads 508.04ns 1.97M
342 CoreCachedSharedPtrAquire_4Threads 21.52ns 46.46M
343 CoreCachedWeakPtrLock_4Threads 23.80ns 42.01M
344 AtomicCoreCachedSharedPtrAcquire_4Threads 24.81ns 40.30M
345 ReadMostlySharedPtrAcquire_4Threads 21.67ns 46.15M
346 ReadMostlyWeakPtrLock_4Threads 21.72ns 46.04M
347 ----------------------------------------------------------------------------
348 SharedPtrAcquire_8Threads 389.71ns 2.57M
349 WeakPtrLock_8Threads 467.29ns 2.14M
350 AtomicSharedPtrAcquire_8Threads 1.38us 727.03K
351 CoreCachedSharedPtrAquire_8Threads 21.49ns 46.53M
352 CoreCachedWeakPtrLock_8Threads 23.83ns 41.97M
353 AtomicCoreCachedSharedPtrAcquire_8Threads 24.68ns 40.52M
354 ReadMostlySharedPtrAcquire_8Threads 21.68ns 46.12M
355 ReadMostlyWeakPtrLock_8Threads 21.48ns 46.55M
356 ----------------------------------------------------------------------------
357 SharedPtrAcquire_16Threads 739.59ns 1.35M
358 WeakPtrLock_16Threads 896.23ns 1.12M
359 AtomicSharedPtrAcquire_16Threads 2.88us 347.73K
360 CoreCachedSharedPtrAquire_16Threads 21.98ns 45.50M
361 CoreCachedWeakPtrLock_16Threads 25.98ns 38.49M
362 AtomicCoreCachedSharedPtrAcquire_16Threads 26.44ns 37.82M
363 ReadMostlySharedPtrAcquire_16Threads 23.75ns 42.11M
364 ReadMostlyWeakPtrLock_16Threads 22.89ns 43.70M
365 ----------------------------------------------------------------------------
366 SharedPtrAcquire_32Threads 1.36us 732.78K
367 WeakPtrLock_32Threads 1.93us 518.58K
368 AtomicSharedPtrAcquire_32Threads 5.68us 176.04K
369 CoreCachedSharedPtrAquire_32Threads 29.24ns 34.20M
370 CoreCachedWeakPtrLock_32Threads 32.17ns 31.08M
371 AtomicCoreCachedSharedPtrAcquire_32Threads 28.67ns 34.88M
372 ReadMostlySharedPtrAcquire_32Threads 29.36ns 34.06M
373 ReadMostlyWeakPtrLock_32Threads 27.27ns 36.67M
374 ----------------------------------------------------------------------------
375 SharedPtrAcquire_64Threads 2.39us 418.35K
376 WeakPtrLock_64Threads 4.21us 237.61K
377 AtomicSharedPtrAcquire_64Threads 8.63us 115.86K
378 CoreCachedSharedPtrAquire_64Threads 49.70ns 20.12M
379 CoreCachedWeakPtrLock_64Threads 74.74ns 13.38M
380 AtomicCoreCachedSharedPtrAcquire_64Threads 56.09ns 17.83M
381 ReadMostlySharedPtrAcquire_64Threads 49.22ns 20.32M
382 ReadMostlyWeakPtrLock_64Threads 49.16ns 20.34M
383 ----------------------------------------------------------------------------
384 SharedPtrSingleThreadReset 10.45ns 95.70M
385 AtomicSharedPtrSingleThreadReset 42.83ns 23.35M
386 CoreCachedSharedPtrSingleThreadReset 2.51us 398.43K
387 AtomicCoreCachedSharedPtrSingleThreadReset 2.36us 423.31K
388 ============================================================================
389 #endif
390