1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include <folly/portability/Config.h>
18 
19 // AtomicSharedPtr-detail.h only works with libstdc++, so skip these tests for
20 // other vendors
21 #if defined(__GLIBCXX__)
22 
23 #include <atomic>
24 #include <memory>
25 #include <thread>
26 #include <vector>
27 
28 #include <folly/Benchmark.h>
29 #include <folly/Portability.h>
30 #include <folly/concurrency/AtomicSharedPtr.h>
31 #include <folly/concurrency/CoreCachedSharedPtr.h>
32 #include <folly/experimental/ReadMostlySharedPtr.h>
33 #include <folly/portability/GTest.h>
34 
namespace {

// Spawns numThreads threads, invokes op(threadIndex) on each, and joins them
// all before returning.
template <class Operation>
void parallelRun(
    Operation op, size_t numThreads = std::thread::hardware_concurrency()) {
  std::vector<std::thread> workers;
  workers.reserve(numThreads);
  for (size_t idx = 0; idx < numThreads; ++idx) {
    workers.emplace_back([&, idx] { op(idx); });
  }
  for (auto& worker : workers) {
    worker.join();
  }
}

} // namespace
50 
// Verifies basic CoreCachedSharedPtr/CoreCachedWeakPtr semantics: every core
// observes the same managed object, and object lifetime follows the usual
// shared_ptr/weak_ptr rules once the caches are reset.
TEST(CoreCachedSharedPtr, Basic) {
  auto original = std::make_shared<int>(1);
  std::weak_ptr<int> weakOriginal(original);

  folly::CoreCachedSharedPtr<int> cached(original);
  folly::CoreCachedWeakPtr<int> weakCached(cached);

  std::shared_ptr<int> acquired = cached.get();
  std::weak_ptr<int> weakAcquired = weakCached.get();
  ASSERT_TRUE(acquired != nullptr);
  ASSERT_EQ(*acquired, 1);
  ASSERT_FALSE(weakAcquired.expired());

  // Check that other cores get the correct shared_ptr too.
  parallelRun([&](size_t) {
    EXPECT_TRUE(cached.get().get() == original.get());
    EXPECT_EQ(*cached.get(), 1);
    EXPECT_EQ(*weakCached.lock(), 1);
  });

  original.reset();
  cached.reset();
  // acquired alone should keep the object alive.
  ASSERT_FALSE(weakOriginal.expired());
  // Here we don't know anything about weakAcquired: could be expired even if
  // there is a living reference to the main object.

  acquired.reset();
  ASSERT_TRUE(weakOriginal.expired());
  ASSERT_TRUE(weakAcquired.expired());
}
82 
// Stress-tests AtomicCoreCachedSharedPtr: concurrent reset()/get() from all
// threads must be race-free (first section), and with a single writer the
// values seen by readers must be globally monotonic (second section).
TEST(CoreCachedSharedPtr, AtomicCoreCachedSharedPtr) {
  constexpr size_t kIters = 2000;
  {
    folly::AtomicCoreCachedSharedPtr<size_t> ptr;
    parallelRun([&](size_t) {
      for (size_t i = 0; i < kIters; ++i) {
        ptr.reset(std::make_shared<size_t>(i));
        EXPECT_TRUE(ptr.get());
        // Just read the value, and ensure that ASAN/TSAN do not complain.
        EXPECT_GE(*ptr.get(), 0);
      }
    });
  }
  {
    // One writer thread, all other readers, verify consistency.
    std::atomic<size_t> largestValueObserved{0};
    folly::AtomicCoreCachedSharedPtr<size_t> ptr{std::make_shared<size_t>(0)};
    parallelRun([&](size_t tid) {
      if (tid == 0) {
        // Writer: publish kIters successive values, 1..kIters.
        for (size_t i = 0; i < kIters; ++i) {
          ptr.reset(std::make_shared<size_t>(i + 1));
        }
      } else {
        // Readers: each read must be at least the global maximum any thread
        // has already recorded, i.e. updates are never observed out of order.
        while (true) {
          auto expected = largestValueObserved.load();
          auto value = *ptr.get();
          EXPECT_GE(value, expected);
          // Maintain the maximum value observed so far. As soon as one thread
          // observes an update, they all should observe it.
          while (value > expected &&
                 !largestValueObserved.compare_exchange_weak(expected, value)) {
          }
          if (expected == kIters) {
            break;
          }
        }
      }
    });
  }
}
123 
124 namespace {
125 
126 template <class Holder>
testAliasingCornerCases()127 void testAliasingCornerCases() {
128   {
129     Holder h;
130     auto p1 = std::make_shared<int>(1);
131     std::weak_ptr<int> w1 = p1;
132     // Aliasing constructor, p2 is nullptr but still manages the object in p1.
133     std::shared_ptr<int> p2(p1, nullptr);
134     // And now it's the only reference left.
135     p1.reset();
136     EXPECT_FALSE(w1.expired());
137 
138     // Pass the ownership to the Holder.
139     h.reset(p2);
140     p2.reset();
141 
142     // Object should still be alive.
143     EXPECT_FALSE(w1.expired());
144 
145     // And resetting will destroy it.
146     h.reset();
147     folly::hazptr_cleanup();
148     EXPECT_TRUE(w1.expired());
149   }
150 
151   {
152     Holder h;
153     int x = 1;
154     // p points to x, but has no managed object.
155     std::shared_ptr<int> p(std::shared_ptr<int>{}, &x);
156     h.reset(p);
157     EXPECT_TRUE(h.get().get() == &x);
158   }
159 }
160 
161 } // namespace
162 
// Aliasing corner cases for the non-atomic holder.
TEST(CoreCachedSharedPtr, AliasingCornerCases) {
  testAliasingCornerCases<folly::CoreCachedSharedPtr<int>>();
}
166 
// Aliasing corner cases for the atomic holder.
TEST(CoreCachedSharedPtr, AliasingCornerCasesAtomic) {
  testAliasingCornerCases<folly::AtomicCoreCachedSharedPtr<int>>();
}
170 
171 namespace {
172 
173 template <class Operation>
benchmarkParallelRun(Operation op,size_t numThreads)174 size_t benchmarkParallelRun(Operation op, size_t numThreads) {
175   constexpr size_t kIters = 2 * 1000 * 1000;
176 
177   std::vector<std::thread> threads;
178 
179   // Prevent the compiler from hoisting code out of the loop.
180   auto opNoinline = [&]() FOLLY_NOINLINE { op(); };
181 
182   std::atomic<size_t> ready = 0;
183   std::atomic<bool> done = false;
184   std::atomic<size_t> iters = 0;
185 
186   for (size_t t = 0; t < numThreads; ++t) {
187     threads.emplace_back([&] {
188       ++ready;
189       while (ready.load() < numThreads) {
190       }
191       size_t i = 0;
192       // Interrupt all workers as soon as the first one is done, so the overall
193       // wall time is close to the time spent in each thread.
194       for (; !done.load(std::memory_order_acquire) && i < kIters; ++i) {
195         opNoinline();
196       }
197       done = true;
198       iters += i;
199     });
200   }
201 
202   for (auto& t : threads) {
203     t.join();
204   }
205 
206   return iters / numThreads;
207 }
208 
benchmarkSharedPtrAcquire(size_t numThreads)209 size_t benchmarkSharedPtrAcquire(size_t numThreads) {
210   auto p = std::make_shared<int>(1);
211   return benchmarkParallelRun([&] { return p; }, numThreads);
212 }
213 
benchmarkWeakPtrLock(size_t numThreads)214 size_t benchmarkWeakPtrLock(size_t numThreads) {
215   auto p = std::make_shared<int>(1);
216   std::weak_ptr<int> wp = p;
217   return benchmarkParallelRun([&] { return wp.lock(); }, numThreads);
218 }
219 
benchmarkAtomicSharedPtrAcquire(size_t numThreads)220 size_t benchmarkAtomicSharedPtrAcquire(size_t numThreads) {
221   auto s = std::make_shared<int>(1);
222   folly::atomic_shared_ptr<int> p;
223   p.store(s);
224   return benchmarkParallelRun([&] { return p.load(); }, numThreads);
225 }
226 
benchmarkCoreCachedSharedPtrAcquire(size_t numThreads)227 size_t benchmarkCoreCachedSharedPtrAcquire(size_t numThreads) {
228   folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
229   return benchmarkParallelRun([&] { return p.get(); }, numThreads);
230 }
231 
benchmarkCoreCachedWeakPtrLock(size_t numThreads)232 size_t benchmarkCoreCachedWeakPtrLock(size_t numThreads) {
233   folly::CoreCachedSharedPtr<int> p(std::make_shared<int>(1));
234   folly::CoreCachedWeakPtr<int> wp(p);
235   return benchmarkParallelRun([&] { return wp.lock(); }, numThreads);
236 }
237 
benchmarkAtomicCoreCachedSharedPtrAcquire(size_t numThreads)238 size_t benchmarkAtomicCoreCachedSharedPtrAcquire(size_t numThreads) {
239   folly::AtomicCoreCachedSharedPtr<int> p(std::make_shared<int>(1));
240   return benchmarkParallelRun([&] { return p.get(); }, numThreads);
241 }
242 
benchmarkReadMostlySharedPtrAcquire(size_t numThreads)243 size_t benchmarkReadMostlySharedPtrAcquire(size_t numThreads) {
244   folly::ReadMostlyMainPtr<int> p{std::make_shared<int>(1)};
245   return benchmarkParallelRun([&] { return p.getShared(); }, numThreads);
246 }
247 
benchmarkReadMostlyWeakPtrLock(size_t numThreads)248 size_t benchmarkReadMostlyWeakPtrLock(size_t numThreads) {
249   folly::ReadMostlyMainPtr<int> p{std::make_shared<int>(1)};
250   folly::ReadMostlyWeakPtr<int> w{p};
251   return benchmarkParallelRun([&] { return w.lock(); }, numThreads);
252 }
253 
254 } // namespace
255 
256 #define BENCHMARK_THREADS(THREADS)                                       \
257   BENCHMARK_MULTI(SharedPtrAcquire_##THREADS##Threads) {                 \
258     return benchmarkSharedPtrAcquire(THREADS);                           \
259   }                                                                      \
260   BENCHMARK_MULTI(WeakPtrLock_##THREADS##Threads) {                      \
261     return benchmarkWeakPtrLock(THREADS);                                \
262   }                                                                      \
263   BENCHMARK_MULTI(AtomicSharedPtrAcquire_##THREADS##Threads) {           \
264     return benchmarkAtomicSharedPtrAcquire(THREADS);                     \
265   }                                                                      \
266   BENCHMARK_MULTI(CoreCachedSharedPtrAquire_##THREADS##Threads) {        \
267     return benchmarkCoreCachedSharedPtrAcquire(THREADS);                 \
268   }                                                                      \
269   BENCHMARK_MULTI(CoreCachedWeakPtrLock_##THREADS##Threads) {            \
270     return benchmarkCoreCachedWeakPtrLock(THREADS);                      \
271   }                                                                      \
272   BENCHMARK_MULTI(AtomicCoreCachedSharedPtrAcquire_##THREADS##Threads) { \
273     return benchmarkAtomicCoreCachedSharedPtrAcquire(THREADS);           \
274   }                                                                      \
275   BENCHMARK_MULTI(ReadMostlySharedPtrAcquire_##THREADS##Threads) {       \
276     return benchmarkReadMostlySharedPtrAcquire(THREADS);                 \
277   }                                                                      \
278   BENCHMARK_MULTI(ReadMostlyWeakPtrLock_##THREADS##Threads) {            \
279     return benchmarkReadMostlyWeakPtrLock(THREADS);                      \
280   }                                                                      \
281   BENCHMARK_DRAW_LINE();
282 
283 BENCHMARK_THREADS(1)
284 BENCHMARK_THREADS(4)
285 BENCHMARK_THREADS(8)
286 BENCHMARK_THREADS(16)
287 BENCHMARK_THREADS(32)
288 BENCHMARK_THREADS(64)
289 
// Single-threaded reset cost for a plain shared_ptr.
BENCHMARK_MULTI(SharedPtrSingleThreadReset) {
  auto ptr = std::make_shared<int>(1);
  return benchmarkParallelRun([&] { ptr = std::make_shared<int>(1); }, 1);
}
// Single-threaded store cost for folly::atomic_shared_ptr.
BENCHMARK_MULTI(AtomicSharedPtrSingleThreadReset) {
  auto seed = std::make_shared<int>(1);
  folly::atomic_shared_ptr<int> ptr;
  ptr.store(seed);
  return benchmarkParallelRun([&] { ptr.store(std::make_shared<int>(1)); }, 1);
}
// Single-threaded reset cost for CoreCachedSharedPtr (expected to be much
// more expensive than plain shared_ptr, per the results table below).
BENCHMARK_MULTI(CoreCachedSharedPtrSingleThreadReset) {
  folly::CoreCachedSharedPtr<int> ptr(std::make_shared<int>(1));
  return benchmarkParallelRun([&] { ptr.reset(std::make_shared<int>(1)); }, 1);
}
// Single-threaded reset cost for AtomicCoreCachedSharedPtr.
BENCHMARK_MULTI(AtomicCoreCachedSharedPtrSingleThreadReset) {
  folly::AtomicCoreCachedSharedPtr<int> ptr(std::make_shared<int>(1));
  return benchmarkParallelRun([&] { ptr.reset(std::make_shared<int>(1)); }, 1);
}
308 
main(int argc,char ** argv)309 int main(int argc, char** argv) {
310   testing::InitGoogleTest(&argc, argv);
311   gflags::ParseCommandLineFlags(&argc, &argv, true);
312 
313   auto ret = RUN_ALL_TESTS();
314   if (ret == 0 && FLAGS_benchmark) {
315     folly::runBenchmarks();
316   }
317 
318   return ret;
319 }
320 
321 #endif // defined(__GLIBCXX__)
322 
323 #if 0
324 // On Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz, 2 sockets, 28 cores, 56 threads.
325 $ buck-out/gen/folly/concurrency/test/core_cached_shared_ptr_test --benchmark --bm_min_usec 500000
326 
327 ============================================================================
328 folly/concurrency/test/CoreCachedSharedPtrTest.cpprelative  time/iter  iters/s
329 ============================================================================
330 SharedPtrAcquire_1Threads                                   18.31ns   54.62M
331 WeakPtrLock_1Threads                                        20.07ns   49.83M
332 AtomicSharedPtrAcquire_1Threads                             26.31ns   38.01M
333 CoreCachedSharedPtrAquire_1Threads                          20.17ns   49.57M
334 CoreCachedWeakPtrLock_1Threads                              22.41ns   44.62M
335 AtomicCoreCachedSharedPtrAcquire_1Threads                   23.59ns   42.40M
336 ReadMostlySharedPtrAcquire_1Threads                         20.16ns   49.61M
337 ReadMostlyWeakPtrLock_1Threads                              20.46ns   48.87M
338 ----------------------------------------------------------------------------
339 SharedPtrAcquire_4Threads                                  193.62ns    5.16M
340 WeakPtrLock_4Threads                                       216.29ns    4.62M
341 AtomicSharedPtrAcquire_4Threads                            508.04ns    1.97M
342 CoreCachedSharedPtrAquire_4Threads                          21.52ns   46.46M
343 CoreCachedWeakPtrLock_4Threads                              23.80ns   42.01M
344 AtomicCoreCachedSharedPtrAcquire_4Threads                   24.81ns   40.30M
345 ReadMostlySharedPtrAcquire_4Threads                         21.67ns   46.15M
346 ReadMostlyWeakPtrLock_4Threads                              21.72ns   46.04M
347 ----------------------------------------------------------------------------
348 SharedPtrAcquire_8Threads                                  389.71ns    2.57M
349 WeakPtrLock_8Threads                                       467.29ns    2.14M
350 AtomicSharedPtrAcquire_8Threads                              1.38us  727.03K
351 CoreCachedSharedPtrAquire_8Threads                          21.49ns   46.53M
352 CoreCachedWeakPtrLock_8Threads                              23.83ns   41.97M
353 AtomicCoreCachedSharedPtrAcquire_8Threads                   24.68ns   40.52M
354 ReadMostlySharedPtrAcquire_8Threads                         21.68ns   46.12M
355 ReadMostlyWeakPtrLock_8Threads                              21.48ns   46.55M
356 ----------------------------------------------------------------------------
357 SharedPtrAcquire_16Threads                                 739.59ns    1.35M
358 WeakPtrLock_16Threads                                      896.23ns    1.12M
359 AtomicSharedPtrAcquire_16Threads                             2.88us  347.73K
360 CoreCachedSharedPtrAquire_16Threads                         21.98ns   45.50M
361 CoreCachedWeakPtrLock_16Threads                             25.98ns   38.49M
362 AtomicCoreCachedSharedPtrAcquire_16Threads                  26.44ns   37.82M
363 ReadMostlySharedPtrAcquire_16Threads                        23.75ns   42.11M
364 ReadMostlyWeakPtrLock_16Threads                             22.89ns   43.70M
365 ----------------------------------------------------------------------------
366 SharedPtrAcquire_32Threads                                   1.36us  732.78K
367 WeakPtrLock_32Threads                                        1.93us  518.58K
368 AtomicSharedPtrAcquire_32Threads                             5.68us  176.04K
369 CoreCachedSharedPtrAquire_32Threads                         29.24ns   34.20M
370 CoreCachedWeakPtrLock_32Threads                             32.17ns   31.08M
371 AtomicCoreCachedSharedPtrAcquire_32Threads                  28.67ns   34.88M
372 ReadMostlySharedPtrAcquire_32Threads                        29.36ns   34.06M
373 ReadMostlyWeakPtrLock_32Threads                             27.27ns   36.67M
374 ----------------------------------------------------------------------------
375 SharedPtrAcquire_64Threads                                   2.39us  418.35K
376 WeakPtrLock_64Threads                                        4.21us  237.61K
377 AtomicSharedPtrAcquire_64Threads                             8.63us  115.86K
378 CoreCachedSharedPtrAquire_64Threads                         49.70ns   20.12M
379 CoreCachedWeakPtrLock_64Threads                             74.74ns   13.38M
380 AtomicCoreCachedSharedPtrAcquire_64Threads                  56.09ns   17.83M
381 ReadMostlySharedPtrAcquire_64Threads                        49.22ns   20.32M
382 ReadMostlyWeakPtrLock_64Threads                             49.16ns   20.34M
383 ----------------------------------------------------------------------------
384 SharedPtrSingleThreadReset                                  10.45ns   95.70M
385 AtomicSharedPtrSingleThreadReset                            42.83ns   23.35M
386 CoreCachedSharedPtrSingleThreadReset                         2.51us  398.43K
387 AtomicCoreCachedSharedPtrSingleThreadReset                   2.36us  423.31K
388 ============================================================================
389 #endif
390