//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).

#pragma once

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <atomic>
#include <thread>

#include <folly/detail/Futex.h>
#include <folly/portability/Asm.h>
#include <folly/synchronization/WaitOptions.h>
#include <folly/synchronization/detail/Spin.h>

namespace folly {

/// A Baton allows a thread to block once and be awoken. Captures a
/// single handoff, and during its lifecycle (from construction/reset
/// to destruction/reset) a baton must either be post()ed and wait()ed
/// exactly once each, or not at all.
///
/// Baton includes no internal padding, and is only 4 bytes in size.
/// Any alignment or padding to avoid false sharing is up to the user.
///
/// This is basically a stripped-down semaphore that supports only a
/// single call to sem_post and a single call to sem_wait.
///
/// The non-blocking version (MayBlock == false) provides more speed
/// by using only load acquire and store release operations in the
/// critical path, at the cost of disallowing blocking.
///
/// The current posix semaphore sem_t isn't too bad, but this provides
/// a bit more speed, inlining, smaller size, a guarantee that
/// the implementation won't change, and compatibility with
/// DeterministicSchedule.  By having a much more restrictive
/// lifecycle we can also add a bunch of assertions that can help to
/// catch race conditions ahead of time.
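///
/// Example (illustrative sketch, not part of this header's API surface;
/// the producer/consumer naming below is hypothetical):
///
///   folly::Baton<> done;
///   std::thread consumer([&] {
///     done.wait();   // blocks until post(), or returns immediately if
///                    // post() already happened in this lifetime
///   });
///   done.post();     // exactly one post() per lifetime wakes the waiter
///   consumer.join();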
template <bool MayBlock = true, template <typename> class Atom = std::atomic>
class Baton {
 public:
  static constexpr WaitOptions wait_options() {
    return {};
  }

  constexpr Baton() noexcept : state_(INIT) {}

  Baton(Baton const&) = delete;
  Baton& operator=(Baton const&) = delete;

  /// It is an error to destroy a Baton on which a thread is currently
  /// wait()ing.  In practice this means that the waiter usually takes
  /// responsibility for destroying the Baton.
  ~Baton() noexcept {
    // The docblock for this function says that it can't be called when
    // there is a concurrent waiter.  We assume a strong version of this
    // requirement in which the caller must _know_ that this is true, they
    // are not allowed to be merely lucky.  If two threads are involved,
    // the destroying thread must actually have synchronized with the
    // waiting thread after wait() returned.  To convey causality, the
    // waiting thread must have used release semantics and the destroying
    // thread must have used acquire semantics for that communication,
    // so we are guaranteed to see the post-wait() value of state_,
    // which cannot be WAITING.
    //
    // Note that since we only care about a single memory location,
    // the only two plausible memory orders here are relaxed and seq_cst.
    assert(state_.load(std::memory_order_relaxed) != WAITING);
  }

  bool ready() const noexcept {
    auto s = state_.load(std::memory_order_acquire);
    assert(s == INIT || s == EARLY_DELIVERY);
    return (s == EARLY_DELIVERY);
  }

  /// Equivalent to destroying the Baton and creating a new one.  It is
  /// a bug to call this while there is a waiting thread, so in practice
  /// the waiter will be the one that resets the baton.
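  ///
  /// Illustrative sketch of reuse via reset() (sequencing shown
  /// single-threaded for brevity; concurrent callers must synchronize as
  /// described in the comments below):
  ///
  ///   folly::Baton<> b;
  ///   b.post();
  ///   b.wait();   // returns immediately, post() already happened
  ///   b.reset();  // starts a fresh lifetime; one more post()/wait() pair
  ///   b.post();
  ///   b.wait();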
  void reset() noexcept {
    // See ~Baton for a discussion about why relaxed is okay here
    assert(state_.load(std::memory_order_relaxed) != WAITING);

    // We use a similar argument to justify the use of a relaxed store
    // here.  Since both wait() and post() are required to be called
    // only once per lifetime, no thread can actually call those methods
    // correctly after a reset() unless it synchronizes with the thread
    // that performed the reset().  If a post() or wait() on another thread
    // didn't synchronize, then regardless of what operation we performed
    // here there would be a race on proper use of the Baton's spec
    // (although not on any particular load and store).  Put another way,
    // we don't need to synchronize here because anybody that might rely
    // on such synchronization is required by the baton rules to perform
    // an additional synchronization that has the desired effect anyway.
    //
    // There is actually a similar argument to be made about the
    // constructor, in which the fenceless constructor initialization
    // of state_ is piggybacked on whatever synchronization mechanism
    // distributes knowledge of the Baton's existence
    state_.store(INIT, std::memory_order_relaxed);
  }

  /// Causes wait() to wake up.  For each lifetime of a Baton (where a
  /// lifetime starts at construction or reset() and ends at
  /// destruction or reset()) there can be at most one call to post(),
  /// in the single poster version.  Any thread may call post().
  void post() noexcept {
    if (!MayBlock) {
      /// Spin-only version
      ///
      assert(
          ((1 << state_.load(std::memory_order_relaxed)) &
           ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0);
      state_.store(EARLY_DELIVERY, std::memory_order_release);
      return;
    }

    /// May-block versions
    ///
    uint32_t before = state_.load(std::memory_order_acquire);

    assert(before == INIT || before == WAITING || before == TIMED_OUT);

    if (before == INIT &&
        state_.compare_exchange_strong(
            before,
            EARLY_DELIVERY,
            std::memory_order_release,
            std::memory_order_relaxed)) {
      return;
    }

    assert(before == WAITING || before == TIMED_OUT);

    if (before == TIMED_OUT) {
      return;
    }

    assert(before == WAITING);
    state_.store(LATE_DELIVERY, std::memory_order_release);
    detail::futexWake(&state_, 1);
  }

  /// Waits until post() has been called in the current Baton lifetime.
  /// May be called at most once during a Baton lifetime (construction
  /// or reset() until destruction or reset()).  If post is called before
  /// wait in the current lifetime then this method returns immediately.
  ///
  /// The restriction that there can be at most one wait() per lifetime
  /// could be relaxed somewhat without any perf or size regressions,
  /// but by making this condition very restrictive we can provide better
  /// checking in debug builds.
  void wait(const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return;
    }

    auto const deadline = std::chrono::steady_clock::time_point::max();
    tryWaitSlow(deadline, opt);
  }

  /// Similar to wait, but doesn't block the thread if it hasn't been posted.
  ///
  /// try_wait has the following semantics:
  /// - It is ok to call try_wait any number of times on the same baton until
  ///   try_wait reports that the baton has been posted.
  /// - It is ok to call timed_wait or wait on the same baton if try_wait
  ///   reports that the baton hasn't been posted.
  /// - If try_wait indicates that the baton has been posted, it is invalid to
  ///   call wait, try_wait or timed_wait on the same baton without resetting it.
  ///
  /// @return       true if baton has been posted, false otherwise
  bool try_wait() const noexcept {
    return ready();
  }

  /// Similar to wait, but with a timeout. The thread is unblocked if the
  /// timeout expires.
  /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
  /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
  /// words, after try_wait_for the caller can't invoke
  /// wait/try_wait/try_wait_for/try_wait_until
  /// again on the same baton without resetting it.
  ///
  /// @param  timeout       Maximum duration for which the thread may block
  /// @return               true if the baton was posted to before timeout,
  ///                       false otherwise
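  ///
  /// Illustrative sketch (the 10ms budget is arbitrary, not part of the API):
  ///
  ///   folly::Baton<> b;
  ///   if (b.try_wait_for(std::chrono::milliseconds(10))) {
  ///     // posted within the budget
  ///   } else {
  ///     // timed out; per the lifecycle rules above, no further wait may
  ///     // be issued on b without an intervening reset()
  ///   }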
  template <typename Rep, typename Period>
  bool try_wait_for(
      const std::chrono::duration<Rep, Period>& timeout,
      const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return true;
    }

    auto const deadline = std::chrono::steady_clock::now() + timeout;
    return tryWaitSlow(deadline, opt);
  }

  /// Similar to wait, but with a deadline. The thread is unblocked if the
  /// deadline expires.
  /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
  /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
  /// words, after try_wait_until the caller can't invoke
  /// wait/try_wait/try_wait_for/try_wait_until
  /// again on the same baton without resetting it.
  ///
  /// @param  deadline      Time until which the thread can block
  /// @return               true if the baton was posted to before deadline,
  ///                       false otherwise
  template <typename Clock, typename Duration>
  bool try_wait_until(
      const std::chrono::time_point<Clock, Duration>& deadline,
      const WaitOptions& opt = wait_options()) noexcept {
    if (try_wait()) {
      return true;
    }

    return tryWaitSlow(deadline, opt);
  }

  /// Alias to try_wait_for. Deprecated.
  template <typename Rep, typename Period>
  bool timed_wait(
      const std::chrono::duration<Rep, Period>& timeout) noexcept {
    return try_wait_for(timeout);
  }

  /// Alias to try_wait_until. Deprecated.
  template <typename Clock, typename Duration>
  bool timed_wait(
      const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
    return try_wait_until(deadline);
  }

 private:
  enum State : uint32_t {
    INIT = 0,
    EARLY_DELIVERY = 1,
    WAITING = 2,
    LATE_DELIVERY = 3,
    TIMED_OUT = 4,
  };
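
  // Summary of the state transitions implemented by post() and the
  // tryWaitSlow() helper below, within one lifetime:
  //
  //   INIT -------post()------> EARLY_DELIVERY   (post arrives before wait)
  //   INIT ----tryWaitSlow----> WAITING          (waiter prepares to block)
  //   WAITING ----post()------> LATE_DELIVERY    (plus a futexWake)
  //   WAITING ---deadline-----> TIMED_OUT        (futexWaitUntil timed out)
  //
  // reset() takes any non-WAITING state back to INIT.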

  template <typename Clock, typename Duration>
  bool tryWaitSlow(
      const std::chrono::time_point<Clock, Duration>& deadline,
      const WaitOptions& opt) noexcept {
    switch (
        detail::spin_pause_until(deadline, opt, [this] { return ready(); })) {
      case detail::spin_result::success:
        return true;
      case detail::spin_result::timeout:
        return false;
      case detail::spin_result::advance:
        break;
    }

    if (!MayBlock) {
      switch (detail::spin_yield_until(deadline, [this] { return ready(); })) {
        case detail::spin_result::success:
          return true;
        case detail::spin_result::timeout:
          return false;
        case detail::spin_result::advance:
          break;
      }
    }

    // guess we have to block :(
    uint32_t expected = INIT;
    if (!state_.compare_exchange_strong(
            expected,
            WAITING,
            std::memory_order_relaxed,
            std::memory_order_relaxed)) {
      // CAS failed, last minute reprieve
      assert(expected == EARLY_DELIVERY);
      // TODO: move the acquire to the compare_exchange failure load after C++17
      std::atomic_thread_fence(std::memory_order_acquire);
      return true;
    }

    while (true) {
      auto rv = detail::futexWaitUntil(&state_, WAITING, deadline);

      // Awoken by the deadline passing.
      if (rv == detail::FutexResult::TIMEDOUT) {
        assert(deadline != (std::chrono::time_point<Clock, Duration>::max()));
        state_.store(TIMED_OUT, std::memory_order_release);
        return false;
      }

      // Probably awoken by a matching wake event, but could also be awoken
      // by an asynchronous signal or by a spurious wakeup.
      //
      // state_ is the truth even if FUTEX_WAIT reported a matching
      // FUTEX_WAKE, since we aren't using type-stable storage and we
      // don't guarantee reuse.  The scenario goes like this: thread
      // A's last touch of a Baton is a call to post(), which stores
      // LATE_DELIVERY and gets an unlucky context switch before delivering
      // the corresponding futexWake.  Thread B sees LATE_DELIVERY
      // without consuming a futex event, because it calls futexWait
      // with an expected value of WAITING and hence doesn't go to sleep.
      // B returns, so the Baton's memory is reused and becomes another
      // Baton (or a reuse of this one).  B calls futexWait on the new
      // Baton lifetime, then A wakes up and delivers a spurious futexWake
      // to the same memory location.  B's futexWait will then report a
      // consumed wake event even though state_ is still WAITING.
      //
      // It would be possible to add an extra state_ dance to communicate
      // that the futexWake has been sent so that we can be sure to consume
      // it before returning, but that would be a perf and complexity hit.
      uint32_t s = state_.load(std::memory_order_acquire);
      assert(s == WAITING || s == LATE_DELIVERY);
      if (s == LATE_DELIVERY) {
        return true;
      }
    }
  }

  detail::Futex<Atom> state_;
};

} // namespace folly