1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 6 #pragma once 7 8 #include <assert.h> 9 #include <errno.h> 10 #include <stdint.h> 11 #include <atomic> 12 #include <thread> 13 14 #include <folly/detail/Futex.h> 15 #include <folly/portability/Asm.h> 16 #include <folly/synchronization/WaitOptions.h> 17 #include <folly/synchronization/detail/Spin.h> 18 19 namespace folly { 20 21 /// A Baton allows a thread to block once and be awoken. Captures a 22 /// single handoff, and during its lifecycle (from construction/reset 23 /// to destruction/reset) a baton must either be post()ed and wait()ed 24 /// exactly once each, or not at all. 25 /// 26 /// Baton includes no internal padding, and is only 4 bytes in size. 27 /// Any alignment or padding to avoid false sharing is up to the user. 28 /// 29 /// This is basically a stripped-down semaphore that supports only a 30 /// single call to sem_post and a single call to sem_wait. 31 /// 32 /// The non-blocking version (MayBlock == false) provides more speed 33 /// by using only load acquire and store release operations in the 34 /// critical path, at the cost of disallowing blocking. 35 /// 36 /// The current posix semaphore sem_t isn't too bad, but this provides 37 /// more a bit more speed, inlining, smaller size, a guarantee that 38 /// the implementation won't change, and compatibility with 39 /// DeterministicSchedule. By having a much more restrictive 40 /// lifecycle we can also add a bunch of assertions that can help to 41 /// catch race conditions ahead of time. 42 template <bool MayBlock = true, template <typename> class Atom = std::atomic> 43 class Baton { 44 public: wait_options()45 static constexpr WaitOptions wait_options() { 46 return {}; 47 } 48 Baton()49 constexpr Baton() noexcept : state_(INIT) {} 50 51 Baton(Baton const&) = delete; 52 Baton& operator=(Baton const&) = delete; 53 54 /// It is an error to destroy a Baton on which a thread is currently 55 /// wait()ing. In practice this means that the waiter usually takes 56 /// responsibility for destroying the Baton. ~Baton()57 ~Baton() noexcept { 58 // The docblock for this function says that it can't be called when 59 // there is a concurrent waiter. We assume a strong version of this 60 // requirement in which the caller must _know_ that this is true, they 61 // are not allowed to be merely lucky. If two threads are involved, 62 // the destroying thread must actually have synchronized with the 63 // waiting thread after wait() returned. To convey causality the the 64 // waiting thread must have used release semantics and the destroying 65 // thread must have used acquire semantics for that communication, 66 // so we are guaranteed to see the post-wait() value of state_, 67 // which cannot be WAITING. 68 // 69 // Note that since we only care about a single memory location, 70 // the only two plausible memory orders here are relaxed and seq_cst. 71 assert(state_.load(std::memory_order_relaxed) != WAITING); 72 } 73 ready()74 bool ready() const noexcept { 75 auto s = state_.load(std::memory_order_acquire); 76 assert(s == INIT || s == EARLY_DELIVERY); 77 return (s == EARLY_DELIVERY); 78 } 79 80 /// Equivalent to destroying the Baton and creating a new one. It is 81 /// a bug to call this while there is a waiting thread, so in practice 82 /// the waiter will be the one that resets the baton. reset()83 void reset() noexcept { 84 // See ~Baton for a discussion about why relaxed is okay here 85 assert(state_.load(std::memory_order_relaxed) != WAITING); 86 87 // We use a similar argument to justify the use of a relaxed store 88 // here. Since both wait() and post() are required to be called 89 // only once per lifetime, no thread can actually call those methods 90 // correctly after a reset() unless it synchronizes with the thread 91 // that performed the reset(). If a post() or wait() on another thread 92 // didn't synchronize, then regardless of what operation we performed 93 // here there would be a race on proper use of the Baton's spec 94 // (although not on any particular load and store). Put another way, 95 // we don't need to synchronize here because anybody that might rely 96 // on such synchronization is required by the baton rules to perform 97 // an additional synchronization that has the desired effect anyway. 98 // 99 // There is actually a similar argument to be made about the 100 // constructor, in which the fenceless constructor initialization 101 // of state_ is piggybacked on whatever synchronization mechanism 102 // distributes knowledge of the Baton's existence 103 state_.store(INIT, std::memory_order_relaxed); 104 } 105 106 /// Causes wait() to wake up. For each lifetime of a Baton (where a 107 /// lifetime starts at construction or reset() and ends at 108 /// destruction or reset()) there can be at most one call to post(), 109 /// in the single poster version. Any thread may call post(). post()110 void post() noexcept { 111 if (!MayBlock) { 112 /// Spin-only version 113 /// 114 assert( 115 ((1 << state_.load(std::memory_order_relaxed)) & 116 ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0); 117 state_.store(EARLY_DELIVERY, std::memory_order_release); 118 return; 119 } 120 121 /// May-block versions 122 /// 123 uint32_t before = state_.load(std::memory_order_acquire); 124 125 assert(before == INIT || before == WAITING || before == TIMED_OUT); 126 127 if (before == INIT && 128 state_.compare_exchange_strong( 129 before, 130 EARLY_DELIVERY, 131 std::memory_order_release, 132 std::memory_order_relaxed)) { 133 return; 134 } 135 136 assert(before == WAITING || before == TIMED_OUT); 137 138 if (before == TIMED_OUT) { 139 return; 140 } 141 142 assert(before == WAITING); 143 state_.store(LATE_DELIVERY, std::memory_order_release); 144 detail::futexWake(&state_, 1); 145 } 146 147 /// Waits until post() has been called in the current Baton lifetime. 148 /// May be called at most once during a Baton lifetime (construction 149 /// |reset until destruction|reset). If post is called before wait in 150 /// the current lifetime then this method returns immediately. 151 /// 152 /// The restriction that there can be at most one wait() per lifetime 153 /// could be relaxed somewhat without any perf or size regressions, 154 /// but by making this condition very restrictive we can provide better 155 /// checking in debug builds. 156 void wait(const WaitOptions& opt = wait_options()) noexcept { 157 if (try_wait()) { 158 return; 159 } 160 161 auto const deadline = std::chrono::steady_clock::time_point::max(); 162 tryWaitSlow(deadline, opt); 163 } 164 165 /// Similar to wait, but doesn't block the thread if it hasn't been posted. 166 /// 167 /// try_wait has the following semantics: 168 /// - It is ok to call try_wait any number times on the same baton until 169 /// try_wait reports that the baton has been posted. 170 /// - It is ok to call timed_wait or wait on the same baton if try_wait 171 /// reports that baton hasn't been posted. 172 /// - If try_wait indicates that the baton has been posted, it is invalid to 173 /// call wait, try_wait or timed_wait on the same baton without resetting 174 /// 175 /// @return true if baton has been posted, false othewise try_wait()176 bool try_wait() const noexcept { 177 return ready(); 178 } 179 180 /// Similar to wait, but with a timeout. The thread is unblocked if the 181 /// timeout expires. 182 /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed 183 /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other 184 /// words, after try_wait_for the caller can't invoke 185 /// wait/try_wait/try_wait_for/try_wait_until 186 /// again on the same baton without resetting it. 187 /// 188 /// @param timeout Time until which the thread can block 189 /// @return true if the baton was posted to before timeout, 190 /// false otherwise 191 template <typename Rep, typename Period> 192 bool try_wait_for( 193 const std::chrono::duration<Rep, Period>& timeout, 194 const WaitOptions& opt = wait_options()) noexcept { 195 if (try_wait()) { 196 return true; 197 } 198 199 auto const deadline = std::chrono::steady_clock::now() + timeout; 200 return tryWaitSlow(deadline, opt); 201 } 202 203 /// Similar to wait, but with a deadline. The thread is unblocked if the 204 /// deadline expires. 205 /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed 206 /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other 207 /// words, after try_wait_until the caller can't invoke 208 /// wait/try_wait/try_wait_for/try_wait_until 209 /// again on the same baton without resetting it. 210 /// 211 /// @param deadline Time until which the thread can block 212 /// @return true if the baton was posted to before deadline, 213 /// false otherwise 214 template <typename Clock, typename Duration> 215 bool try_wait_until( 216 const std::chrono::time_point<Clock, Duration>& deadline, 217 const WaitOptions& opt = wait_options()) noexcept { 218 if (try_wait()) { 219 return true; 220 } 221 222 return tryWaitSlow(deadline, opt); 223 } 224 225 /// Alias to try_wait_for. Deprecated. 226 template <typename Rep, typename Period> timed_wait(const std::chrono::duration<Rep,Period> & timeout)227 bool timed_wait( 228 const std::chrono::duration<Rep, Period>& timeout) noexcept { 229 return try_wait_for(timeout); 230 } 231 232 /// Alias to try_wait_until. Deprecated. 233 template <typename Clock, typename Duration> timed_wait(const std::chrono::time_point<Clock,Duration> & deadline)234 bool timed_wait( 235 const std::chrono::time_point<Clock, Duration>& deadline) noexcept { 236 return try_wait_until(deadline); 237 } 238 239 private: 240 enum State : uint32_t { 241 INIT = 0, 242 EARLY_DELIVERY = 1, 243 WAITING = 2, 244 LATE_DELIVERY = 3, 245 TIMED_OUT = 4, 246 }; 247 248 template <typename Clock, typename Duration> tryWaitSlow(const std::chrono::time_point<Clock,Duration> & deadline,const WaitOptions & opt)249 bool tryWaitSlow( 250 const std::chrono::time_point<Clock, Duration>& deadline, 251 const WaitOptions& opt) noexcept { 252 switch ( 253 detail::spin_pause_until(deadline, opt, [this] { return ready(); })) { 254 case detail::spin_result::success: 255 return true; 256 case detail::spin_result::timeout: 257 return false; 258 case detail::spin_result::advance: 259 break; 260 } 261 262 if (!MayBlock) { 263 switch (detail::spin_yield_until(deadline, [this] { return ready(); })) { 264 case detail::spin_result::success: 265 return true; 266 case detail::spin_result::timeout: 267 return false; 268 case detail::spin_result::advance: 269 break; 270 } 271 } 272 273 // guess we have to block :( 274 uint32_t expected = INIT; 275 if (!state_.compare_exchange_strong( 276 expected, 277 WAITING, 278 std::memory_order_relaxed, 279 std::memory_order_relaxed)) { 280 // CAS failed, last minute reprieve 281 assert(expected == EARLY_DELIVERY); 282 // TODO: move the acquire to the compare_exchange failure load after C++17 283 std::atomic_thread_fence(std::memory_order_acquire); 284 return true; 285 } 286 287 while (true) { 288 auto rv = detail::futexWaitUntil(&state_, WAITING, deadline); 289 290 // Awoken by the deadline passing. 291 if (rv == detail::FutexResult::TIMEDOUT) { 292 assert(deadline != (std::chrono::time_point<Clock, Duration>::max())); 293 state_.store(TIMED_OUT, std::memory_order_release); 294 return false; 295 } 296 297 // Probably awoken by a matching wake event, but could also by awoken 298 // by an asynchronous signal or by a spurious wakeup. 299 // 300 // state_ is the truth even if FUTEX_WAIT reported a matching 301 // FUTEX_WAKE, since we aren't using type-stable storage and we 302 // don't guarantee reuse. The scenario goes like this: thread 303 // A's last touch of a Baton is a call to wake(), which stores 304 // LATE_DELIVERY and gets an unlucky context switch before delivering 305 // the corresponding futexWake. Thread B sees LATE_DELIVERY 306 // without consuming a futex event, because it calls futexWait 307 // with an expected value of WAITING and hence doesn't go to sleep. 308 // B returns, so the Baton's memory is reused and becomes another 309 // Baton (or a reuse of this one). B calls futexWait on the new 310 // Baton lifetime, then A wakes up and delivers a spurious futexWake 311 // to the same memory location. B's futexWait will then report a 312 // consumed wake event even though state_ is still WAITING. 313 // 314 // It would be possible to add an extra state_ dance to communicate 315 // that the futexWake has been sent so that we can be sure to consume 316 // it before returning, but that would be a perf and complexity hit. 317 uint32_t s = state_.load(std::memory_order_acquire); 318 assert(s == WAITING || s == LATE_DELIVERY); 319 if (s == LATE_DELIVERY) { 320 return true; 321 } 322 } 323 } 324 325 detail::Futex<Atom> state_; 326 }; 327 328 } // namespace folly 329