2 * Copyright 2014-present Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
25 #include <folly/Likely.h>
26 #include <folly/detail/Futex.h>
27 #include <folly/detail/MemoryIdler.h>
28 #include <folly/portability/Asm.h>
29 #include <folly/synchronization/WaitOptions.h>
33 /// A Baton allows a thread to block once and be awoken. Captures a
34 /// single handoff, and during its lifecycle (from construction/reset
35 /// to destruction/reset) a baton must either be post()ed and wait()ed
36 /// exactly once each, or not at all.
38 /// Baton includes no internal padding, and is only 4 bytes in size.
39 /// Any alignment or padding to avoid false sharing is up to the user.
41 /// This is basically a stripped-down semaphore that supports only a
42 /// single call to sem_post and a single call to sem_wait.
44 /// The non-blocking version (MayBlock == false) provides more speed
45 /// by using only load acquire and store release operations in the
46 /// critical path, at the cost of disallowing blocking.
48 /// The current posix semaphore sem_t isn't too bad, but this provides
49 * a bit more speed, inlining, smaller size, a guarantee that
50 /// the implementation won't change, and compatibility with
51 /// DeterministicSchedule. By having a much more restrictive
52 /// lifecycle we can also add a bunch of assertions that can help to
53 /// catch race conditions ahead of time.
54 template <bool MayBlock = true, template <typename> class Atom = std::atomic>
// Default WaitOptions for the blocking entry points (wait, try_wait_for,
// try_wait_until); callers may pass their own to tune the pre-block spin.
// NOTE(review): the function body is not visible in this chunk of the file.
57 FOLLY_ALWAYS_INLINE static WaitOptions wait_options() {
// Fenceless initialization of state_ to INIT; see the comment in reset()
// for why no synchronization is needed at construction time.
61 constexpr Baton() noexcept : state_(INIT) {}
// Non-copyable: copying would break the single-post()/single-wait()
// lifecycle contract that the class's assertions enforce.
63 Baton(Baton const&) = delete;
64 Baton& operator=(Baton const&) = delete;
66 /// It is an error to destroy a Baton on which a thread is currently
67 /// wait()ing. In practice this means that the waiter usually takes
68 /// responsibility for destroying the Baton.
// NOTE(review): the destructor's opening line (~Baton() noexcept {) is not
// visible in this chunk; the assertion below appears to be its body.
70 // The docblock for this function says that it can't be called when
71 // there is a concurrent waiter. We assume a strong version of this
72 // requirement in which the caller must _know_ that this is true, they
73 // are not allowed to be merely lucky. If two threads are involved,
74 // the destroying thread must actually have synchronized with the
75 // waiting thread after wait() returned. To convey causality the
76 // waiting thread must have used release semantics and the destroying
77 // thread must have used acquire semantics for that communication,
78 // so we are guaranteed to see the post-wait() value of state_,
79 // which cannot be WAITING.
81 // Note that since we only care about a single memory location,
82 // the only two plausible memory orders here are relaxed and seq_cst.
83 assert(state_.load(std::memory_order_relaxed) != WAITING);
// Non-consuming probe: true iff post() has already run in this lifetime.
// Unlike try_wait() naming, this is a const observer with acquire semantics
// so the caller synchronizes with the poster when it returns true.
86 FOLLY_ALWAYS_INLINE bool ready() const noexcept {
87 auto s = state_.load(std::memory_order_acquire);
// Only INIT or EARLY_DELIVERY are legal here: a concurrent or completed
// blocking wait (WAITING / LATE_DELIVERY / TIMED_OUT) would be a misuse.
88 assert(s == INIT || s == EARLY_DELIVERY);
89 return LIKELY(s == EARLY_DELIVERY);
// NOTE(review): the closing brace of ready() is on a line not shown here.
92 /// Equivalent to destroying the Baton and creating a new one. It is
93 /// a bug to call this while there is a waiting thread, so in practice
94 /// the waiter will be the one that resets the baton.
95 void reset() noexcept {
96 // See ~Baton for a discussion about why relaxed is okay here
97 assert(state_.load(std::memory_order_relaxed) != WAITING);
99 // We use a similar argument to justify the use of a relaxed store
100 // here. Since both wait() and post() are required to be called
101 // only once per lifetime, no thread can actually call those methods
102 // correctly after a reset() unless it synchronizes with the thread
103 // that performed the reset(). If a post() or wait() on another thread
104 // didn't synchronize, then regardless of what operation we performed
105 // here there would be a race on proper use of the Baton's spec
106 // (although not on any particular load and store). Put another way,
107 // we don't need to synchronize here because anybody that might rely
108 // on such synchronization is required by the baton rules to perform
109 // an additional synchronization that has the desired effect anyway.
111 // There is actually a similar argument to be made about the
112 // constructor, in which the fenceless constructor initialization
113 // of state_ is piggybacked on whatever synchronization mechanism
114 // distributes knowledge of the Baton's existence
115 state_.store(INIT, std::memory_order_relaxed);
// NOTE(review): the closing brace of reset() is on a line not shown here.
118 /// Causes wait() to wake up. For each lifetime of a Baton (where a
119 /// lifetime starts at construction or reset() and ends at
120 /// destruction or reset()) there can be at most one call to post(),
121 /// in the single poster version. Any thread may call post().
122 void post() noexcept {
// NOTE(review): this chunk omits several lines of post() — among them the
// compile-time branch that selects the spin-only path (presumably guarded
// on !MayBlock), the assert() opener for the bitmask check below, the
// early returns, the first two arguments of the compare_exchange_strong
// call, and the futex wake after the LATE_DELIVERY store — confirm against
// the full file before editing.
124 /// Spin-only version
// Bitmask membership test: current state must be INIT or EARLY_DELIVERY.
127 ((1 << state_.load(std::memory_order_relaxed)) &
128 ((1 << INIT) | (1 << EARLY_DELIVERY))) != 0);
// Release store publishes the handoff to a spinning try_wait()/ready().
129 state_.store(EARLY_DELIVERY, std::memory_order_release);
133 /// May-block versions
135 uint32_t before = state_.load(std::memory_order_acquire);
137 assert(before == INIT || before == WAITING || before == TIMED_OUT);
// Fast path: nobody is waiting yet, try to flip INIT -> EARLY_DELIVERY.
139 if (before == INIT &&
140 state_.compare_exchange_strong(
143 std::memory_order_release,
144 std::memory_order_relaxed)) {
// CAS failed (or state was not INIT): a waiter raced with us.
148 assert(before == WAITING || before == TIMED_OUT);
// A timed waiter already gave up; the handoff is recorded as consumed.
150 if (before == TIMED_OUT) {
// A thread is (or was) blocked in the futex: mark the late delivery.
154 assert(before == WAITING);
155 state_.store(LATE_DELIVERY, std::memory_order_release);
159 /// Waits until post() has been called in the current Baton lifetime.
160 /// May be called at most once during a Baton lifetime (construction
161 /// |reset until destruction|reset). If post is called before wait in
162 /// the current lifetime then this method returns immediately.
164 /// The restriction that there can be at most one wait() per lifetime
165 /// could be relaxed somewhat without any perf or size regressions,
166 /// but by making this condition very restrictive we can provide better
167 /// checking in debug builds.
// NOTE(review): the body of wait() is not visible in this chunk;
// presumably it fast-paths via try_wait() and falls back to waitSlow(opt)
// (declared below) — confirm against the full file.
169 void wait(const WaitOptions& opt = wait_options()) noexcept {
177 /// Similar to wait, but doesn't block the thread if it hasn't been posted.
179 /// try_wait has the following semantics:
180 /// - It is ok to call try_wait any number times on the same baton until
181 /// try_wait reports that the baton has been posted.
182 /// - It is ok to call timed_wait or wait on the same baton if try_wait
183 /// reports that baton hasn't been posted.
184 /// - If try_wait indicates that the baton has been posted, it is invalid to
185 /// call wait, try_wait or timed_wait on the same baton without resetting
187 /// @return true if baton has been posted, false otherwise
// NOTE(review): the body of try_wait() is not visible in this chunk;
// presumably it delegates to ready() — confirm against the full file.
188 FOLLY_ALWAYS_INLINE bool try_wait() const noexcept {
192 /// Similar to wait, but with a timeout. The thread is unblocked if the
194 /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
195 /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
196 /// words, after try_wait_for the caller can't invoke
197 /// wait/try_wait/try_wait_for/try_wait_until
198 /// again on the same baton without resetting it.
200 /// @param timeout Time until which the thread can block
201 /// @return true if the baton was posted to before timeout,
203 template <typename Rep, typename Period>
204 FOLLY_ALWAYS_INLINE bool try_wait_for(
205 const std::chrono::duration<Rep, Period>& timeout,
206 const WaitOptions& opt = wait_options()) noexcept {
// NOTE(review): a few lines are omitted here — presumably a try_wait()
// fast path that returns true before computing the deadline.
// Convert the relative timeout into an absolute steady_clock deadline so
// the slow path is immune to wall-clock adjustments.
211 auto deadline = std::chrono::steady_clock::now() + timeout;
212 return tryWaitUntilSlow(deadline, opt);
215 /// Similar to wait, but with a deadline. The thread is unblocked if the
216 /// deadline expires.
217 /// Note: Only a single call to wait/try_wait_for/try_wait_until is allowed
218 /// during a baton's life-cycle (from ctor/reset to dtor/reset). In other
219 /// words, after try_wait_until the caller can't invoke
220 /// wait/try_wait/try_wait_for/try_wait_until
221 /// again on the same baton without resetting it.
223 /// @param deadline Time until which the thread can block
224 /// @return true if the baton was posted to before deadline,
226 template <typename Clock, typename Duration>
227 FOLLY_ALWAYS_INLINE bool try_wait_until(
228 const std::chrono::time_point<Clock, Duration>& deadline,
229 const WaitOptions& opt = wait_options()) noexcept {
// NOTE(review): a few lines are omitted here — presumably a try_wait()
// fast path that returns true before entering the slow path.
234 return tryWaitUntilSlow(deadline, opt);
237 /// Alias to try_wait_for. Deprecated.
// Kept for backward compatibility; new code should call try_wait_for.
238 template <typename Rep, typename Period>
239 FOLLY_ALWAYS_INLINE bool timed_wait(
240 const std::chrono::duration<Rep, Period>& timeout) noexcept {
241 return try_wait_for(timeout);
244 /// Alias to try_wait_until. Deprecated.
// Kept for backward compatibility; new code should call try_wait_until.
245 template <typename Clock, typename Duration>
246 FOLLY_ALWAYS_INLINE bool timed_wait(
247 const std::chrono::time_point<Clock, Duration>& deadline) noexcept {
248 return try_wait_until(deadline);
// Lifecycle states stored in state_ (the futex word). The enumerator lines
// are not visible in this chunk; the names referenced elsewhere in this
// file are: INIT, EARLY_DELIVERY, WAITING, TIMED_OUT, LATE_DELIVERY.
// Confirm their exact values against the full file.
252 enum State : uint32_t {
260 // Spin for "some time"
262 // @return true if we received an early delivery during the wait,
263 // false otherwise. If the function returns true then
264 // state_ is guaranteed to be EARLY_DELIVERY
// @param deadline absolute time after which spinning gives up
// @param opt supplies spin_max(), the spin-time budget
265 template <typename Clock, typename Duration>
266 bool spinWaitForEarlyDelivery(
267 const std::chrono::time_point<Clock, Duration>& deadline,
268 const WaitOptions& opt) noexcept {
// NOTE(review): several lines of this function are omitted in this chunk —
// among them the loop header around the body below, the success check
// (presumably ready()/try_wait returning true), and the early-exit
// returns after the deadline and spin-budget checks.
269 auto tbegin = Clock::now();
275 auto const tnow = Clock::now();
// Give up spinning once the caller's absolute deadline has passed.
276 if (tnow >= deadline) {
280 // Backward time discontinuity in Clock? revise pre_block starting point
281 tbegin = std::min(tbegin, tnow);
// Stop spinning when the configured spin budget has been exhausted.
282 auto const dur = std::chrono::duration_cast<Duration>(tnow - tbegin);
283 if (dur >= opt.spin_max()) {
287 // The pause instruction is the polite way to spin, but it doesn't
288 // actually affect correctness to omit it if we don't have it.
289 // Pausing donates the full capabilities of the current core to
290 // its other hyperthreads for a dozen cycles or so
291 asm_volatile_pause();
// Out-of-line slow path for wait(): spin first, then yield, then block on
// the futex until post() stores LATE_DELIVERY.
295 FOLLY_NOINLINE void waitSlow(const WaitOptions& opt) noexcept {
// NOTE(review): several lines of this function are omitted in this chunk —
// among them the early returns after the spin/yield phases, the branch
// selecting behavior (presumably on MayBlock), loop headers, and the
// normalizing store/return at the end. Confirm against the full file.
// No real deadline for a plain wait(): spin with an unbounded deadline.
296 auto const deadline = std::chrono::steady_clock::time_point::max();
297 if (spinWaitForEarlyDelivery(deadline, opt)) {
298 assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
// Yield phase: politely give up the CPU between polls.
303 while (!try_wait()) {
304 std::this_thread::yield();
309 // guess we have to block :(
// Announce ourselves as a blocked waiter: INIT -> WAITING.
310 uint32_t expected = INIT;
311 if (!state_.compare_exchange_strong(expected, WAITING)) {
312 // CAS failed, last minute reprieve
313 assert(expected == EARLY_DELIVERY);
// MemoryIdler lets the idle thread's stack/arena be reclaimed while we
// are parked in the futex.
318 detail::MemoryIdler::futexWait(state_, WAITING);
320 // state_ is the truth even if FUTEX_WAIT reported a matching
321 // FUTEX_WAKE, since we aren't using type-stable storage and we
322 // don't guarantee reuse. The scenario goes like this: thread
323 // A's last touch of a Baton is a call to wake(), which stores
324 // LATE_DELIVERY and gets an unlucky context switch before delivering
325 // the corresponding futexWake. Thread B sees LATE_DELIVERY
326 // without consuming a futex event, because it calls futexWait
327 // with an expected value of WAITING and hence doesn't go to sleep.
328 // B returns, so the Baton's memory is reused and becomes another
329 // Baton (or a reuse of this one). B calls futexWait on the new
330 // Baton lifetime, then A wakes up and delivers a spurious futexWake
331 // to the same memory location. B's futexWait will then report a
332 // consumed wake event even though state_ is still WAITING.
334 // It would be possible to add an extra state_ dance to communicate
335 // that the futexWake has been sent so that we can be sure to consume
336 // it before returning, but that would be a perf and complexity hit.
337 uint32_t s = state_.load(std::memory_order_acquire);
338 assert(s == WAITING || s == LATE_DELIVERY);
340 if (s == LATE_DELIVERY) {
// Out-of-line slow path for the timed waits: spin, yield, then block on the
// futex with an absolute deadline. Returns true iff the baton was posted
// before the deadline; on timeout the state is left as TIMED_OUT so a
// subsequent post() knows the handoff was abandoned.
347 template <typename Clock, typename Duration>
348 FOLLY_NOINLINE bool tryWaitUntilSlow(
349 const std::chrono::time_point<Clock, Duration>& deadline,
350 const WaitOptions& opt) noexcept {
// NOTE(review): several lines of this function are omitted in this chunk —
// among them the returns after each phase, the yield-loop header around
// the deadline check below, the branch selecting behavior (presumably on
// MayBlock), and the loop around the futexWaitUntil call. Confirm against
// the full file.
351 if (spinWaitForEarlyDelivery(deadline, opt)) {
352 assert(state_.load(std::memory_order_acquire) == EARLY_DELIVERY);
// Yield phase: poll until posted or the deadline passes.
361 if (Clock::now() >= deadline) {
364 std::this_thread::yield();
368 // guess we have to block :(
// Announce ourselves as a blocked waiter: INIT -> WAITING.
369 uint32_t expected = INIT;
370 if (!state_.compare_exchange_strong(expected, WAITING)) {
371 // CAS failed, last minute reprieve
372 assert(expected == EARLY_DELIVERY)
377 auto rv = state_.futexWaitUntil(WAITING, deadline);
// On timeout, record TIMED_OUT so a late post() can tell that nobody is
// waiting anymore and must not futexWake.
378 if (rv == folly::detail::FutexResult::TIMEDOUT) {
379 state_.store(TIMED_OUT, std::memory_order_release);
// See the matching comment in waitSlow(): state_ is the truth even if the
// futex reports a consumed wake.
383 uint32_t s = state_.load(std::memory_order_acquire);
384 assert(s == WAITING || s == LATE_DELIVERY);
385 if (s == LATE_DELIVERY) {
// The Baton's entire storage: a single 32-bit word holding the lifecycle
// state, which doubles as the futex word for the blocking paths.
391 detail::Futex<Atom> state_;