2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/detail/Futex.h>
18 #include <boost/intrusive/list.hpp>
19 #include <folly/Indestructible.h>
20 #include <folly/ScopeGuard.h>
21 #include <folly/hash/Hash.h>
22 #include <folly/portability/SysSyscall.h>
27 #include <condition_variable>
31 #include <linux/futex.h>
34 using namespace std::chrono;
41 ////////////////////////////////////////////////////
42 // native implementation using the futex() syscall
46 /// Certain toolchains (like Android's) don't include the full futex API in
47 /// their headers even though they support it. Make sure we have our constants
48 /// even if the headers don't have them.
49 #ifndef FUTEX_WAIT_BITSET
50 # define FUTEX_WAIT_BITSET 9
52 #ifndef FUTEX_WAKE_BITSET
53 # define FUTEX_WAKE_BITSET 10
55 #ifndef FUTEX_PRIVATE_FLAG
56 # define FUTEX_PRIVATE_FLAG 128
58 #ifndef FUTEX_CLOCK_REALTIME
59 # define FUTEX_CLOCK_REALTIME 256
62 int nativeFutexWake(void* addr, int count, uint32_t wakeMask) {
63 int rv = syscall(__NR_futex,
65 FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, /* op */
67 nullptr, /* timeout */
71 /* NOTE: we ignore errors on wake for the case of a futex
72 guarding its own destruction, similar to this
73 glibc bug with sem_post/sem_wait:
74 https://sourceware.org/bugzilla/show_bug.cgi?id=12674 */
81 template <class Clock>
83 timeSpecFromTimePoint(time_point<Clock> absTime)
85 auto epoch = absTime.time_since_epoch();
86 if (epoch.count() < 0) {
87 // kernel timespec_valid requires non-negative seconds and nanos in [0,1G)
88 epoch = Clock::duration::zero();
91 // timespec-safe seconds and nanoseconds;
92 // chrono::{nano,}seconds are `long long int`
93 // whereas timespec uses smaller types
94 using time_t_seconds = duration<std::time_t, seconds::period>;
95 using long_nanos = duration<long int, nanoseconds::period>;
97 auto secs = duration_cast<time_t_seconds>(epoch);
98 auto nanos = duration_cast<long_nanos>(epoch - secs);
99 struct timespec result = { secs.count(), nanos.count() };
103 FutexResult nativeFutexWaitImpl(void* addr,
105 time_point<system_clock>* absSystemTime,
106 time_point<steady_clock>* absSteadyTime,
108 assert(absSystemTime == nullptr || absSteadyTime == nullptr);
110 int op = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG;
112 struct timespec* timeout = nullptr;
114 if (absSystemTime != nullptr) {
115 op |= FUTEX_CLOCK_REALTIME;
116 ts = timeSpecFromTimePoint(*absSystemTime);
118 } else if (absSteadyTime != nullptr) {
119 ts = timeSpecFromTimePoint(*absSteadyTime);
123 // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout
124 // value - http://locklessinc.com/articles/futex_cheat_sheet/
125 int rv = syscall(__NR_futex,
129 timeout, /* timeout */
131 waitMask); /* val3 */
134 return FutexResult::AWOKEN;
138 assert(timeout != nullptr);
139 return FutexResult::TIMEDOUT;
141 return FutexResult::INTERRUPTED;
143 return FutexResult::VALUE_CHANGED;
146 // EINVAL, EACCESS, or EFAULT. EINVAL means there was an invalid
147 // op (should be impossible) or an invalid timeout (should have
148 // been sanitized by timeSpecFromTimePoint). EACCESS or EFAULT
149 // means *addr points to invalid memory, which is unlikely because
150 // the caller should have segfaulted already. We can either
151 // crash, or return a value that lets the process continue for
152 // a bit. We choose the latter. VALUE_CHANGED probably turns the
153 // caller into a spin lock.
154 return FutexResult::VALUE_CHANGED;
161 ///////////////////////////////////////////////////////
162 // compatibility implementation using standard C++ API
164 // Our emulated futex uses 4096 lists of wait nodes. There are two levels
165 // of locking: a per-list mutex that controls access to the list and a
166 // per-node mutex, condvar, and bool that are used for the actual wakeups.
167 // The per-node mutex allows us to do precise wakeups without thundering
170 struct EmulatedFutexWaitNode : public boost::intrusive::list_base_hook<> {
172 const uint32_t waitMask_;
174 // tricky: hold both bucket and node mutex to write, either to read
177 std::condition_variable cond_;
179 EmulatedFutexWaitNode(void* addr, uint32_t waitMask)
181 , waitMask_(waitMask)
187 struct EmulatedFutexBucket {
189 boost::intrusive::list<EmulatedFutexWaitNode> waiters_;
191 static constexpr size_t const kNumBuckets = kIsMobile ? 256 : 4096;
193 static EmulatedFutexBucket& bucketFor(void* addr) {
194 // Statically allocating this lets us use this in allocation-sensitive
195 // contexts. This relies on the assumption that std::mutex won't dynamically
196 // allocate memory, which we assume to be the case on Linux and iOS.
197 static Indestructible<std::array<EmulatedFutexBucket, kNumBuckets>>
200 folly::hash::twang_mix64(reinterpret_cast<uintptr_t>(addr));
201 return (*gBuckets)[mixedBits % kNumBuckets];
205 int emulatedFutexWake(void* addr, int count, uint32_t waitMask) {
206 auto& bucket = EmulatedFutexBucket::bucketFor(addr);
207 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
210 for (auto iter = bucket.waiters_.begin();
211 numAwoken < count && iter != bucket.waiters_.end(); ) {
213 auto& node = *iter++;
214 if (node.addr_ == addr && (node.waitMask_ & waitMask) != 0) {
217 // we unlink, but waiter destroys the node
218 bucket.waiters_.erase(current);
220 std::unique_lock<std::mutex> nodeLock(node.mutex_);
221 node.signaled_ = true;
222 node.cond_.notify_one();
228 template <typename F>
229 FutexResult emulatedFutexWaitImpl(
232 time_point<system_clock>* absSystemTime,
233 time_point<steady_clock>* absSteadyTime,
236 std::is_same<F, Futex<std::atomic>>::value ||
237 std::is_same<F, Futex<EmulatedFutexAtomic>>::value,
238 "Type F must be either Futex<std::atomic> or Futex<EmulatedFutexAtomic>");
239 void* addr = static_cast<void*>(futex);
240 auto& bucket = EmulatedFutexBucket::bucketFor(addr);
241 EmulatedFutexWaitNode node(addr, waitMask);
244 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
246 if (futex->load(std::memory_order_relaxed) != expected) {
247 return FutexResult::VALUE_CHANGED;
250 bucket.waiters_.push_back(node);
251 } // bucketLock scope
253 std::cv_status status = std::cv_status::no_timeout;
255 std::unique_lock<std::mutex> nodeLock(node.mutex_);
256 while (!node.signaled_ && status != std::cv_status::timeout) {
257 if (absSystemTime != nullptr) {
258 status = node.cond_.wait_until(nodeLock, *absSystemTime);
259 } else if (absSteadyTime != nullptr) {
260 status = node.cond_.wait_until(nodeLock, *absSteadyTime);
262 node.cond_.wait(nodeLock);
267 if (status == std::cv_status::timeout) {
268 // it's not really a timeout until we unlink the unsignaled node
269 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
270 if (!node.signaled_) {
271 bucket.waiters_.erase(bucket.waiters_.iterator_to(node));
272 return FutexResult::TIMEDOUT;
275 return FutexResult::AWOKEN;
280 /////////////////////////////////
281 // Futex<> specializations
285 Futex<std::atomic>::futexWake(int count, uint32_t wakeMask) {
287 return nativeFutexWake(this, count, wakeMask);
289 return emulatedFutexWake(this, count, wakeMask);
295 Futex<EmulatedFutexAtomic>::futexWake(int count, uint32_t wakeMask) {
296 return emulatedFutexWake(this, count, wakeMask);
301 Futex<std::atomic>::futexWaitImpl(uint32_t expected,
302 time_point<system_clock>* absSystemTime,
303 time_point<steady_clock>* absSteadyTime,
306 return nativeFutexWaitImpl(
307 this, expected, absSystemTime, absSteadyTime, waitMask);
309 return emulatedFutexWaitImpl(
310 this, expected, absSystemTime, absSteadyTime, waitMask);
316 Futex<EmulatedFutexAtomic>::futexWaitImpl(
318 time_point<system_clock>* absSystemTime,
319 time_point<steady_clock>* absSteadyTime,
321 return emulatedFutexWaitImpl(
322 this, expected, absSystemTime, absSteadyTime, waitMask);
325 } // namespace detail