2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/detail/Futex.h>
20 #include <condition_variable>
22 #include <boost/intrusive/list.hpp>
23 #include <folly/CallOnce.h>
24 #include <folly/Hash.h>
25 #include <folly/ScopeGuard.h>
29 # include <linux/futex.h>
30 # include <sys/syscall.h>
33 using namespace std::chrono;
35 namespace folly { namespace detail {
39 ////////////////////////////////////////////////////
40 // native implementation using the futex() syscall
44 /// Certain toolchains (like Android's) don't include the full futex API in
45 /// their headers even though they support it. Make sure we have our constants
46 /// even if the headers don't have them.
47 #ifndef FUTEX_WAIT_BITSET
48 # define FUTEX_WAIT_BITSET 9
50 #ifndef FUTEX_WAKE_BITSET
51 # define FUTEX_WAKE_BITSET 10
53 #ifndef FUTEX_PRIVATE_FLAG
54 # define FUTEX_PRIVATE_FLAG 128
56 #ifndef FUTEX_CLOCK_REALTIME
57 # define FUTEX_CLOCK_REALTIME 256
60 int nativeFutexWake(void* addr, int count, uint32_t wakeMask) {
61 int rv = syscall(__NR_futex,
63 FUTEX_WAKE_BITSET | FUTEX_PRIVATE_FLAG, /* op */
65 nullptr, /* timeout */
69 /* NOTE: we ignore errors on wake for the case of a futex
70 guarding its own destruction, similar to this
71 glibc bug with sem_post/sem_wait:
72 https://sourceware.org/bugzilla/show_bug.cgi?id=12674 */
79 template <class Clock>
81 timeSpecFromTimePoint(time_point<Clock> absTime)
83 auto epoch = absTime.time_since_epoch();
84 if (epoch.count() < 0) {
85 // kernel timespec_valid requires non-negative seconds and nanos in [0,1G)
86 epoch = Clock::duration::zero();
89 // timespec-safe seconds and nanoseconds;
90 // chrono::{nano,}seconds are `long long int`
91 // whereas timespec uses smaller types
92 using time_t_seconds = duration<std::time_t, seconds::period>;
93 using long_nanos = duration<long int, nanoseconds::period>;
95 auto secs = duration_cast<time_t_seconds>(epoch);
96 auto nanos = duration_cast<long_nanos>(epoch - secs);
97 struct timespec result = { secs.count(), nanos.count() };
101 FutexResult nativeFutexWaitImpl(void* addr,
103 time_point<system_clock>* absSystemTime,
104 time_point<steady_clock>* absSteadyTime,
106 assert(absSystemTime == nullptr || absSteadyTime == nullptr);
108 int op = FUTEX_WAIT_BITSET | FUTEX_PRIVATE_FLAG;
110 struct timespec* timeout = nullptr;
112 if (absSystemTime != nullptr) {
113 op |= FUTEX_CLOCK_REALTIME;
114 ts = timeSpecFromTimePoint(*absSystemTime);
116 } else if (absSteadyTime != nullptr) {
117 ts = timeSpecFromTimePoint(*absSteadyTime);
121 // Unlike FUTEX_WAIT, FUTEX_WAIT_BITSET requires an absolute timeout
122 // value - http://locklessinc.com/articles/futex_cheat_sheet/
123 int rv = syscall(__NR_futex,
127 timeout, /* timeout */
129 waitMask); /* val3 */
132 return FutexResult::AWOKEN;
136 assert(timeout != nullptr);
137 return FutexResult::TIMEDOUT;
139 return FutexResult::INTERRUPTED;
141 return FutexResult::VALUE_CHANGED;
144 // EINVAL, EACCESS, or EFAULT. EINVAL means there was an invalid
145 // op (should be impossible) or an invalid timeout (should have
146 // been sanitized by timeSpecFromTimePoint). EACCESS or EFAULT
147 // means *addr points to invalid memory, which is unlikely because
148 // the caller should have segfaulted already. We can either
149 // crash, or return a value that lets the process continue for
150 // a bit. We choose the latter. VALUE_CHANGED probably turns the
151 // caller into a spin lock.
152 return FutexResult::VALUE_CHANGED;
159 ///////////////////////////////////////////////////////
160 // compatibility implementation using standard C++ API
162 // Our emulated futex uses 4096 lists of wait nodes. There are two levels
163 // of locking: a per-list mutex that controls access to the list and a
164 // per-node mutex, condvar, and bool that are used for the actual wakeups.
165 // The per-node mutex allows us to do precise wakeups without thundering
168 struct EmulatedFutexWaitNode : public boost::intrusive::list_base_hook<> {
170 const uint32_t waitMask_;
172 // tricky: hold both bucket and node mutex to write, either to read
175 std::condition_variable cond_;
177 EmulatedFutexWaitNode(void* addr, uint32_t waitMask)
179 , waitMask_(waitMask)
185 struct EmulatedFutexBucket {
187 boost::intrusive::list<EmulatedFutexWaitNode> waiters_;
189 static const size_t kNumBuckets = 4096;
190 static EmulatedFutexBucket* gBuckets;
191 static folly::once_flag gBucketInit;
193 static EmulatedFutexBucket& bucketFor(void* addr) {
194 folly::call_once(gBucketInit, [](){
195 gBuckets = new EmulatedFutexBucket[kNumBuckets];
197 uint64_t mixedBits = folly::hash::twang_mix64(
198 reinterpret_cast<uintptr_t>(addr));
199 return gBuckets[mixedBits % kNumBuckets];
203 EmulatedFutexBucket* EmulatedFutexBucket::gBuckets;
204 folly::once_flag EmulatedFutexBucket::gBucketInit;
206 int emulatedFutexWake(void* addr, int count, uint32_t waitMask) {
207 auto& bucket = EmulatedFutexBucket::bucketFor(addr);
208 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
211 for (auto iter = bucket.waiters_.begin();
212 numAwoken < count && iter != bucket.waiters_.end(); ) {
214 auto& node = *iter++;
215 if (node.addr_ == addr && (node.waitMask_ & waitMask) != 0) {
218 // we unlink, but waiter destroys the node
219 bucket.waiters_.erase(current);
221 std::unique_lock<std::mutex> nodeLock(node.mutex_);
222 node.signaled_ = true;
223 node.cond_.notify_one();
229 FutexResult emulatedFutexWaitImpl(
232 time_point<system_clock>* absSystemTime,
233 time_point<steady_clock>* absSteadyTime,
235 auto& bucket = EmulatedFutexBucket::bucketFor(addr);
236 EmulatedFutexWaitNode node(addr, waitMask);
239 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
242 memcpy(&actual, addr, sizeof(uint32_t));
243 if (actual != expected) {
244 return FutexResult::VALUE_CHANGED;
247 bucket.waiters_.push_back(node);
248 } // bucketLock scope
250 std::cv_status status = std::cv_status::no_timeout;
252 std::unique_lock<std::mutex> nodeLock(node.mutex_);
253 while (!node.signaled_ && status != std::cv_status::timeout) {
254 if (absSystemTime != nullptr) {
255 status = node.cond_.wait_until(nodeLock, *absSystemTime);
256 } else if (absSteadyTime != nullptr) {
257 status = node.cond_.wait_until(nodeLock, *absSteadyTime);
259 node.cond_.wait(nodeLock);
264 if (status == std::cv_status::timeout) {
265 // it's not really a timeout until we unlink the unsignaled node
266 std::unique_lock<std::mutex> bucketLock(bucket.mutex_);
267 if (!node.signaled_) {
268 bucket.waiters_.erase(bucket.waiters_.iterator_to(node));
269 return FutexResult::TIMEDOUT;
272 return FutexResult::AWOKEN;
278 /////////////////////////////////
279 // Futex<> specializations
283 Futex<std::atomic>::futexWake(int count, uint32_t wakeMask) {
285 return nativeFutexWake(this, count, wakeMask);
287 return emulatedFutexWake(this, count, wakeMask);
293 Futex<EmulatedFutexAtomic>::futexWake(int count, uint32_t wakeMask) {
294 return emulatedFutexWake(this, count, wakeMask);
299 Futex<std::atomic>::futexWaitImpl(uint32_t expected,
300 time_point<system_clock>* absSystemTime,
301 time_point<steady_clock>* absSteadyTime,
304 return nativeFutexWaitImpl(
305 this, expected, absSystemTime, absSteadyTime, waitMask);
307 return emulatedFutexWaitImpl(
308 this, expected, absSystemTime, absSteadyTime, waitMask);
314 Futex<EmulatedFutexAtomic>::futexWaitImpl(
316 time_point<system_clock>* absSystemTime,
317 time_point<steady_clock>* absSteadyTime,
319 return emulatedFutexWaitImpl(
320 this, expected, absSystemTime, absSteadyTime, waitMask);
323 }} // namespace folly::detail