2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_SMALLLOCKS_H_
18 #define FOLLY_SMALLLOCKS_H_
 * This header defines a few very small mutex types.  These are useful
 * in highly memory-constrained environments where contention is
 * unlikely.
 * Note: these locks are for use when you aren't likely to contend on
 * the critical section, or when the critical section is incredibly
 * small.  Given that, both of the locks defined in this header are
 * inherently unfair: that is, the longer a thread is waiting, the
 * longer it waits between attempts to acquire, so newer waiters are
 * more likely to get the mutex.  For the intended use-case this is
 * fine.
33 * @author Keith Adams <kma@fb.com>
34 * @author Jordan DeLong <delong.j@fb.com>
#include <array>
#include <cstddef>
#include <cstdint>
#include <ctime>
#include <type_traits>

#include <boost/noncopyable.hpp>
#include <glog/logging.h>

#include <folly/Portability.h>
50 # error "SmallLocks.h is currently x64-only."
55 //////////////////////////////////////////////////////////////////////
60 * A helper object for the condended case. Starts off with eager
61 * spinning, and falls back to sleeping for small quantums.
64 static const uint32_t kMaxActiveSpin = 4000;
69 Sleeper() : spinCount(0) {}
72 if (spinCount < kMaxActiveSpin) {
74 asm volatile("pause");
77 * Always sleep 0.5ms, assuming this will make the kernel put
78 * us down for whatever its minimum timer resolution is (in
79 * linux this varies by kernel version from 1ms to 10ms).
81 struct timespec ts = { 0, 500000 };
82 nanosleep(&ts, nullptr);
89 //////////////////////////////////////////////////////////////////////
92 * A really, *really* small spinlock for fine-grained locking of lots
95 * Zero initializing these is guaranteed to be as good as calling
96 * init(), since the free state is guaranteed to be all-bits zero.
98 * This class should be kept a POD, so we can used it in other packed
99 * structs (gcc does not allow __attribute__((packed)) on structs that
100 * contain non-POD data). This means avoid adding a constructor, or
101 * making some members private, etc.
103 struct MicroSpinLock {
104 enum { FREE = 0, LOCKED = 1 };
108 * Atomically move lock_ from "compare" to "newval". Return boolean
109 * success. Do not play on or around.
111 bool cas(uint8_t compare, uint8_t newVal) {
113 bool memVal; // only set if the cmpxchg fails
114 asm volatile("lock; cmpxchgb %[newVal], (%[lockPtr]);"
116 : [output] "=r" (out), "=a" (memVal)
117 : "a" (compare), // cmpxchgb constrains this to be in %al
118 [newVal] "q" (newVal), // Needs to be byte-accessible
119 [lockPtr] "r" (&lock_)
120 : "memory", "flags");
124 // Initialize this MSL. It is unnecessary to call this if you
125 // zero-initialize the MicroSpinLock.
131 return cas(FREE, LOCKED);
135 detail::Sleeper sleeper;
137 while (lock_ != FREE) {
138 asm volatile("" : : : "memory");
141 } while (!try_lock());
142 DCHECK(lock_ == LOCKED);
146 CHECK(lock_ == LOCKED);
147 asm volatile("" : : : "memory");
148 lock_ = FREE; // release barrier on x86
152 //////////////////////////////////////////////////////////////////////
155 * Spin lock on a single bit in an integral type. You can use this
156 * with 16, 32, or 64-bit integral types.
158 * This is useful if you want a small lock and already have an int
159 * with a bit in it that you aren't using. But note that it can't be
160 * as small as MicroSpinLock (1 byte), if you don't already have a
161 * convenient int with an unused bit lying around to put it on.
163 * To construct these, either use init() or zero initialize. We don't
164 * have a real constructor because we want this to be a POD type so we
165 * can put it into packed structs.
167 template<class IntType, int Bit = sizeof(IntType) * 8 - 1>
168 struct PicoSpinLock {
169 // Internally we deal with the unsigned version of the type.
170 typedef typename std::make_unsigned<IntType>::type UIntType;
172 static_assert(std::is_integral<IntType>::value,
173 "PicoSpinLock needs an integral type");
174 static_assert(sizeof(IntType) == 2 || sizeof(IntType) == 4 ||
175 sizeof(IntType) == 8,
176 "PicoSpinLock can't work on integers smaller than 2 bytes");
179 static const UIntType kLockBitMask_ = UIntType(1) << Bit;
183 * You must call this function before using this class, if you
184 * default constructed it. If you zero-initialized it you can
185 * assume the PicoSpinLock is in a valid unlocked state with
188 * (This doesn't use a constructor because we want to be a POD.)
190 void init(IntType initialValue = 0) {
191 CHECK(!(initialValue & kLockBitMask_));
192 lock_ = initialValue;
196 * Returns the value of the integer we using for our lock, except
197 * with the bit we are using as a lock cleared, regardless of
198 * whether the lock is held.
200 * It is 'safe' to call this without holding the lock. (As in: you
201 * get the same guarantees for simultaneous accesses to an integer
202 * as you normally get.)
204 IntType getData() const {
205 return static_cast<IntType>(lock_ & ~kLockBitMask_);
209 * Set the value of the other bits in our integer.
211 * Don't use this when you aren't holding the lock, unless it can be
212 * guaranteed that no other threads may be trying to use this.
214 void setData(IntType w) {
215 CHECK(!(w & kLockBitMask_));
216 lock_ = (lock_ & kLockBitMask_) | w;
220 * Try to get the lock without blocking: returns whether or not we
223 bool try_lock() const {
226 #define FB_DOBTS(size) \
227 asm volatile("lock; bts" #size " %1, (%2); setnc %0" \
233 switch (sizeof(IntType)) {
234 case 2: FB_DOBTS(w); break;
235 case 4: FB_DOBTS(l); break;
236 case 8: FB_DOBTS(q); break;
245 * Block until we can acquire the lock. Uses Sleeper to wait.
248 detail::Sleeper sleeper;
249 while (!try_lock()) {
255 * Release the lock, without changing the value of the rest of the
258 void unlock() const {
259 #define FB_DOBTR(size) \
260 asm volatile("lock; btr" #size " %0, (%1)" \
267 // Reads and writes can not be reordered wrt locked instructions,
268 // so we don't need a memory fence here.
269 switch (sizeof(IntType)) {
270 case 2: FB_DOBTR(w); break;
271 case 4: FB_DOBTR(l); break;
272 case 8: FB_DOBTR(q); break;
279 //////////////////////////////////////////////////////////////////////
282 * Array of spinlocks where each one is padded to prevent false sharing.
283 * Useful for shard-based locking implementations in environments where
284 * contention is unlikely.
287 // TODO: generate it from configure (`getconf LEVEL1_DCACHE_LINESIZE`)
288 #define FOLLY_CACHE_LINE_SIZE 64
290 template <class T, size_t N>
291 struct SpinLockArray {
292 T& operator[](size_t i) {
293 return data_[i].lock;
296 const T& operator[](size_t i) const {
297 return data_[i].lock;
300 constexpr size_t size() const { return N; }
303 struct PaddedSpinLock {
304 PaddedSpinLock() : lock() { }
306 char padding[FOLLY_CACHE_LINE_SIZE - sizeof(T)];
308 static_assert(sizeof(PaddedSpinLock) == FOLLY_CACHE_LINE_SIZE,
309 "Invalid size of PaddedSpinLock");
311 // Check if T can theoretically cross a cache line.
312 // NOTE: It should be alignof(std::max_align_t), but max_align_t
313 // isn't supported by gcc 4.6.2.
314 static_assert(alignof(MaxAlign) > 0 &&
315 FOLLY_CACHE_LINE_SIZE % alignof(MaxAlign) == 0 &&
316 sizeof(T) <= alignof(MaxAlign),
317 "T can cross cache line boundaries");
319 char padding_[FOLLY_CACHE_LINE_SIZE];
320 std::array<PaddedSpinLock, N> data_;
321 } __attribute__((aligned));
323 //////////////////////////////////////////////////////////////////////
325 typedef std::lock_guard<MicroSpinLock> MSLGuard;
327 //////////////////////////////////////////////////////////////////////