/*
 * Copyright 2015 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Two Read-Write spin lock implementations.
 *
 * Ref: http://locklessinc.com/articles/locks
 *
 * Both locks here are faster than pthread_rwlock and have very low
 * overhead (usually 20-30ns). They don't use any system mutexes and
 * are very compact (4/8 bytes), so are suitable for per-instance
 * based locking, particularly when contention is not expected.
 *
 * In most cases, RWSpinLock is a reasonable choice. It has minimal
 * overhead, and comparable contention performance when the number of
 * competing threads is less than or equal to the number of logical
 * CPUs. Even as the number of threads gets larger, RWSpinLock can
 * still be very competitive in READ, although it is slower on WRITE,
 * and also inherently unfair to writers.
 *
 * RWTicketSpinLock shows more balanced READ/WRITE performance. If
 * your application really needs a lot more threads, and a
 * higher-priority writer, prefer one of the RWTicketSpinLock locks.
 *
 * Caveats:
 *
 *   RWTicketSpinLock locks can only be used with GCC on x86/x86-64
 *   based systems.
 *
 *   RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent
 *   readers and writers.
 *
 *   RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
 *   readers and writers.
 *
 *   RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
 *   writer priority) is NOT reentrant, even for lock_shared().
 *
 *   The lock will not grant any new shared (read) accesses while a thread
 *   attempting to acquire the lock in write mode is blocked. (That is,
 *   if the lock is held in shared mode by N threads, and a thread attempts
 *   to acquire it in write mode, no one else can acquire it in shared mode
 *   until these N threads release the lock and then the blocked thread
 *   acquires and releases the exclusive lock.) This also applies to
 *   attempts to reacquire the lock in shared mode by threads that already
 *   hold it in shared mode, making the lock non-reentrant.
 *
 *   RWSpinLock handles 2^30 - 1 concurrent readers.
 *
 * @author Xin Liu <xliux@fb.com>
 */
#ifndef FOLLY_RWSPINLOCK_H_
#define FOLLY_RWSPINLOCK_H_
/*
========================================================================
Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz) 8 cores(16 HTs)
========================================================================

------------------------------------------------------------------------------
1. Single thread benchmark (read/write lock + unlock overhead)
Benchmark                                    Iters   Total t    t/iter iter/sec
-------------------------------------------------------------------------------
*      BM_RWSpinLockRead                     100000  1.786 ms  17.86 ns   53.4M
+30.5% BM_RWSpinLockWrite                    100000  2.331 ms  23.31 ns  40.91M
+85.7% BM_RWTicketSpinLock32Read             100000  3.317 ms  33.17 ns  28.75M
+96.0% BM_RWTicketSpinLock32Write            100000    3.5 ms     35 ns  27.25M
+85.6% BM_RWTicketSpinLock64Read             100000  3.315 ms  33.15 ns  28.77M
+96.0% BM_RWTicketSpinLock64Write            100000    3.5 ms     35 ns  27.25M
+85.7% BM_RWTicketSpinLock32FavorWriterRead  100000  3.317 ms  33.17 ns  28.75M
+29.7% BM_RWTicketSpinLock32FavorWriterWrite 100000  2.316 ms  23.16 ns  41.18M
+85.3% BM_RWTicketSpinLock64FavorWriterRead  100000  3.309 ms  33.09 ns  28.82M
+30.2% BM_RWTicketSpinLock64FavorWriterWrite 100000  2.325 ms  23.25 ns  41.02M
+ 175% BM_PThreadRWMutexRead                 100000  4.917 ms  49.17 ns   19.4M
+ 166% BM_PThreadRWMutexWrite                100000  4.757 ms  47.57 ns  20.05M

------------------------------------------------------------------------------
2. Contention Benchmark      90% read  10% write
Benchmark                    hits       average    min       max        sigma
------------------------------------------------------------------------------
---------- 8 threads ------------
RWSpinLock       Write       142666     220ns      78ns      40.8us     269ns
RWSpinLock       Read        1282297    222ns      80ns      37.7us     248ns
RWTicketSpinLock Write       85692      209ns      71ns      17.9us     252ns
RWTicketSpinLock Read        769571     215ns      78ns      33.4us     251ns
pthread_rwlock_t Write       84248      2.48us     99ns      269us      8.19us
pthread_rwlock_t Read        761646     933ns      101ns     374us      3.25us

---------- 16 threads ------------
RWSpinLock       Write       124236     237ns      78ns      261us      801ns
RWSpinLock       Read        1115807    236ns      78ns      2.27ms     2.17us
RWTicketSpinLock Write       81781      231ns      71ns      31.4us     351ns
RWTicketSpinLock Read        734518     238ns      78ns      73.6us     379ns
pthread_rwlock_t Write       83363      7.12us     99ns      785us      28.1us
pthread_rwlock_t Read        754978     2.18us     101ns     1.02ms     14.3us

---------- 50 threads ------------
RWSpinLock       Write       131142     1.37us     82ns      7.53ms     68.2us
RWSpinLock       Read        1181240    262ns      78ns      6.62ms     12.7us
RWTicketSpinLock Write       83045      397ns      73ns      7.01ms     31.5us
RWTicketSpinLock Read        744133     386ns      78ns      11ms       31.4us
pthread_rwlock_t Write       80849      112us      103ns     4.52ms     263us
pthread_rwlock_t Read        728698     24us       101ns     7.28ms     194us
*/
#include <folly/Portability.h>

#if defined(__GNUC__) && \
    (defined(__i386) || FOLLY_X64 || \
     defined(ARCH_K8))
# define RW_SPINLOCK_USE_X86_INTRINSIC_
# include <x86intrin.h>
#elif defined(_MSC_VER) && defined(FOLLY_X64)
# define RW_SPINLOCK_USE_X86_INTRINSIC_
#else
# undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif

// iOS doesn't define _mm_cvtsi64_si128 and friends
#if (FOLLY_SSE >= 2) && !TARGET_OS_IPHONE
#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#else
#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#endif
#include <atomic>
#include <algorithm>

#include <sched.h>
#include <glog/logging.h>

#include <folly/Likely.h>

namespace folly {
/*
 * A simple, small (4-bytes), but unfair rwlock.  Use it when you want
 * a nice writer and don't expect a lot of write/read contention, or
 * when you need small rwlocks since you are creating a large number
 * of them.
 *
 * Note that the unfairness here is extreme: if the lock is
 * continually accessed for read, writers will never get a chance.  If
 * the lock can be that highly contended this class is probably not an
 * ideal choice anyway.
 *
 * It currently implements most of the Lockable, SharedLockable and
 * UpgradeLockable concepts except the TimedLockable related locking/unlocking
 * interfaces.
 */
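/*
 * Example (an illustrative sketch, not part of the original header): guarding
 * a plain field with the RAII holders defined below. `Stats` and `hits` are
 * hypothetical names.
 *
 *   struct Stats {
 *     folly::RWSpinLock lock;
 *     int64_t hits = 0;
 *   };
 *
 *   int64_t readHits(Stats& s) {
 *     folly::RWSpinLock::ReadHolder guard(&s.lock);   // shared lock
 *     return s.hits;                                  // released at scope exit
 *   }
 *
 *   void bumpHits(Stats& s) {
 *     folly::RWSpinLock::WriteHolder guard(&s.lock);  // exclusive lock
 *     ++s.hits;
 *   }
 */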
class RWSpinLock {
  // Layout: bit 0 = WRITER, bit 1 = UPGRADED, bits 2..31 = reader count
  // (each reader adds READER == 4).
  enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };

 public:
  constexpr RWSpinLock() : bits_(0) {}

  RWSpinLock(RWSpinLock const&) = delete;
  RWSpinLock& operator=(RWSpinLock const&) = delete;
  // Lockable Concept
  void lock() {
    int count = 0;
    while (!LIKELY(try_lock())) {
      if (++count > 1000) sched_yield();
    }
  }
  // Writer is responsible for clearing up both the UPGRADED and WRITER bits.
  void unlock() {
    static_assert(READER > WRITER + UPGRADED, "wrong bits!");
    bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release);
  }
  // SharedLockable Concept
  void lock_shared() {
    int count = 0;
    while (!LIKELY(try_lock_shared())) {
      if (++count > 1000) sched_yield();
    }
  }
  void unlock_shared() {
    bits_.fetch_add(-READER, std::memory_order_release);
  }
  // Downgrade the lock from writer status to reader status.
  void unlock_and_lock_shared() {
    bits_.fetch_add(READER, std::memory_order_acquire);
    unlock();  // drop the WRITER/UPGRADED bits, keeping our READER count
  }
  // UpgradeLockable Concept
  void lock_upgrade() {
    int count = 0;
    while (!try_lock_upgrade()) {
      if (++count > 1000) sched_yield();
    }
  }
  void unlock_upgrade() {
    bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel);
  }
  // unlock upgrade and try to acquire write lock
  void unlock_upgrade_and_lock() {
    int64_t count = 0;
    while (!try_unlock_upgrade_and_lock()) {
      if (++count > 1000) sched_yield();
    }
  }
  // unlock upgrade and read lock atomically
  void unlock_upgrade_and_lock_shared() {
    bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
  }
  // write unlock and upgrade lock atomically
  void unlock_and_lock_upgrade() {
    // need to do it in two steps here -- as the UPGRADED bit might be OR-ed
    // at the same time when other threads are trying to do try_lock_upgrade().
    bits_.fetch_or(UPGRADED, std::memory_order_acquire);
    bits_.fetch_add(-WRITER, std::memory_order_release);
  }
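  // Example (an illustrative sketch, not part of the original header): the
  // typical upgrade protocol built from the primitives above. Holding the
  // lock in upgrade mode blocks new readers; the holder then atomically
  // promotes itself to writer once it decides it must mutate.
  // needsRewrite() and rewrite() are hypothetical.
  //
  //   folly::RWSpinLock lock;
  //   ...
  //   lock.lock_upgrade();               // new readers are now blocked
  //   if (needsRewrite()) {
  //     lock.unlock_upgrade_and_lock();  // waits for existing readers to drain
  //     rewrite();
  //     lock.unlock();                   // clears WRITER and UPGRADED together
  //   } else {
  //     lock.unlock_upgrade();
  //   }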
  // Attempt to acquire writer permission. Return false if we didn't get it.
  bool try_lock() {
    int32_t expect = 0;
    return bits_.compare_exchange_strong(expect, WRITER,
      std::memory_order_acq_rel);
  }
  // Try to get reader permission on the lock. This can fail if we
  // find out someone is a writer or upgrader.
  // Setting the UPGRADED bit would allow a writer-to-be to indicate
  // its intention to write and block any new readers while waiting
  // for existing readers to finish and release their read locks. This
  // helps avoid starving writers (promoted from upgraders).
  bool try_lock_shared() {
    // fetch_add is considerably (100%) faster than compare_exchange,
    // so here we are optimizing for the common (lock success) case.
    int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
    if (UNLIKELY(value & (WRITER | UPGRADED))) {
      bits_.fetch_add(-READER, std::memory_order_release);
      return false;
    }
    return true;
  }
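  // Worked example (added commentary, using the bit layout above): with two
  // readers in, bits_ == 2 * READER == 8. A third reader's fetch_add returns
  // 8, and 8 & (WRITER | UPGRADED) == 0, so the +READER sticks and bits_ is
  // now 12. If an upgrader held the lock instead (bits_ == UPGRADED == 2),
  // the returned value fails the mask test and the reader backs out its
  // increment.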
  // try to unlock upgrade and write lock atomically
  bool try_unlock_upgrade_and_lock() {
    int32_t expect = UPGRADED;
    return bits_.compare_exchange_strong(expect, WRITER,
      std::memory_order_acq_rel);
  }
  // try to acquire an upgradable lock.
  bool try_lock_upgrade() {
    int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire);

    // Note: when failed, we cannot flip the UPGRADED bit back,
    // as in this case there is either another upgrade lock or a write lock.
    // If it's a write lock, the bit will get cleared up when that lock's done
    // with unlock().
    return ((value & (UPGRADED | WRITER)) == 0);
  }
  // mainly for debugging purposes.
  int32_t bits() const { return bits_.load(std::memory_order_acquire); }
  class ReadHolder;
  class UpgradedHolder;
  class WriteHolder;
  class ReadHolder {
   public:
    explicit ReadHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock_shared();
    }

    explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_shared();
    }

    ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    // down-grade the UpgradedHolder to a ReadHolder
    explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) {
      upgraded.lock_ = nullptr;
      if (lock_) lock_->unlock_upgrade_and_lock_shared();
    }

    explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) {
      writer.lock_ = nullptr;
      if (lock_) lock_->unlock_and_lock_shared();
    }

    ReadHolder& operator=(ReadHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    ReadHolder(const ReadHolder& other) = delete;
    ReadHolder& operator=(const ReadHolder& other) = delete;

    ~ReadHolder() { if (lock_) lock_->unlock_shared(); }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock_shared();
      lock_ = lock;
      if (lock_) lock_->lock_shared();
    }

    void swap(ReadHolder* other) {
      std::swap(lock_, other->lock_);
    }

   private:
    friend class UpgradedHolder;
    friend class WriteHolder;
    RWSpinLock* lock_;
  };
  class UpgradedHolder {
   public:
    explicit UpgradedHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock_upgrade();
    }

    explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_upgrade();
    }

    explicit UpgradedHolder(WriteHolder&& writer) {
      lock_ = writer.lock_;
      writer.lock_ = nullptr;
      if (lock_) lock_->unlock_and_lock_upgrade();
    }

    UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    UpgradedHolder& operator =(UpgradedHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    UpgradedHolder(const UpgradedHolder& other) = delete;
    UpgradedHolder& operator =(const UpgradedHolder& other) = delete;

    ~UpgradedHolder() { if (lock_) lock_->unlock_upgrade(); }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock_upgrade();
      lock_ = lock;
      if (lock_) lock_->lock_upgrade();
    }

    void swap(UpgradedHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class WriteHolder;
    friend class ReadHolder;
    RWSpinLock* lock_;
  };
  class WriteHolder {
   public:
    explicit WriteHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock();
    }

    explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock();
    }

    // promoted from an upgrade lock holder
    explicit WriteHolder(UpgradedHolder&& upgraded) {
      lock_ = upgraded.lock_;
      upgraded.lock_ = nullptr;
      if (lock_) lock_->unlock_upgrade_and_lock();
    }

    WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    WriteHolder& operator =(WriteHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    WriteHolder(const WriteHolder& other) = delete;
    WriteHolder& operator =(const WriteHolder& other) = delete;

    ~WriteHolder() { if (lock_) lock_->unlock(); }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock();
      lock_ = lock;
      if (lock_) lock_->lock();
    }

    void swap(WriteHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class ReadHolder;
    friend class UpgradedHolder;
    RWSpinLock* lock_;
  };
  // Synchronized<> adaptors
  friend void acquireRead(RWSpinLock& l) { return l.lock_shared(); }
  friend void acquireReadWrite(RWSpinLock& l) { return l.lock(); }
  friend void releaseRead(RWSpinLock& l) { return l.unlock_shared(); }
  friend void releaseReadWrite(RWSpinLock& l) { return l.unlock(); }

 private:
  std::atomic<int32_t> bits_;
};
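// Example (an illustrative sketch, not part of the original header): the
// adaptors above are what let RWSpinLock act as the mutex policy of
// folly::Synchronized; the element type below is just an example.
//
//   folly::Synchronized<std::map<int, std::string>, folly::RWSpinLock> table;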
#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
// A more balanced Read-Write spin lock implemented based on GCC intrinsics.

namespace detail {
template <size_t kBitWidth> struct RWTicketIntTrait {
  static_assert(kBitWidth == 32 || kBitWidth == 64,
      "bit width has to be either 32 or 64");
};
template <>
struct RWTicketIntTrait<64> {
  typedef uint64_t FullInt;
  typedef uint32_t HalfInt;
  typedef uint16_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint16_t v[4]) {
    return _mm_set_epi16(0, 0, 0, 0, v[3], v[2], v[1], v[0]);
  }
  static inline __m128i fromInteger(uint64_t from) {
    return _mm_cvtsi64_si128(from);
  }
  static inline uint64_t toInteger(__m128i in) {
    return _mm_cvtsi128_si64(in);
  }
  static inline uint64_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi16(in, kDelta));
  }
#endif
};
template <>
struct RWTicketIntTrait<32> {
  typedef uint32_t FullInt;
  typedef uint16_t HalfInt;
  typedef uint8_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint8_t v[4]) {
    return _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, v[3], v[2], v[1], v[0]);
  }
  static inline __m128i fromInteger(uint32_t from) {
    return _mm_cvtsi32_si128(from);
  }
  static inline uint32_t toInteger(__m128i in) {
    return _mm_cvtsi128_si32(in);
  }
  static inline uint32_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi8(in, kDelta));
  }
#endif
};
}  // namespace detail
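// Worked example (added commentary): addParallel() bumps several ticket
// counters with one SIMD add, and lanes cannot carry into each other. With
// the 64-bit traits, a ticket of write=1, read=2, users=2 plus a delta of
// {1, 1, 0, 0} (write/read/user order, as in the kDeltaBuf arrays below)
// yields write=2, read=3, users=2 in a single _mm_add_epi16, instead of two
// separate counter updates.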
template<size_t kBitWidth, bool kFavorWriter=false>
class RWTicketSpinLockT {
  typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::QuarterInt
    QuarterInt;

  // One word carrying three counters: the next ticket to hand out (users),
  // and the tickets currently served for writers (write) and readers (read).
  union RWTicket {
    constexpr RWTicket() : whole(0) {}
    FullInt whole;
    HalfInt readWrite;
    __extension__ struct {
      QuarterInt write;
      QuarterInt read;
      QuarterInt users;
    };
  } ticket;
 private: // Some x64-specific utilities for atomic access to ticket.
  template<class T> static T load_acquire(T* addr) {
    T t = *addr; // acquire barrier
    asm_volatile_memory();
    return t;
  }

  template<class T>
  static void store_release(T* addr, T v) {
    asm_volatile_memory();
    *addr = v; // release barrier
  }
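  // Worked example (added commentary): this is a classic ticket lock.
  // Starting from all-zero counters, a writer takes ticket 0 (users becomes
  // 1) and proceeds because write == 0; a second writer takes ticket 1 and
  // spins until unlock() advances write. unlock() bumps read and write
  // together, so whichever party holds the next ticket -- reader or writer --
  // gets its turn.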
 public:

  constexpr RWTicketSpinLockT() {}

  RWTicketSpinLockT(RWTicketSpinLockT const&) = delete;
  RWTicketSpinLockT& operator=(RWTicketSpinLockT const&) = delete;
  void lock() {
    if (kFavorWriter) {
      writeLockAggressive();
    } else {
      writeLockNice();
    }
  }
  /*
   * Both try_lock and try_lock_shared diverge in our implementation from the
   * lock algorithm described in the link above.
   *
   * In the read case, it is undesirable that the readers could wait
   * for another reader (before increasing ticket.read in the other
   * implementation).  Our approach gives up on
   * first-come-first-serve, but our benchmarks showed improved
   * performance for both readers and writers under heavily contended
   * cases, particularly when the number of threads exceeds the number
   * of logical CPUs.
   *
   * We have writeLockAggressive() using the original implementation
   * for a writer, which gives some advantage to the writer over the
   * readers---for that path it is guaranteed that the writer will
   * acquire the lock after all the existing readers exit.
   */
  bool try_lock() {
    RWTicket t;
    FullInt old = t.whole = load_acquire(&ticket.whole);
    if (t.users != t.write) return false;
    ++t.users;
    return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole);
  }
  /*
   * Call this if you want to prioritize writer to avoid starvation.
   * Unlike writeLockNice, immediately acquires the write lock when
   * the existing readers (arriving before the writer) finish their
   * turns.
   */
  void writeLockAggressive() {
    // sched_yield() is needed here to avoid a pathology if the number
    // of threads attempting concurrent writes is >= the number of real
    // cores allocated to this process. This is less likely than the
    // corresponding situation in lock_shared(), but we still want to
    // avoid it.
    int count = 0;
    QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
    while (val != load_acquire(&ticket.write)) {
      asm_volatile_pause();
      if (UNLIKELY(++count > 1000)) sched_yield();
    }
  }
  // Call this when the writer should be nicer to the readers.
  void writeLockNice() {
    // Here it doesn't cpu-relax the writer.
    //
    // This is because usually we have many more readers than the
    // writers, so the writer has less chance to get the lock when
    // there are a lot of competing readers.  The aggressive spinning
    // can help to avoid starving writers.
    //
    // We don't worry about sched_yield() here because the caller
    // has already explicitly abandoned fairness.
    while (!try_lock()) {}
  }
  // Atomically unlock the write-lock from writer and acquire the read-lock.
  void unlock_and_lock_shared() {
    QuarterInt val = __sync_fetch_and_add(&ticket.read, 1);
  }
  // Release writer permission on the lock.
  void unlock() {
    RWTicket t;
    t.whole = load_acquire(&ticket.whole);
    FullInt old = t.whole;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    // SSE2 can reduce the lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = { 1, 1, 0, 0 };   // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.write;
#endif
    store_release(&ticket.readWrite, t.readWrite);
  }
  void lock_shared() {
    // sched_yield() is important here because we can't grab the
    // shared lock if there is a pending writeLockAggressive, so we
    // need to let threads that already have a shared lock complete.
    int count = 0;
    while (!LIKELY(try_lock_shared())) {
      asm_volatile_pause();
      if (UNLIKELY((++count & 1023) == 0)) sched_yield();
    }
  }
  bool try_lock_shared() {
    RWTicket t, old;
    old.whole = t.whole = load_acquire(&ticket.whole);
    old.users = old.read;
#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    // SSE2 may reduce the total lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = { 0, 1, 1, 0 };   // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old.whole);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.users;
#endif
    return __sync_bool_compare_and_swap(&ticket.whole, old.whole, t.whole);
  }
  void unlock_shared() {
    QuarterInt val = __sync_fetch_and_add(&ticket.write, 1);
  }
  class WriteHolder;

  typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;
  class ReadHolder {
   public:
    ReadHolder(ReadHolder const&) = delete;
    ReadHolder& operator=(ReadHolder const&) = delete;

    explicit ReadHolder(RWSpinLock *lock = nullptr) :
      lock_(lock) {
      if (lock_) lock_->lock_shared();
    }

    explicit ReadHolder(RWSpinLock &lock) : lock_ (&lock) {
      if (lock_) lock_->lock_shared();
    }

    // atomically unlock the write-lock from writer and acquire the read-lock
    explicit ReadHolder(WriteHolder *writer) : lock_(nullptr) {
      std::swap(this->lock_, writer->lock_);
      if (lock_) {
        lock_->unlock_and_lock_shared();
      }
    }

    ~ReadHolder() {
      if (lock_) lock_->unlock_shared();
    }

    void reset(RWSpinLock *lock = nullptr) {
      if (lock_) lock_->unlock_shared();
      lock_ = lock;
      if (lock_) lock_->lock_shared();
    }

    void swap(ReadHolder *other) {
      std::swap(this->lock_, other->lock_);
    }

   private:
    RWSpinLock *lock_;
  };
  class WriteHolder {
   public:
    WriteHolder(WriteHolder const&) = delete;
    WriteHolder& operator=(WriteHolder const&) = delete;

    explicit WriteHolder(RWSpinLock *lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock();
    }
    explicit WriteHolder(RWSpinLock &lock) : lock_ (&lock) {
      if (lock_) lock_->lock();
    }

    ~WriteHolder() {
      if (lock_) lock_->unlock();
    }

    void reset(RWSpinLock *lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock();
      lock_ = lock;
      if (lock_) lock_->lock();
    }

    void swap(WriteHolder *other) {
      std::swap(this->lock_, other->lock_);
    }

   private:
    friend class ReadHolder;
    RWSpinLock *lock_;
  };
  // Synchronized<> adaptors.
  friend void acquireRead(RWTicketSpinLockT& mutex) {
    mutex.lock_shared();
  }
  friend void acquireReadWrite(RWTicketSpinLockT& mutex) {
    mutex.lock();
  }
  friend void releaseRead(RWTicketSpinLockT& mutex) {
    mutex.unlock_shared();
  }
  friend void releaseReadWrite(RWTicketSpinLockT& mutex) {
    mutex.unlock();
  }
};
typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
typedef RWTicketSpinLockT<64> RWTicketSpinLock64;
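// Example (an illustrative sketch, not part of the original header): the
// ticket locks expose the same lock()/lock_shared() interface as RWSpinLock.
// `config` and `snapshot` are hypothetical.
//
//   folly::RWTicketSpinLock64 lock;
//   ...
//   lock.lock_shared();     // reader
//   auto snapshot = config;
//   lock.unlock_shared();
//
//   lock.lock();            // writer (aggressive or nice per kFavorWriter)
//   config = snapshot;
//   lock.unlock();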
#endif  // RW_SPINLOCK_USE_X86_INTRINSIC_

}  // namespace folly
#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
#undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif

#endif  // FOLLY_RWSPINLOCK_H_