/*
 * Copyright 2015 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/*
 * Two Read-Write spin lock implementations.
 *
 *  Ref: http://locklessinc.com/articles/locks
 *
 *  Both locks here are faster than pthread_rwlock and have very low
 *  overhead (usually 20-30ns).  They don't use any system mutexes and
 *  are very compact (4/8 bytes), so are suitable for per-instance
 *  based locking, particularly when contention is not expected.
 *
 *  In most cases, RWSpinLock is a reasonable choice.  It has minimal
 *  overhead, and comparable contention performance when the number of
 *  competing threads is less than or equal to the number of logical
 *  CPUs.  Even as the number of threads gets larger, RWSpinLock can
 *  still be very competitive in READ, although it is slower on WRITE,
 *  and also inherently unfair to writers.
 *
 *  RWTicketSpinLock shows more balanced READ/WRITE performance.  If
 *  your application really needs many more threads, and a
 *  higher-priority writer, prefer one of the RWTicketSpinLock locks.
 *
 *  Caveats:
 *
 *    RWTicketSpinLock locks can only be used with GCC on x86/x86-64
 *    based systems.
 *
 *    RWTicketSpinLock<32> only allows up to 2^8 - 1 concurrent
 *    readers and writers.
 *
 *    RWTicketSpinLock<64> only allows up to 2^16 - 1 concurrent
 *    readers and writers.
 *
 *    RWTicketSpinLock<..., true> (kFavorWriter = true, that is, strict
 *    writer priority) is NOT reentrant, even for lock_shared().
 *
 *    The lock will not grant any new shared (read) accesses while a thread
 *    attempting to acquire the lock in write mode is blocked. (That is,
 *    if the lock is held in shared mode by N threads, and a thread attempts
 *    to acquire it in write mode, no one else can acquire it in shared mode
 *    until these N threads release the lock and then the blocked thread
 *    acquires and releases the exclusive lock.) This also applies to
 *    attempts to reacquire the lock in shared mode by threads that already
 *    hold it in shared mode, making the lock non-reentrant.
 *
 *    RWSpinLock handles 2^30 - 1 concurrent readers.
 *
 * @author Xin Liu <xliux@fb.com>
 */
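/*
 * A minimal usage sketch (illustration only, using the RAII holder types
 * RWSpinLock defines below; error handling and protected data omitted):
 *
 *   folly::RWSpinLock lock;
 *
 *   {
 *     folly::RWSpinLock::ReadHolder guard(&lock);  // shared (read) access
 *     // ... read the protected state ...
 *   }                                              // released on scope exit
 *
 *   {
 *     folly::RWSpinLock::WriteHolder guard(&lock); // exclusive (write) access
 *     // ... mutate the protected state ...
 *   }
 */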
#ifndef FOLLY_RWSPINLOCK_H_
#define FOLLY_RWSPINLOCK_H_
/*
========================================================================
Benchmark on (Intel(R) Xeon(R) CPU L5630 @ 2.13GHz)  8 cores(16 HTs)
========================================================================

------------------------------------------------------------------------------
1. Single thread benchmark (read/write lock + unlock overhead)
Benchmark                                    Iters   Total t    t/iter iter/sec
-------------------------------------------------------------------------------
*      BM_RWSpinLockRead                     100000  1.786 ms  17.86 ns   53.4M
+30.5% BM_RWSpinLockWrite                    100000  2.331 ms  23.31 ns  40.91M
+85.7% BM_RWTicketSpinLock32Read             100000  3.317 ms  33.17 ns  28.75M
+96.0% BM_RWTicketSpinLock32Write            100000    3.5 ms     35 ns  27.25M
+85.6% BM_RWTicketSpinLock64Read             100000  3.315 ms  33.15 ns  28.77M
+96.0% BM_RWTicketSpinLock64Write            100000    3.5 ms     35 ns  27.25M
+85.7% BM_RWTicketSpinLock32FavorWriterRead  100000  3.317 ms  33.17 ns  28.75M
+29.7% BM_RWTicketSpinLock32FavorWriterWrite 100000  2.316 ms  23.16 ns  41.18M
+85.3% BM_RWTicketSpinLock64FavorWriterRead  100000  3.309 ms  33.09 ns  28.82M
+30.2% BM_RWTicketSpinLock64FavorWriterWrite 100000  2.325 ms  23.25 ns  41.02M
+ 175% BM_PThreadRWMutexRead                 100000  4.917 ms  49.17 ns   19.4M
+ 166% BM_PThreadRWMutexWrite                100000  4.757 ms  47.57 ns  20.05M

------------------------------------------------------------------------------
2. Contention Benchmark      90% read  10% write
Benchmark                    hits       average    min       max        sigma
------------------------------------------------------------------------------
---------- 8  threads ------------
RWSpinLock       Write       142666     220ns      78ns      40.8us     269ns
RWSpinLock       Read        1282297    222ns      80ns      37.7us     248ns
RWTicketSpinLock Write       85692      209ns      71ns      17.9us     252ns
RWTicketSpinLock Read        769571     215ns      78ns      33.4us     251ns
pthread_rwlock_t Write       84248      2.48us     99ns      269us      8.19us
pthread_rwlock_t Read        761646     933ns      101ns     374us      3.25us

---------- 16 threads ------------
RWSpinLock       Write       124236     237ns      78ns      261us      801ns
RWSpinLock       Read        1115807    236ns      78ns      2.27ms     2.17us
RWTicketSpinLock Write       81781      231ns      71ns      31.4us     351ns
RWTicketSpinLock Read        734518     238ns      78ns      73.6us     379ns
pthread_rwlock_t Write       83363      7.12us     99ns      785us      28.1us
pthread_rwlock_t Read        754978     2.18us     101ns     1.02ms     14.3us

---------- 50 threads ------------
RWSpinLock       Write       131142     1.37us     82ns      7.53ms     68.2us
RWSpinLock       Read        1181240    262ns      78ns      6.62ms     12.7us
RWTicketSpinLock Write       83045      397ns      73ns      7.01ms     31.5us
RWTicketSpinLock Read        744133     386ns      78ns      11ms       31.4us
pthread_rwlock_t Write       80849      112us      103ns     4.52ms     263us
pthread_rwlock_t Read        728698     24us       101ns     7.28ms     194us
*/
#include <folly/Portability.h>
#if defined(__GNUC__) && \
  (defined(__i386) || FOLLY_X64 || \
   defined(ARCH_K8))
# define RW_SPINLOCK_USE_X86_INTRINSIC_
# include <x86intrin.h>
#elif defined(_MSC_VER) && defined(FOLLY_X64)
# define RW_SPINLOCK_USE_X86_INTRINSIC_
#else
# undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif
// iOS doesn't define _mm_cvtsi64_si128 and friends
#if (FOLLY_SSE >= 2) && !TARGET_OS_IPHONE
#define RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#else
#undef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
#endif
#include <atomic>
#include <algorithm>
#include <boost/noncopyable.hpp>

#include <sched.h>
#include <glog/logging.h>

#include <folly/Likely.h>

namespace folly {
/*
 * A simple, small (4 bytes), but unfair rwlock.  Use it when you want
 * a nice writer and don't expect a lot of write/read contention, or
 * when you need small rwlocks since you are creating a large number
 * of them.
 *
 * Note that the unfairness here is extreme: if the lock is
 * continually accessed for read, writers will never get a chance.  If
 * the lock can be that highly contended this class is probably not an
 * ideal choice anyway.
 *
 * It currently implements most of the Lockable, SharedLockable and
 * UpgradeLockable concepts except the TimedLockable related locking/unlocking
 * interfaces.
 */
class RWSpinLock : boost::noncopyable {
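  // Descriptive note: bit 0 of bits_ is the WRITER flag, bit 1 the UPGRADED
  // flag, and the 30 higher bits count readers (each holder adds
  // READER == 4), which is why the lock supports 2^30 - 1 concurrent readers.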
  enum : int32_t { READER = 4, UPGRADED = 2, WRITER = 1 };

 public:
  RWSpinLock() : bits_(0) {}

  // Lockable Concept
  void lock() {
    int count = 0;
    while (!LIKELY(try_lock())) {
      if (++count > 1000) sched_yield();
    }
  }

  // Writer is responsible for clearing up both the UPGRADED and WRITER bits.
  void unlock() {
    static_assert(READER > WRITER + UPGRADED, "wrong bits!");
    bits_.fetch_and(~(WRITER | UPGRADED), std::memory_order_release);
  }

  // SharedLockable Concept
  void lock_shared() {
    int count = 0;
    while (!LIKELY(try_lock_shared())) {
      if (++count > 1000) sched_yield();
    }
  }

  void unlock_shared() {
    bits_.fetch_add(-READER, std::memory_order_release);
  }

  // Downgrade the lock from writer status to reader status.
  void unlock_and_lock_shared() {
    bits_.fetch_add(READER, std::memory_order_acquire);
    unlock();
  }

  // UpgradeLockable Concept
  void lock_upgrade() {
    int count = 0;
    while (!try_lock_upgrade()) {
      if (++count > 1000) sched_yield();
    }
  }

  void unlock_upgrade() {
    bits_.fetch_add(-UPGRADED, std::memory_order_acq_rel);
  }

  // unlock upgrade and try to acquire write lock
  void unlock_upgrade_and_lock() {
    int64_t count = 0;
    while (!try_unlock_upgrade_and_lock()) {
      if (++count > 1000) sched_yield();
    }
  }

  // unlock upgrade and read lock atomically
  void unlock_upgrade_and_lock_shared() {
    bits_.fetch_add(READER - UPGRADED, std::memory_order_acq_rel);
  }

  // write unlock and upgrade lock atomically
  void unlock_and_lock_upgrade() {
    // need to do it in two steps here -- as the UPGRADED bit might be OR-ed
    // at the same time when other threads are trying to do try_lock_upgrade().
    bits_.fetch_or(UPGRADED, std::memory_order_acquire);
    bits_.fetch_add(-WRITER, std::memory_order_release);
  }

  // Attempt to acquire writer permission. Return false if we didn't get it.
  bool try_lock() {
    int32_t expect = 0;
    return bits_.compare_exchange_strong(expect, WRITER,
      std::memory_order_acq_rel);
  }

  // Try to get reader permission on the lock. This can fail if we
  // find out someone is a writer or upgrader.
  // Setting the UPGRADED bit would allow a writer-to-be to indicate
  // its intention to write and block any new readers while waiting
  // for existing readers to finish and release their read locks. This
  // helps avoid starving writers (promoted from upgraders).
  bool try_lock_shared() {
    // fetch_add is considerably (100%) faster than compare_exchange,
    // so here we are optimizing for the common (lock success) case.
    int32_t value = bits_.fetch_add(READER, std::memory_order_acquire);
    if (UNLIKELY(value & (WRITER|UPGRADED))) {
      bits_.fetch_add(-READER, std::memory_order_release);
      return false;
    }
    return true;
  }
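
  // Descriptive note: because try_lock_shared() optimistically adds READER
  // before checking for writers, a concurrent try_lock() (a CAS expecting
  // exactly 0) can fail spuriously while a doomed reader backs out; lock()
  // simply retries, and the common uncontended read costs one fetch_add.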

  // try to unlock upgrade and write lock atomically
  bool try_unlock_upgrade_and_lock() {
    int32_t expect = UPGRADED;
    return bits_.compare_exchange_strong(expect, WRITER,
        std::memory_order_acq_rel);
  }

  // try to acquire an upgradable lock.
  bool try_lock_upgrade() {
    int32_t value = bits_.fetch_or(UPGRADED, std::memory_order_acquire);

    // Note: when failed, we cannot flip the UPGRADED bit back,
    // as in this case there is either another upgrade lock or a write lock.
    // If it's a write lock, the bit will get cleared up when that lock's done
    // with unlock().
    return ((value & (UPGRADED | WRITER)) == 0);
  }

  // mainly for debugging purposes.
  int32_t bits() const { return bits_.load(std::memory_order_acquire); }
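
  // A minimal sketch of the intended upgrade workflow (illustration only;
  // `mustWrite` is a hypothetical predicate, not part of the API):
  //
  //   lock.lock_upgrade();               // blocks new readers, coexists
  //                                      // with existing ones
  //   if (mustWrite) {
  //     lock.unlock_upgrade_and_lock();  // promote to exclusive
  //     // ... mutate ...
  //     lock.unlock();
  //   } else {
  //     lock.unlock_upgrade();
  //   }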

  class ReadHolder;
  class UpgradedHolder;
  class WriteHolder;

  class ReadHolder {
   public:
    explicit ReadHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock_shared();
    }

    explicit ReadHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_shared();
    }

    ReadHolder(ReadHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    // down-grade
    explicit ReadHolder(UpgradedHolder&& upgraded) : lock_(upgraded.lock_) {
      upgraded.lock_ = nullptr;
      if (lock_) lock_->unlock_upgrade_and_lock_shared();
    }

    explicit ReadHolder(WriteHolder&& writer) : lock_(writer.lock_) {
      writer.lock_ = nullptr;
      if (lock_) lock_->unlock_and_lock_shared();
    }

    ReadHolder& operator=(ReadHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    ReadHolder(const ReadHolder& other) = delete;
    ReadHolder& operator=(const ReadHolder& other) = delete;

    ~ReadHolder() { if (lock_) lock_->unlock_shared(); }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock_shared();
      lock_ = lock;
      if (lock_) lock_->lock_shared();
    }

    void swap(ReadHolder* other) {
      std::swap(lock_, other->lock_);
    }

   private:
    friend class UpgradedHolder;
    friend class WriteHolder;
    RWSpinLock* lock_;
  };

  class UpgradedHolder {
   public:
    explicit UpgradedHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock_upgrade();
    }

    explicit UpgradedHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock_upgrade();
    }

    explicit UpgradedHolder(WriteHolder&& writer) {
      lock_ = writer.lock_;
      writer.lock_ = nullptr;
      if (lock_) lock_->unlock_and_lock_upgrade();
    }

    UpgradedHolder(UpgradedHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    UpgradedHolder& operator =(UpgradedHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    UpgradedHolder(const UpgradedHolder& other) = delete;
    UpgradedHolder& operator =(const UpgradedHolder& other) = delete;

    ~UpgradedHolder() { if (lock_) lock_->unlock_upgrade(); }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock_upgrade();
      lock_ = lock;
      if (lock_) lock_->lock_upgrade();
    }

    void swap(UpgradedHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class WriteHolder;
    friend class ReadHolder;
    RWSpinLock* lock_;
  };

  class WriteHolder {
   public:
    explicit WriteHolder(RWSpinLock* lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock();
    }

    explicit WriteHolder(RWSpinLock& lock) : lock_(&lock) {
      lock_->lock();
    }

    // promoted from an upgrade lock holder
    explicit WriteHolder(UpgradedHolder&& upgraded) {
      lock_ = upgraded.lock_;
      upgraded.lock_ = nullptr;
      if (lock_) lock_->unlock_upgrade_and_lock();
    }

    WriteHolder(WriteHolder&& other) noexcept : lock_(other.lock_) {
      other.lock_ = nullptr;
    }

    WriteHolder& operator =(WriteHolder&& other) {
      using std::swap;
      swap(lock_, other.lock_);
      return *this;
    }

    WriteHolder(const WriteHolder& other) = delete;
    WriteHolder& operator =(const WriteHolder& other) = delete;

    ~WriteHolder() { if (lock_) lock_->unlock(); }

    void reset(RWSpinLock* lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock();
      lock_ = lock;
      if (lock_) lock_->lock();
    }

    void swap(WriteHolder* other) {
      using std::swap;
      swap(lock_, other->lock_);
    }

   private:
    friend class ReadHolder;
    friend class UpgradedHolder;
    RWSpinLock* lock_;
  };

  // Synchronized<> adaptors
  friend void acquireRead(RWSpinLock& l) { return l.lock_shared(); }
  friend void acquireReadWrite(RWSpinLock& l) { return l.lock(); }
  friend void releaseRead(RWSpinLock& l) { return l.unlock_shared(); }
  friend void releaseReadWrite(RWSpinLock& l) { return l.unlock(); }

 private:
  std::atomic<int32_t> bits_;
};
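
// A hedged sketch of pairing RWSpinLock with folly::Synchronized through the
// adaptor functions above (assumes folly/Synchronized.h and its two-parameter
// Synchronized<Type, Mutex> form; illustration only):
//
//   folly::Synchronized<std::vector<int>, folly::RWSpinLock> data;
//   data->push_back(42);  // operator-> takes the write lock for this call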

#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
// A more balanced Read-Write spin lock implemented with GCC intrinsics.

namespace detail {
template <size_t kBitWidth> struct RWTicketIntTrait {
  static_assert(kBitWidth == 32 || kBitWidth == 64,
      "bit width has to be either 32 or 64");
};

template <>
struct RWTicketIntTrait<64> {
  typedef uint64_t FullInt;
  typedef uint32_t HalfInt;
  typedef uint16_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint16_t v[4]) {
    return _mm_set_epi16(0, 0, 0, 0, v[3], v[2], v[1], v[0]);
  }
  static inline __m128i fromInteger(uint64_t from) {
    return _mm_cvtsi64_si128(from);
  }
  static inline uint64_t toInteger(__m128i in) {
    return _mm_cvtsi128_si64(in);
  }
  static inline uint64_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi16(in, kDelta));
  }
#endif
};

template <>
struct RWTicketIntTrait<32> {
  typedef uint32_t FullInt;
  typedef uint16_t HalfInt;
  typedef uint8_t QuarterInt;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
  static __m128i make128(const uint8_t v[4]) {
    return _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, v[3], v[2], v[1], v[0]);
  }
  static inline __m128i fromInteger(uint32_t from) {
    return _mm_cvtsi32_si128(from);
  }
  static inline uint32_t toInteger(__m128i in) {
    return _mm_cvtsi128_si32(in);
  }
  static inline uint32_t addParallel(__m128i in, __m128i kDelta) {
    return toInteger(_mm_add_epi8(in, kDelta));
  }
#endif
};
}  // namespace detail
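
// Why addParallel (a descriptive note, our reading): the ticket counters
// below are QuarterInts packed into one word, and each must wrap around
// independently. A lane-wise SIMD add of e.g. {1, 1, 0, 0} bumps `write`
// and `read` in one instruction, and a lane that wraps (0xff.. -> 0) cannot
// carry into its neighbor, unlike a plain integer add on the whole word.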

template<size_t kBitWidth, bool kFavorWriter=false>
class RWTicketSpinLockT : boost::noncopyable {
  typedef detail::RWTicketIntTrait<kBitWidth> IntTraitType;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::FullInt FullInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::HalfInt HalfInt;
  typedef typename detail::RWTicketIntTrait<kBitWidth>::QuarterInt
    QuarterInt;

  union RWTicket {
    FullInt whole;
    HalfInt readWrite;
    __extension__ struct {
      QuarterInt write;
      QuarterInt read;
      QuarterInt users;
    };
  } ticket;
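
  // Ticket semantics (descriptive note): `users` is the next ticket number
  // to hand out, `write` is the ticket currently allowed to hold exclusive
  // access, and `read` is the ticket currently allowed to enter shared
  // access; users == write means the lock is free, and users - read counts
  // writers that hold a ticket but have not yet released.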

 private: // Some x64-specific utilities for atomic access to ticket.
  template<class T> static T load_acquire(T* addr) {
    T t = *addr; // acquire barrier
    asm_volatile_memory();
    return t;
  }

  template<class T>
  static void store_release(T* addr, T v) {
    asm_volatile_memory();
    *addr = v; // release barrier
  }

 public:

  RWTicketSpinLockT() {
    store_release(&ticket.whole, FullInt(0));
  }

  void lock() {
    if (kFavorWriter) {
      writeLockAggressive();
    } else {
      writeLockNice();
    }
  }

  /*
   * Both try_lock and try_lock_shared diverge in our implementation from the
   * lock algorithm described in the link above.
   *
   * In the read case, it is undesirable that the readers could wait
   * for another reader (before increasing ticket.read in the other
   * implementation).  Our approach gives up on
   * first-come-first-serve, but our benchmarks showed improved
   * performance for both readers and writers under heavily contended
   * cases, particularly when the number of threads exceeds the number
   * of logical CPUs.
   *
   * We have writeLockAggressive() using the original implementation
   * for a writer, which gives some advantage to the writer over the
   * readers---for that path it is guaranteed that the writer will
   * acquire the lock after all the existing readers exit.
   */
  bool try_lock() {
    RWTicket t;
    FullInt old = t.whole = load_acquire(&ticket.whole);
    if (t.users != t.write) return false;
    ++t.users;
    return __sync_bool_compare_and_swap(&ticket.whole, old, t.whole);
  }
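
  // Worked example (descriptive note): from the free state write == read ==
  // users == 0, a successful try_lock() leaves (write=0, read=0, users=1),
  // so users != write and everyone else is locked out; unlock() then
  // advances write and read to 1, matching users again and freeing the lock.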

  /*
   * Call this if you want to prioritize the writer to avoid starvation.
   * Unlike writeLockNice, this immediately acquires the write lock when
   * the existing readers (arriving before the writer) finish their
   * reads.
   */
  void writeLockAggressive() {
    // sched_yield() is needed here to avoid a pathology if the number
    // of threads attempting concurrent writes is >= the number of real
    // cores allocated to this process. This is less likely than the
    // corresponding situation in lock_shared(), but we still want to
    // avoid it.
    int count = 0;
    QuarterInt val = __sync_fetch_and_add(&ticket.users, 1);
    while (val != load_acquire(&ticket.write)) {
      asm_volatile_pause();
      if (UNLIKELY(++count > 1000)) sched_yield();
    }
  }

  // Call this when the writer should be nicer to the readers.
  void writeLockNice() {
    // Here it doesn't cpu-relax the writer.
    //
    // This is because usually we have many more readers than the
    // writers, so the writer has less chance to get the lock when
    // there are a lot of competing readers.  The aggressive spinning
    // can help to avoid starving writers.
    //
    // We don't worry about sched_yield() here because the caller
    // has already explicitly abandoned fairness.
    while (!try_lock()) {}
  }

  // Atomically unlock the write-lock from writer and acquire the read-lock.
  void unlock_and_lock_shared() {
    QuarterInt val = __sync_fetch_and_add(&ticket.read, 1);
  }

  // Release writer permission on the lock.
  void unlock() {
    RWTicket t;
    t.whole = load_acquire(&ticket.whole);
    FullInt old = t.whole;

#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    // SSE2 can reduce the lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = { 1, 1, 0, 0 };   // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.write;
#endif
    store_release(&ticket.readWrite, t.readWrite);
  }

  void lock_shared() {
    // sched_yield() is important here because we can't grab the
    // shared lock if there is a pending writeLockAggressive, so we
    // need to let threads that already have a shared lock complete.
    int count = 0;
    while (!LIKELY(try_lock_shared())) {
      asm_volatile_pause();
      if (UNLIKELY((++count & 1023) == 0)) sched_yield();
    }
  }

  bool try_lock_shared() {
    RWTicket t, old;
    old.whole = t.whole = load_acquire(&ticket.whole);
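    // Descriptive note (our reading): forcing old.users == old.read makes
    // the CAS below succeed only when no writer holds a ticket (readers
    // keep users == read; each pending or active writer makes users run
    // ahead of read), so readers cannot sneak past a queued writer.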
    old.users = old.read;
#ifdef RW_SPINLOCK_USE_SSE_INSTRUCTIONS_
    // SSE2 may reduce the total lock and unlock overhead by 10%
    static const QuarterInt kDeltaBuf[4] = { 0, 1, 1, 0 };   // write/read/user
    static const __m128i kDelta = IntTraitType::make128(kDeltaBuf);
    __m128i m = IntTraitType::fromInteger(old.whole);
    t.whole = IntTraitType::addParallel(m, kDelta);
#else
    ++t.read;
    ++t.users;
#endif
    return __sync_bool_compare_and_swap(&ticket.whole, old.whole, t.whole);
  }

  void unlock_shared() {
    QuarterInt val = __sync_fetch_and_add(&ticket.write, 1);
  }

  class WriteHolder;

  typedef RWTicketSpinLockT<kBitWidth, kFavorWriter> RWSpinLock;

  class ReadHolder : boost::noncopyable {
   public:
    explicit ReadHolder(RWSpinLock *lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock_shared();
    }

    explicit ReadHolder(RWSpinLock &lock) : lock_(&lock) {
      if (lock_) lock_->lock_shared();
    }

    // atomically unlock the write-lock from writer and acquire the read-lock
    explicit ReadHolder(WriteHolder *writer) : lock_(nullptr) {
      std::swap(this->lock_, writer->lock_);
      if (lock_) {
        lock_->unlock_and_lock_shared();
      }
    }

    ~ReadHolder() {
      if (lock_) lock_->unlock_shared();
    }

    void reset(RWSpinLock *lock = nullptr) {
      if (lock_) lock_->unlock_shared();
      lock_ = lock;
      if (lock_) lock_->lock_shared();
    }

    void swap(ReadHolder *other) {
      std::swap(this->lock_, other->lock_);
    }

   private:
    RWSpinLock *lock_;
  };

  class WriteHolder : boost::noncopyable {
   public:
    explicit WriteHolder(RWSpinLock *lock = nullptr) : lock_(lock) {
      if (lock_) lock_->lock();
    }

    explicit WriteHolder(RWSpinLock &lock) : lock_(&lock) {
      if (lock_) lock_->lock();
    }

    ~WriteHolder() {
      if (lock_) lock_->unlock();
    }

    void reset(RWSpinLock *lock = nullptr) {
      if (lock == lock_) return;
      if (lock_) lock_->unlock();
      lock_ = lock;
      if (lock_) lock_->lock();
    }

    void swap(WriteHolder *other) {
      std::swap(this->lock_, other->lock_);
    }

   private:
    friend class ReadHolder;
    RWSpinLock *lock_;
  };

  // Synchronized<> adaptors.
  friend void acquireRead(RWTicketSpinLockT& mutex) {
    mutex.lock_shared();
  }
  friend void acquireReadWrite(RWTicketSpinLockT& mutex) {
    mutex.lock();
  }
  friend bool acquireReadWrite(RWTicketSpinLockT& mutex,
                               unsigned int milliseconds) {
    // This lock has no timed acquire; it simply blocks and reports success.
    mutex.lock();
    return true;
  }
  friend void releaseRead(RWTicketSpinLockT& mutex) {
    mutex.unlock_shared();
  }
  friend void releaseReadWrite(RWTicketSpinLockT& mutex) {
    mutex.unlock();
  }
};

typedef RWTicketSpinLockT<32> RWTicketSpinLock32;
typedef RWTicketSpinLockT<64> RWTicketSpinLock64;
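
// A minimal usage sketch (illustration only): the ticket locks expose the
// same holder-style interface as RWSpinLock above.
//
//   folly::RWTicketSpinLock64 lock;
//   {
//     folly::RWTicketSpinLock64::ReadHolder guard(&lock);
//     // ... shared section ...
//   }
//   {
//     folly::RWTicketSpinLock64::WriteHolder guard(&lock);
//     // ... exclusive section ...
//   }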

#endif  // RW_SPINLOCK_USE_X86_INTRINSIC_

}  // namespace folly

#ifdef RW_SPINLOCK_USE_X86_INTRINSIC_
#undef RW_SPINLOCK_USE_X86_INTRINSIC_
#endif

#endif  // FOLLY_RWSPINLOCK_H_