From: Tudor Bosman Date: Fri, 21 Jun 2013 03:14:35 +0000 (-0700) Subject: Speed up EventCount, now 64-bit only X-Git-Tag: v0.22.0~942 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=8949d553667b268698258fa41759bc655568d91b;p=folly.git Speed up EventCount, now 64-bit only Summary: Don't use two atomic variables when only one will suffice (otherwise, the increments in doNotify() and prepareWait() would require memory_model_seq_cst, because they need to globally order a store before a load, and no weaker model allows you to do that) Of course, this makes this 64-bit only, but I don't care. Test Plan: eventcount_test, tests outside of folly Reviewed By: delong.j@fb.com FB internal diff: D858963 --- diff --git a/folly/experimental/EventCount.h b/folly/experimental/EventCount.h index f7469b46..5a11a330 100644 --- a/folly/experimental/EventCount.h +++ b/folly/experimental/EventCount.h @@ -21,10 +21,14 @@ #include #include #include +#include #include #include #include +#include "folly/Bits.h" +#include "folly/Likely.h" + namespace folly { @@ -90,12 +94,12 @@ inline int futex(int* uaddr, int op, int val, const timespec* timeout, */ class EventCount { public: - EventCount() noexcept : epoch_(0), waiters_(0) { } + EventCount() noexcept : val_(0) { } class Key { friend class EventCount; - explicit Key(int e) noexcept : epoch_(e) { } - int epoch_; + explicit Key(uint32_t e) noexcept : epoch_(e) { } + uint32_t epoch_; }; void notify() noexcept; @@ -118,8 +122,28 @@ class EventCount { EventCount& operator=(const EventCount&) = delete; EventCount& operator=(EventCount&&) = delete; - std::atomic epoch_; - std::atomic waiters_; + // This requires 64-bit + static_assert(sizeof(int) == 4, "bad platform"); + static_assert(sizeof(uint32_t) == 4, "bad platform"); + static_assert(sizeof(uint64_t) == 8, "bad platform"); + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + static constexpr size_t kEpochOffset = 1; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + static 
constexpr size_t kEpochOffset = 0; // in units of sizeof(int) +#else +# error Your machine uses a weird endianness! +#endif + + // val_ stores the epoch in the most significant 32 bits and the + // waiter count in the least significant 32 bits. + std::atomic<uint64_t> val_; + + static constexpr uint64_t kAddWaiter = uint64_t(1); + static constexpr uint64_t kSubWaiter = uint64_t(-1); + static constexpr size_t kEpochShift = 32; + static constexpr uint64_t kAddEpoch = uint64_t(1) << kEpochShift; + static constexpr uint64_t kWaiterMask = kAddEpoch - 1; }; inline void EventCount::notify() noexcept { @@ -131,31 +155,36 @@ inline void EventCount::notifyAll() noexcept { } inline void EventCount::doNotify(int n) noexcept { - // The order is important: epoch_ is incremented before waiters_ is checked. - // prepareWait() increments waiters_ before checking epoch_, so it is - // impossible to miss a wakeup. - ++epoch_; - if (waiters_ != 0) { - detail::futex(reinterpret_cast<int*>(&epoch_), FUTEX_WAKE, n, nullptr, - nullptr, 0); + uint64_t prev = val_.fetch_add(kAddEpoch, std::memory_order_acq_rel); + if (UNLIKELY(prev & kWaiterMask)) { + detail::futex(reinterpret_cast<int*>(&val_) + kEpochOffset, + FUTEX_WAKE, n, nullptr, nullptr, 0); } } inline EventCount::Key EventCount::prepareWait() noexcept { - ++waiters_; - return Key(epoch_); + uint64_t prev = val_.fetch_add(kAddWaiter, std::memory_order_acq_rel); + return Key(prev >> kEpochShift); } inline void EventCount::cancelWait() noexcept { - --waiters_; + // memory_order_relaxed would suffice for correctness, but the faster + // #waiters gets to 0, the less likely it is that we'll do spurious wakeups + // (and thus system calls). 
+ uint64_t prev = val_.fetch_add(kSubWaiter, std::memory_order_seq_cst); + assert((prev & kWaiterMask) != 0); } inline void EventCount::wait(Key key) noexcept { - while (epoch_ == key.epoch_) { - detail::futex(reinterpret_cast<int*>(&epoch_), FUTEX_WAIT, key.epoch_, - nullptr, nullptr, 0); + while ((val_.load(std::memory_order_acquire) >> kEpochShift) == key.epoch_) { + detail::futex(reinterpret_cast<int*>(&val_) + kEpochOffset, + FUTEX_WAIT, key.epoch_, nullptr, nullptr, 0); } - --waiters_; + // memory_order_relaxed would suffice for correctness, but the faster + // #waiters gets to 0, the less likely it is that we'll do spurious wakeups + // (and thus system calls) + uint64_t prev = val_.fetch_add(kSubWaiter, std::memory_order_seq_cst); + assert((prev & kWaiterMask) != 0); } template