From a4fc31fc259c144457a3e81e55f4e4160692d6b9 Mon Sep 17 00:00:00 2001 From: Nathan Bronson Date: Fri, 24 Jul 2015 08:20:45 -0700 Subject: [PATCH] improvements to DeterministicSchedule Summary: This diff isolates the CacheLocality back end for DeterministicSchedule from one invocation to the next (the last one was deterministic across the entire program but not per-DeterministicSchedule instance), and makes it easy to do extensive tracing of memory accesses during a deterministic test. These changes were made while tracking down a bug, but don't fix any bugs on their own (and in fact don't affect any production code at all). Reviewed By: @yfeldblum Differential Revision: D1842390 --- folly/test/DeterministicSchedule.cpp | 187 +++++++++++++++------------ folly/test/DeterministicSchedule.h | 141 ++++++++++++++------ 2 files changed, 207 insertions(+), 121 deletions(-) diff --git a/folly/test/DeterministicSchedule.cpp b/folly/test/DeterministicSchedule.cpp index 1b6e9a2e..42cf1929 100644 --- a/folly/test/DeterministicSchedule.cpp +++ b/folly/test/DeterministicSchedule.cpp @@ -23,21 +23,22 @@ #include #include -namespace folly { namespace test { +namespace folly { +namespace test { FOLLY_TLS sem_t* DeterministicSchedule::tls_sem; FOLLY_TLS DeterministicSchedule* DeterministicSchedule::tls_sched; +FOLLY_TLS unsigned DeterministicSchedule::tls_threadId; // access is protected by futexLock static std::unordered_map*, - std::list>> futexQueues; + std::list>> futexQueues; static std::mutex futexLock; DeterministicSchedule::DeterministicSchedule( - const std::function& scheduler) - : scheduler_(scheduler) -{ + const std::function& scheduler) + : scheduler_(scheduler), nextThreadId_(1) { assert(tls_sem == nullptr); assert(tls_sched == nullptr); @@ -55,8 +56,7 @@ DeterministicSchedule::~DeterministicSchedule() { beforeThreadExit(); } -std::function -DeterministicSchedule::uniform(long seed) { +std::function DeterministicSchedule::uniform(long seed) { auto rand = std::make_shared(seed); return [rand](size_t numActive) { auto dist = std::uniform_int_distribution(0, numActive - 1); @@ -66,12 +66,10 @@ DeterministicSchedule::uniform(long seed) { struct UniformSubset { UniformSubset(long seed, int subsetSize, int stepsBetweenSelect) - : uniform_(DeterministicSchedule::uniform(seed)) - , subsetSize_(subsetSize) - , stepsBetweenSelect_(stepsBetweenSelect) - , stepsLeft_(0) - { - } + : uniform_(DeterministicSchedule::uniform(seed)), + subsetSize_(subsetSize), + stepsBetweenSelect_(stepsBetweenSelect), + stepsLeft_(0) {} size_t operator()(size_t numActive) { adjustPermSize(numActive); @@ -93,8 +91,9 @@ struct UniformSubset { void adjustPermSize(size_t numActive) { if (perm_.size() > numActive) { - perm_.erase(std::remove_if(perm_.begin(), perm_.end(), - [=](size_t x){ return x >= numActive; }), perm_.end()); + perm_.erase(std::remove_if(perm_.begin(), perm_.end(), [=](size_t x) { + return x >= numActive; + }), perm_.end()); } else { while (perm_.size() < numActive) { perm_.push_back(perm_.size()); @@ -111,21 +110,20 @@ struct UniformSubset { } }; -std::function -DeterministicSchedule::uniformSubset(long seed, int n, int m) { +std::function DeterministicSchedule::uniformSubset(long seed, + int n, + int m) { auto gen = std::make_shared(seed, n, m); return [=](size_t numActive) { return (*gen)(numActive); }; } -void -DeterministicSchedule::beforeSharedAccess() { +void DeterministicSchedule::beforeSharedAccess() { if (tls_sem) { sem_wait(tls_sem); } } -void -DeterministicSchedule::afterSharedAccess() { +void DeterministicSchedule::afterSharedAccess() { auto sched = tls_sched; if (!sched) { return; @@ -134,16 +132,29 @@ DeterministicSchedule::afterSharedAccess() { sem_post(sched->sems_[sched->scheduler_(sched->sems_.size())]); } -int -DeterministicSchedule::getRandNumber(int n) { +int DeterministicSchedule::getRandNumber(int n) { if (tls_sched) { return tls_sched->scheduler_(n); } return std::rand() % n; } -sem_t* -DeterministicSchedule::beforeThreadCreate() { +int DeterministicSchedule::getcpu(unsigned* cpu, unsigned* node, void* unused) { + if (!tls_threadId && tls_sched) { + beforeSharedAccess(); + tls_threadId = tls_sched->nextThreadId_++; + afterSharedAccess(); + } + if (cpu) { + *cpu = tls_threadId; + } + if (node) { + *node = tls_threadId; + } + return 0; +} + +sem_t* DeterministicSchedule::beforeThreadCreate() { sem_t* s = new sem_t; sem_init(s, 0, 0); beforeSharedAccess(); @@ -152,8 +163,7 @@ DeterministicSchedule::beforeThreadCreate() { return s; } -void -DeterministicSchedule::afterThreadCreate(sem_t* sem) { +void DeterministicSchedule::afterThreadCreate(sem_t* sem) { assert(tls_sem == nullptr); assert(tls_sched == nullptr); tls_sem = sem; @@ -168,13 +178,13 @@ DeterministicSchedule::afterThreadCreate(sem_t* sem) { } } -void -DeterministicSchedule::beforeThreadExit() { +void DeterministicSchedule::beforeThreadExit() { assert(tls_sched == this); beforeSharedAccess(); sems_.erase(std::find(sems_.begin(), sems_.end(), tls_sem)); active_.erase(std::this_thread::get_id()); if (sems_.size() > 0) { + FOLLY_TEST_DSCHED_VLOG("exiting"); afterSharedAccess(); } sem_destroy(tls_sem); @@ -183,67 +193,73 @@ DeterministicSchedule::beforeThreadExit() { tls_sched = nullptr; } -void -DeterministicSchedule::join(std::thread& child) { +void DeterministicSchedule::join(std::thread& child) { auto sched = tls_sched; if (sched) { bool done = false; while (!done) { beforeSharedAccess(); done = !sched->active_.count(child.get_id()); + if (done) { + FOLLY_TEST_DSCHED_VLOG("joined " << std::hex << child.get_id()); + } afterSharedAccess(); } } child.join(); } -void -DeterministicSchedule::post(sem_t* sem) { +void DeterministicSchedule::post(sem_t* sem) { beforeSharedAccess(); sem_post(sem); + FOLLY_TEST_DSCHED_VLOG("sem_post(" << sem << ")"); afterSharedAccess(); } -bool -DeterministicSchedule::tryWait(sem_t* sem) { +bool DeterministicSchedule::tryWait(sem_t* sem) { beforeSharedAccess(); int rv = sem_trywait(sem); + int e = rv == 0 ? 0 : errno; + FOLLY_TEST_DSCHED_VLOG("sem_trywait(" << sem << ") = " << rv + << " errno=" << e); afterSharedAccess(); if (rv == 0) { return true; } else { - assert(errno == EAGAIN); + assert(e == EAGAIN); return false; } } -void -DeterministicSchedule::wait(sem_t* sem) { +void DeterministicSchedule::wait(sem_t* sem) { while (!tryWait(sem)) { // we're not busy waiting because this is a deterministic schedule } } +} +} -}} - -namespace folly { namespace detail { +namespace folly { +namespace detail { using namespace test; using namespace std::chrono; template <> -FutexResult -Futex::futexWaitImpl( - uint32_t expected, - time_point* absSystemTimeout, - time_point* absSteadyTimeout, - uint32_t waitMask) { +FutexResult Futex::futexWaitImpl( + uint32_t expected, + time_point* absSystemTimeout, + time_point* absSteadyTimeout, + uint32_t waitMask) { bool hasTimeout = absSystemTimeout != nullptr || absSteadyTimeout != nullptr; bool awoken = false; FutexResult result = FutexResult::AWOKEN; int futexErrno = 0; DeterministicSchedule::beforeSharedAccess(); + FOLLY_TEST_DSCHED_VLOG(this << ".futexWait(" << std::hex << expected + << ", .., " << std::hex << waitMask + << ") beginning.."); futexLock.lock(); if (data == expected) { auto& queue = futexQueues[this]; @@ -260,17 +276,16 @@ Futex::futexWaitImpl( // a 10% probability if we haven't been woken up already if (!awoken && hasTimeout && DeterministicSchedule::getRandNumber(100) < 10) { - assert(futexQueues.count(this) != 0 && - &futexQueues[this] == &queue); + assert(futexQueues.count(this) != 0 && &futexQueues[this] == &queue); queue.erase(ours); if (queue.empty()) { futexQueues.erase(this); } // Simulate ETIMEDOUT 90% of the time and other failures // remaining time - result = - DeterministicSchedule::getRandNumber(100) >= 10 - ? FutexResult::TIMEDOUT : FutexResult::INTERRUPTED; + result = DeterministicSchedule::getRandNumber(100) >= 10 + ? FutexResult::TIMEDOUT + : FutexResult::INTERRUPTED; break; } } @@ -278,13 +293,31 @@ Futex::futexWaitImpl( result = FutexResult::VALUE_CHANGED; } futexLock.unlock(); + + char const* resultStr = "?"; + switch (result) { + case FutexResult::AWOKEN: + resultStr = "AWOKEN"; + break; + case FutexResult::TIMEDOUT: + resultStr = "TIMEDOUT"; + break; + case FutexResult::INTERRUPTED: + resultStr = "INTERRUPTED"; + break; + case FutexResult::VALUE_CHANGED: + resultStr = "VALUE_CHANGED"; + break; + } + FOLLY_TEST_DSCHED_VLOG(this << ".futexWait(" << std::hex << expected + << ", .., " << std::hex << waitMask << ") -> " + << resultStr); DeterministicSchedule::afterSharedAccess(); return result; } -template<> -int -Futex::futexWake(int count, uint32_t wakeMask) { +template <> +int Futex::futexWake(int count, uint32_t wakeMask) { int rv = 0; DeterministicSchedule::beforeSharedAccess(); futexLock.lock(); @@ -304,44 +337,36 @@ Futex::futexWake(int count, uint32_t wakeMask) { } } futexLock.unlock(); + FOLLY_TEST_DSCHED_VLOG(this << ".futexWake(" << count << ", " << std::hex + << wakeMask << ") -> " << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - -template<> +template <> CacheLocality const& CacheLocality::system() { static CacheLocality cache(CacheLocality::uniform(16)); return cache; } -template<> -test::DeterministicAtomic - SequentialThreadId::prevId(0); - -template<> -FOLLY_TLS size_t - SequentialThreadId::currentId(0); - -template<> +template <> const AccessSpreader -AccessSpreader::stripeByCore( - CacheLocality::system<>().numCachesByLevel.front()); + AccessSpreader::stripeByCore( + CacheLocality::system<>().numCachesByLevel.front()); -template<> +template <> const AccessSpreader -AccessSpreader::stripeByChip( - CacheLocality::system<>().numCachesByLevel.back()); - -template<> -AccessSpreaderArray -AccessSpreaderArray::sharedInstance = {}; + AccessSpreader::stripeByChip( + CacheLocality::system<>().numCachesByLevel.back()); +template <> +AccessSpreaderArray + AccessSpreaderArray::sharedInstance = {}; -template<> -Getcpu::Func -AccessSpreader::pickGetcpuFunc(size_t numStripes) { - return &SequentialThreadId::getcpu; +template <> +Getcpu::Func AccessSpreader::pickGetcpuFunc( + size_t numStripes) { + return &DeterministicSchedule::getcpu; +} +} } - -}} diff --git a/folly/test/DeterministicSchedule.h b/folly/test/DeterministicSchedule.h index 3fda7321..25cd80e6 100644 --- a/folly/test/DeterministicSchedule.h +++ b/folly/test/DeterministicSchedule.h @@ -25,12 +25,23 @@ #include #include #include +#include #include #include #include -namespace folly { namespace test { +namespace folly { +namespace test { + +// This is ugly, but better perf for DeterministicAtomic translates +// directly to more states explored and tested +#define FOLLY_TEST_DSCHED_VLOG(msg...) \ + do { \ + if (false) { \ + VLOG(2) << std::hex << std::this_thread::get_id() << ": " << msg; \ + } \ + } while (false) /** * DeterministicSchedule coordinates the inter-thread communication of a @@ -80,7 +91,8 @@ class DeterministicSchedule : boost::noncopyable { * runnable thread. The subset is chosen with size n, and the choice * is made every m steps. */ - static std::function uniformSubset(long seed, int n = 2, + static std::function uniformSubset(long seed, + int n = 2, int m = 64); /** Obtains permission for the current thread to perform inter-thread @@ -99,13 +111,23 @@ class DeterministicSchedule : boost::noncopyable { auto sched = tls_sched; auto sem = sched ? sched->beforeThreadCreate() : nullptr; auto child = std::thread([=](Args... a) { - if (sched) sched->afterThreadCreate(sem); - SCOPE_EXIT { if (sched) sched->beforeThreadExit(); }; + if (sched) { + sched->afterThreadCreate(sem); + beforeSharedAccess(); + FOLLY_TEST_DSCHED_VLOG("running"); + afterSharedAccess(); + } + SCOPE_EXIT { + if (sched) { + sched->beforeThreadExit(); + } + }; func(a...); }, args...); if (sched) { beforeSharedAccess(); sched->active_.insert(child.get_id()); + FOLLY_TEST_DSCHED_VLOG("forked " << std::hex << child.get_id()); afterSharedAccess(); } return child; @@ -128,20 +150,24 @@ class DeterministicSchedule : boost::noncopyable { * not set-up it falls back to std::rand() */ static int getRandNumber(int n); + /** Deterministic implemencation of getcpu */ + static int getcpu(unsigned* cpu, unsigned* node, void* unused); + private: static FOLLY_TLS sem_t* tls_sem; static FOLLY_TLS DeterministicSchedule* tls_sched; + static FOLLY_TLS unsigned tls_threadId; std::function scheduler_; std::vector sems_; std::unordered_set active_; + unsigned nextThreadId_; sem_t* beforeThreadCreate(); void afterThreadCreate(sem_t*); void beforeThreadExit(); }; - /** * DeterministicAtomic is a drop-in replacement std::atomic that * cooperates with DeterministicSchedule. @@ -152,29 +178,33 @@ struct DeterministicAtomic { DeterministicAtomic() = default; ~DeterministicAtomic() = default; - DeterministicAtomic(DeterministicAtomic const &) = delete; - DeterministicAtomic& operator= (DeterministicAtomic const &) = delete; + DeterministicAtomic(DeterministicAtomic const&) = delete; + DeterministicAtomic& operator=(DeterministicAtomic const&) = delete; constexpr /* implicit */ DeterministicAtomic(T v) noexcept : data(v) {} - bool is_lock_free() const noexcept { - return data.is_lock_free(); - } + bool is_lock_free() const noexcept { return data.is_lock_free(); } bool compare_exchange_strong( - T& v0, T v1, - std::memory_order mo = std::memory_order_seq_cst) noexcept { + T& v0, T v1, std::memory_order mo = std::memory_order_seq_cst) noexcept { DeterministicSchedule::beforeSharedAccess(); + auto orig = v0; bool rv = data.compare_exchange_strong(v0, v1, mo); + FOLLY_TEST_DSCHED_VLOG(this << ".compare_exchange_strong(" << std::hex + << orig << ", " << std::hex << v1 << ") -> " + << rv << "," << std::hex << v0); DeterministicSchedule::afterSharedAccess(); return rv; } bool compare_exchange_weak( - T& v0, T v1, - std::memory_order mo = std::memory_order_seq_cst) noexcept { + T& v0, T v1, std::memory_order mo = std::memory_order_seq_cst) noexcept { DeterministicSchedule::beforeSharedAccess(); + auto orig = v0; bool rv = data.compare_exchange_weak(v0, v1, mo); + FOLLY_TEST_DSCHED_VLOG(this << ".compare_exchange_weak(" << std::hex << orig + << ", " << std::hex << v1 << ") -> " << rv + << "," << std::hex << v0); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -182,13 +212,16 @@ struct DeterministicAtomic { T exchange(T v, std::memory_order mo = std::memory_order_seq_cst) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = data.exchange(v, mo); + FOLLY_TEST_DSCHED_VLOG(this << ".exchange(" << std::hex << v << ") -> " + << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - /* implicit */ operator T () const noexcept { + /* implicit */ operator T() const noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = data; + FOLLY_TEST_DSCHED_VLOG(this << "() -> " << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -196,13 +229,15 @@ struct DeterministicAtomic { T load(std::memory_order mo = std::memory_order_seq_cst) const noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = data.load(mo); + FOLLY_TEST_DSCHED_VLOG(this << ".load() -> " << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator= (T v) noexcept { + T operator=(T v) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = (data = v); + FOLLY_TEST_DSCHED_VLOG(this << " = " << std::hex << v); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -210,40 +245,47 @@ struct DeterministicAtomic { void store(T v, std::memory_order mo = std::memory_order_seq_cst) noexcept { DeterministicSchedule::beforeSharedAccess(); data.store(v, mo); + FOLLY_TEST_DSCHED_VLOG(this << ".store(" << std::hex << v << ")"); DeterministicSchedule::afterSharedAccess(); } - T operator++ () noexcept { + T operator++() noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = ++data; + FOLLY_TEST_DSCHED_VLOG(this << " pre++ -> " << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator++ (int postDummy) noexcept { + T operator++(int postDummy) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = data++; + FOLLY_TEST_DSCHED_VLOG(this << " post++ -> " << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator-- () noexcept { + T operator--() noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = --data; + FOLLY_TEST_DSCHED_VLOG(this << " pre-- -> " << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator-- (int postDummy) noexcept { + T operator--(int postDummy) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = data--; + FOLLY_TEST_DSCHED_VLOG(this << " post-- -> " << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator+= (T v) noexcept { + T operator+=(T v) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = (data += v); + FOLLY_TEST_DSCHED_VLOG(this << " += " << std::hex << v << " -> " << std::hex + << rv); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -252,13 +294,17 @@ struct DeterministicAtomic { DeterministicSchedule::beforeSharedAccess(); T rv = data; data += v; + FOLLY_TEST_DSCHED_VLOG(this << ".fetch_add(" << std::hex << v << ") -> " + << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator-= (T v) noexcept { + T operator-=(T v) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = (data -= v); + FOLLY_TEST_DSCHED_VLOG(this << " -= " << std::hex << v << " -> " << std::hex + << rv); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -267,13 +313,17 @@ struct DeterministicAtomic { DeterministicSchedule::beforeSharedAccess(); T rv = data; data -= v; + FOLLY_TEST_DSCHED_VLOG(this << ".fetch_sub(" << std::hex << v << ") -> " + << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator&= (T v) noexcept { + T operator&=(T v) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = (data &= v); + FOLLY_TEST_DSCHED_VLOG(this << " &= " << std::hex << v << " -> " << std::hex + << rv); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -282,13 +332,17 @@ struct DeterministicAtomic { DeterministicSchedule::beforeSharedAccess(); T rv = data; data &= v; + FOLLY_TEST_DSCHED_VLOG(this << ".fetch_and(" << std::hex << v << ") -> " + << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator|= (T v) noexcept { + T operator|=(T v) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = (data |= v); + FOLLY_TEST_DSCHED_VLOG(this << " |= " << std::hex << v << " -> " << std::hex + << rv); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -297,13 +351,17 @@ struct DeterministicAtomic { DeterministicSchedule::beforeSharedAccess(); T rv = data; data |= v; + FOLLY_TEST_DSCHED_VLOG(this << ".fetch_or(" << std::hex << v << ") -> " + << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } - T operator^= (T v) noexcept { + T operator^=(T v) noexcept { DeterministicSchedule::beforeSharedAccess(); T rv = (data ^= v); + FOLLY_TEST_DSCHED_VLOG(this << " ^= " << std::hex << v << " -> " << std::hex + << rv); DeterministicSchedule::afterSharedAccess(); return rv; } @@ -312,29 +370,32 @@ struct DeterministicAtomic { DeterministicSchedule::beforeSharedAccess(); T rv = data; data ^= v; + FOLLY_TEST_DSCHED_VLOG(this << ".fetch_xor(" << std::hex << v << ") -> " + << std::hex << rv); DeterministicSchedule::afterSharedAccess(); return rv; } }; - -}} // namespace folly::test +} +} // namespace folly::test /* Specialization declarations */ -namespace folly { namespace detail { +namespace folly { +namespace detail { -template<> +template <> int Futex::futexWake(int count, uint32_t wakeMask); -template<> +template <> FutexResult Futex::futexWaitImpl( - uint32_t expected, - std::chrono::time_point* absSystemTime, - std::chrono::time_point* absSteadyTime, - uint32_t waitMask); - -template<> -Getcpu::Func -AccessSpreader::pickGetcpuFunc(size_t numStripes); - -}} // namespace folly::detail + uint32_t expected, + std::chrono::time_point* absSystemTime, + std::chrono::time_point* absSteadyTime, + uint32_t waitMask); + +template <> +Getcpu::Func AccessSpreader::pickGetcpuFunc( + size_t numStripes); +} +} // namespace folly::detail -- 2.34.1