From 1d83c51f9df5c66b3a61048efbbe4ab3d97cf959 Mon Sep 17 00:00:00 2001 From: Maged Michael Date: Thu, 15 Jun 2017 10:43:26 -0700 Subject: [PATCH] Add thread caching of hazard pointers. Benchmarks. Minor fixes, optimizations, and refactoring. Summary: Added support for thread caching of hazard pointers. Added thread caching benchmarks. Removed function call from hazptr_domain constexpr constructor. Optimizations of memory order and code refactoring. Reviewed By: davidtgoldblatt Differential Revision: D5249070 fbshipit-source-id: 487fb23abccde228c3c726de4ac8e9f07bfa9498 --- .../hazptr/bench/HazptrBench-Amb-NoTc.cpp | 51 ++++ ...trBench-Amb.cpp => HazptrBench-Amb-Tc.cpp} | 7 +- .../hazptr/bench/HazptrBench-NoAmb-NoTc.cpp | 51 ++++ ...nch-NoAmb.cpp => HazptrBench-NoAmb-Tc.cpp} | 7 +- folly/experimental/hazptr/bench/HazptrBench.h | 66 ++++-- folly/experimental/hazptr/example/SWMRList.h | 21 +- folly/experimental/hazptr/hazptr-impl.h | 218 +++++++++++++++--- 7 files changed, 360 insertions(+), 61 deletions(-) create mode 100644 folly/experimental/hazptr/bench/HazptrBench-Amb-NoTc.cpp rename folly/experimental/hazptr/bench/{HazptrBench-Amb.cpp => HazptrBench-Amb-Tc.cpp} (90%) create mode 100644 folly/experimental/hazptr/bench/HazptrBench-NoAmb-NoTc.cpp rename folly/experimental/hazptr/bench/{HazptrBench-NoAmb.cpp => HazptrBench-NoAmb-Tc.cpp} (86%) diff --git a/folly/experimental/hazptr/bench/HazptrBench-Amb-NoTc.cpp b/folly/experimental/hazptr/bench/HazptrBench-Amb-NoTc.cpp new file mode 100644 index 00000000..17fc961e --- /dev/null +++ b/folly/experimental/hazptr/bench/HazptrBench-Amb-NoTc.cpp @@ -0,0 +1,51 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define HAZPTR_AMB true +#define HAZPTR_TC false + +#include +#include +#include + +using namespace folly::hazptr; + +int nthreads; +int size; + +BENCHMARK(amb, iters) { + run_once(nthreads, size, iters); +} + +BENCHMARK(amb_dup, iters) { + run_once(nthreads, size, iters); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + gflags::ParseCommandLineFlags(&argc, &argv, true); + std::cout << "---------------------------------------------- AMB - No TC\n"; + for (int i : nthr) { + nthreads = i; + for (int j : sizes) { + size = j; + std::cout << i << " threads -- " << j << "-item list" << std::endl; + bench("amb - no tc ", i, j, 0); + bench("amb - no tc - dup ", i, j, 0); + } + } + std::cout << "----------------------------------------------------------\n"; +} diff --git a/folly/experimental/hazptr/bench/HazptrBench-Amb.cpp b/folly/experimental/hazptr/bench/HazptrBench-Amb-Tc.cpp similarity index 90% rename from folly/experimental/hazptr/bench/HazptrBench-Amb.cpp rename to folly/experimental/hazptr/bench/HazptrBench-Amb-Tc.cpp index 70a887f9..dea24c87 100644 --- a/folly/experimental/hazptr/bench/HazptrBench-Amb.cpp +++ b/folly/experimental/hazptr/bench/HazptrBench-Amb-Tc.cpp @@ -15,6 +15,7 @@ */ #define HAZPTR_AMB true +#define HAZPTR_TC true #include #include @@ -36,14 +37,14 @@ BENCHMARK(amb_dup, iters) { int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); gflags::ParseCommandLineFlags(&argc, &argv, true); - std::cout << "------------------------------------------------------ AMB\n"; + std::cout << "------------------------------------------------- AMB - TC\n"; for (int i : nthr) { nthreads = i; for (int j : sizes) { size = j; std::cout << i << " threads -- " << j << "-item list" << std::endl; - bench("amb ", i, j, 0); - bench("amb - dup ", i, j, 0); + bench("amb - tc ", i, j, 0); + bench("amb - tc - dup ", i, j, 0); } } std::cout << "----------------------------------------------------------\n"; diff --git a/folly/experimental/hazptr/bench/HazptrBench-NoAmb-NoTc.cpp b/folly/experimental/hazptr/bench/HazptrBench-NoAmb-NoTc.cpp new file mode 100644 index 00000000..db82693d --- /dev/null +++ b/folly/experimental/hazptr/bench/HazptrBench-NoAmb-NoTc.cpp @@ -0,0 +1,51 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define HAZPTR_AMB false +#define HAZPTR_TC false + +#include +#include +#include + +using namespace folly::hazptr; + +int nthreads; +int size; + +BENCHMARK(no_amb, iters) { + run_once(nthreads, size, iters); +} + +BENCHMARK(no_amb_dup, iters) { + run_once(nthreads, size, iters); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + gflags::ParseCommandLineFlags(&argc, &argv, true); + std::cout << "------------------------------------------- No AMB - No Tc\n"; + for (int i : nthr) { + nthreads = i; + for (int j : sizes) { + size = j; + std::cout << i << " threads -- " << j << "-item list" << std::endl; + bench("no amb - no tc ", i, j, 0); + bench("no amb - no tc - dup ", i, j, 0); + } + } + std::cout << "----------------------------------------------------------\n"; +} diff --git a/folly/experimental/hazptr/bench/HazptrBench-NoAmb.cpp b/folly/experimental/hazptr/bench/HazptrBench-NoAmb-Tc.cpp similarity index 86% rename from folly/experimental/hazptr/bench/HazptrBench-NoAmb.cpp rename to folly/experimental/hazptr/bench/HazptrBench-NoAmb-Tc.cpp index fada77fe..4fb420b2 100644 --- a/folly/experimental/hazptr/bench/HazptrBench-NoAmb.cpp +++ b/folly/experimental/hazptr/bench/HazptrBench-NoAmb-Tc.cpp @@ -15,6 +15,7 @@ */ #define HAZPTR_AMB false +#define HAZPTR_TC true #include #include @@ -36,14 +37,14 @@ BENCHMARK(no_amb_dup, iters) { int main(int argc, char** argv) { testing::InitGoogleTest(&argc, argv); gflags::ParseCommandLineFlags(&argc, &argv, true); - std::cout << "--------------------------------------------------- No AMB\n"; + std::cout << "---------------------------------------------- No AMB - TC\n"; for (int i : nthr) { nthreads = i; for (int j : sizes) { size = j; std::cout << i << " threads -- " << j << "-item list" << std::endl; - bench("no amb ", i, j, 0); - bench("no amb - dup ", i, j, 0); + bench("no amb - tc ", i, j, 0); + bench("no amb - tc - dup ", i, j, 0); } } std::cout << "----------------------------------------------------------\n"; diff --git a/folly/experimental/hazptr/bench/HazptrBench.h b/folly/experimental/hazptr/bench/HazptrBench.h index a545996e..9499d051 100644 --- a/folly/experimental/hazptr/bench/HazptrBench.h +++ b/folly/experimental/hazptr/bench/HazptrBench.h @@ -33,6 +33,8 @@ inline uint64_t run_once(int nthreads, int size, int ops) { std::atomic start{false}; std::atomic started{0}; + hazptr_owner dummy_hptr[100]; + for (int i = 0; i < size; ++i) { s.add(i); } @@ -107,32 +109,60 @@ const std::string header = "Test_name, Max time, Avg time, Min time"; } // namespace hazptr { /* ---------------------------------------------------- No AMB +------------------------------------------- No AMB - No Tc +1 threads -- 10-item list +no amb - no tc 756 ns 688 ns 674 ns +no amb - no tc - dup 725 ns 688 ns 676 ns +1 threads -- 100-item list +no amb - no tc 2469 ns 2366 ns 2334 ns +no amb - no tc - dup 2404 ns 2353 ns 2328 ns +10 threads -- 10-item list +no amb - no tc 802 ns 764 ns 750 ns +no amb - no tc - dup 798 ns 776 ns 733 ns +10 threads -- 100-item list +no amb - no tc 2209 ns 2157 ns 2118 ns +no amb - no tc - dup 2266 ns 2152 ns 1993 ns +---------------------------------------------------------- +---------------------------------------------- AMB - No TC +1 threads -- 10-item list +amb - no tc 554 ns 538 ns 525 ns +amb - no tc - dup 540 ns 530 ns 524 ns +1 threads -- 100-item list +amb - no tc 731 ns 721 ns 715 ns +amb - no tc - dup 745 ns 724 ns 714 ns +10 threads -- 10-item list +amb - no tc 777 ns 717 ns 676 ns +amb - no tc - dup 726 ns 669 ns 638 ns +10 threads -- 100-item list +amb - no tc 1015 ns 985 ns 955 ns +amb - no tc - dup 1000 ns 978 ns 952 ns +---------------------------------------------------------- +---------------------------------------------- No AMB - TC 1 threads -- 10-item list -no amb 210 ns 204 ns 202 ns -no amb - dup 213 ns 207 ns 203 ns +no amb - tc 209 ns 203 ns 199 ns +no amb - tc - dup 210 ns 202 ns 196 ns 1 threads -- 100-item list -no amb 1862 ns 1810 ns 1778 ns -no amb - dup 1791 ns 1785 ns 1777 ns +no amb - tc 1872 ns 1849 ns 1840 ns +no amb - tc - dup 1902 ns 1865 ns 1838 ns 10 threads -- 10-item list -no amb 227 ns 161 ns 143 ns -no amb - dup 145 ns 144 ns 143 ns +no amb - tc 136 ns 50 ns 23 ns +no amb - tc - dup 178 ns 85 ns 23 ns 10 threads -- 100-item list -no amb 520 ns 518 ns 515 ns -no amb - dup 684 ns 536 ns 516 ns +no amb - tc 1594 ns 651 ns 201 ns +no amb - tc - dup 1492 ns 615 ns 203 ns ---------------------------------------------------------- ------------------------------------------------------- AMB +------------------------------------------------- AMB - TC 1 threads -- 10-item list -amb 48 ns 46 ns 45 ns -amb - dup 47 ns 45 ns 45 ns +amb - tc 45 ns 44 ns 44 ns +amb - tc - dup 46 ns 46 ns 45 ns 1 threads -- 100-item list -amb 242 ns 236 ns 234 ns -amb - dup 243 ns 238 ns 234 ns +amb - tc 256 ns 246 ns 240 ns +amb - tc - dup 242 ns 240 ns 238 ns 10 threads -- 10-item list -amb 226 ns 130 ns 109 ns -amb - dup 161 ns 115 ns 109 ns +amb - tc 120 ns 35 ns 13 ns +amb - tc - dup 104 ns 34 ns 9 ns 10 threads -- 100-item list -amb 192 ns 192 ns 191 ns -amb - dup 416 ns 324 ns 192 ns +amb - tc 267 ns 129 ns 49 ns +amb - tc - dup 766 ns 147 ns 42 ns ---------------------------------------------------------- */ diff --git a/folly/experimental/hazptr/example/SWMRList.h b/folly/experimental/hazptr/example/SWMRList.h index 6697fce8..1b4eaa6e 100644 --- a/folly/experimental/hazptr/example/SWMRList.h +++ b/folly/experimental/hazptr/example/SWMRList.h @@ -57,11 +57,11 @@ class SWMRListSet { /* Used by the single writer */ void locate_lower_bound(const T& v, std::atomic*& prev) const { - auto curr = prev->load(); + auto curr = prev->load(std::memory_order_relaxed); while (curr) { if (curr->elem_ >= v) break; prev = &(curr->next_); - curr = curr->next_.load(); + curr = curr->next_.load(std::memory_order_relaxed); } return; } @@ -81,7 +81,7 @@ class SWMRListSet { bool add(T v) { auto prev = &head_; locate_lower_bound(v, prev); - auto curr = prev->load(); + auto curr = prev->load(std::memory_order_relaxed); if (curr && curr->elem_ == v) return false; prev->store(new Node(std::move(v), curr)); return true; @@ -90,11 +90,13 @@ class SWMRListSet { bool remove(const T& v) { auto prev = &head_; locate_lower_bound(v, prev); - auto curr = prev->load(); + auto curr = prev->load(std::memory_order_relaxed); if (!curr || curr->elem_ != v) return false; Node *curr_next = curr->next_.load(); - prev->store(curr_next); // Patch up the actual list... - curr->next_.store(nullptr); // ...and only then null out the removed node. + // Patch up the actual list... + prev->store(curr_next, std::memory_order_release); + // ...and only then null out the removed node. + curr->next_.store(nullptr, std::memory_order_release); curr->retire(domain_); return true; } @@ -105,13 +107,14 @@ class SWMRListSet { hazptr_owner hptr_curr(domain_); while (true) { auto prev = &head_; - auto curr = prev->load(); + auto curr = prev->load(std::memory_order_acquire); while (true) { if (!curr) { return false; } if (!hptr_curr.try_protect(curr, *prev)) break; - auto next = curr->next_.load(); - if (prev->load() != curr) break; + auto next = curr->next_.load(std::memory_order_acquire); + if (prev->load(std::memory_order_acquire) != curr) + break; if (curr->elem_ == val) { return true; } else if (!(curr->elem_ < val)) { diff --git a/folly/experimental/hazptr/hazptr-impl.h b/folly/experimental/hazptr/hazptr-impl.h index bcf1b5e8..3c3254bc 100644 --- a/folly/experimental/hazptr/hazptr-impl.h +++ b/folly/experimental/hazptr/hazptr-impl.h @@ -29,6 +29,14 @@ #define HAZPTR_AMB true #endif +#ifndef HAZPTR_TC +#define HAZPTR_TC true +#endif + +#ifndef HAZPTR_TC_SIZE +#define HAZPTR_TC_SIZE 2 +#endif + #ifndef HAZPTR_SCAN_MULT #define HAZPTR_SCAN_MULT 2 #endif @@ -45,7 +53,8 @@ #include #include -#include +#include // for thread caching +#include // for hash set in bulk reclamation namespace folly { namespace hazptr { @@ -67,15 +76,44 @@ class hazptr_mb { static void heavy(); }; +/** hazptr_tc */ + +class hazptr_tc_entry { + friend class hazptr_tc; + std::atomic hprec_{nullptr}; + std::atomic domain_{nullptr}; + + public: + void fill(hazptr_rec* hprec, hazptr_domain* domain); + hazptr_rec* get(hazptr_domain* domain); + void invalidate(hazptr_rec* hprec); + void evict(); +}; + +class hazptr_tc { + hazptr_tc_entry tc_[HAZPTR_TC_SIZE]; + + public: + hazptr_tc(); + ~hazptr_tc(); + hazptr_rec* get(hazptr_domain* domain); + bool put(hazptr_rec* hprec, hazptr_domain* domain); +}; + +hazptr_tc& hazptr_tc(); + +std::mutex& hazptr_tc_lock(); + +using hazptr_tc_guard = std::lock_guard; + /** * public functions */ /** hazptr_domain */ -constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept : mr_(mr) { - hazptr_stats(); -} +constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept + : mr_(mr) {} /** hazptr_obj_base */ @@ -95,15 +133,19 @@ inline void hazptr_obj_base::retire(hazptr_domain& domain, D deleter) { class hazptr_rec { friend class hazptr_domain; + friend class hazptr_tc_entry; template friend class hazptr_owner; - std::atomic hazptr_ = {nullptr}; - hazptr_rec* next_ = {nullptr}; - std::atomic active_ = {false}; + std::atomic hazptr_{nullptr}; + hazptr_rec* next_{nullptr}; + std::atomic tc_{nullptr}; + std::atomic active_{false}; void set(const void* p) noexcept; const void* get() const noexcept; void clear() noexcept; + bool isActive() noexcept; + bool tryAcquire() noexcept; void release() noexcept; }; @@ -182,9 +224,9 @@ inline void swap(hazptr_owner& lhs, hazptr_owner& rhs) noexcept { //////////////////////////////////////////////////////////////////////////////// // [TODO]: -// - Thread caching of hazptr_rec-s // - Private storage of retired objects // - Control of reclamation (when and by whom) +// - End-to-end lock-free implementation /** Definition of default_hazptr_domain() */ inline hazptr_domain& default_hazptr_domain() { @@ -211,6 +253,21 @@ inline void hazptr_rec::clear() noexcept { hazptr_.store(nullptr, std::memory_order_release); } +inline bool hazptr_rec::isActive() noexcept { + return active_.load(std::memory_order_acquire); +} + +inline bool hazptr_rec::tryAcquire() noexcept { + bool active = isActive(); + if (!active && + active_.compare_exchange_strong( + active, true, std::memory_order_release, std::memory_order_relaxed)) { + DEBUG_PRINT(this); + return true; + } + return false; +} + inline void hazptr_rec::release() noexcept { DEBUG_PRINT(this); clear(); @@ -243,26 +300,36 @@ inline hazptr_domain::~hazptr_domain() { hazptr_rec* next; for (auto p = hazptrs_.load(std::memory_order_acquire); p; p = next) { next = p->next_; + if (HAZPTR_TC) { + if (p->isActive()) { + hazptr_tc_guard g(hazptr_tc_lock()); + if (p->isActive()) { + auto tc = p->tc_.load(std::memory_order_acquire); + DCHECK(tc != nullptr); + tc->invalidate(p); + } + } + } else { + CHECK(!p->isActive()); + } mr_->deallocate(static_cast(p), sizeof(hazptr_rec)); } } } inline hazptr_rec* hazptr_domain::hazptrAcquire() { + if (HAZPTR_TC) { + hazptr_rec* hprec = hazptr_tc().get(this); + if (hprec) { + return hprec; + } + } hazptr_rec* p; hazptr_rec* next; for (p = hazptrs_.load(std::memory_order_acquire); p; p = next) { next = p->next_; - bool active = p->active_.load(std::memory_order_acquire); - if (!active) { - if (p->active_.compare_exchange_weak( - active, - true, - std::memory_order_release, - std::memory_order_relaxed)) { - DEBUG_PRINT(this << " " << p); - return p; - } + if (p->tryAcquire()) { + return p; } } p = static_cast(mr_->allocate(sizeof(hazptr_rec))); @@ -271,21 +338,18 @@ inline hazptr_rec* hazptr_domain::hazptrAcquire() { } p->active_.store(true, std::memory_order_relaxed); p->next_ = hazptrs_.load(std::memory_order_acquire); - do { - if (hazptrs_.compare_exchange_weak( - p->next_, - p, - std::memory_order_release, - std::memory_order_acquire)) { - break; - } - } while (true); + while (!hazptrs_.compare_exchange_weak( + p->next_, p, std::memory_order_release, std::memory_order_acquire)) + /* keep trying */; auto hcount = hcount_.fetch_add(1); DEBUG_PRINT(this << " " << p << " " << sizeof(hazptr_rec) << " " << hcount); return p; } inline void hazptr_domain::hazptrRelease(hazptr_rec* p) noexcept { + if (HAZPTR_TC && hazptr_tc().put(p, this)) { + return; + } DEBUG_PRINT(this << " " << p); p->release(); } @@ -334,7 +398,7 @@ inline void hazptr_domain::bulkReclaim() { auto h = hazptrs_.load(std::memory_order_acquire); std::unordered_set hs; // TODO lock-free alternative for (; h; h = h->next_) { - hs.insert(h->hazptr_.load(std::memory_order_relaxed)); + hs.insert(h->get()); } int rcount = 0; hazptr_obj* retired = nullptr; @@ -428,5 +492,103 @@ inline void hazptr_mb::heavy() { } } +/** hazptr_tc - functions */ + +inline void hazptr_tc_entry::fill(hazptr_rec* hprec, hazptr_domain* domain) { + hprec_.store(hprec, std::memory_order_release); + domain_.store(domain, std::memory_order_release); + hprec->tc_.store(this, std::memory_order_release); + DEBUG_PRINT(this << " " << domain << " " << hprec); +} + +inline hazptr_rec* hazptr_tc_entry::get(hazptr_domain* domain) { + if (domain == domain_.load(std::memory_order_acquire)) { + auto hprec = hprec_.load(std::memory_order_acquire); + if (hprec) { + hprec_.store(nullptr, std::memory_order_release); + DEBUG_PRINT(this << " " << domain << " " << hprec); + return hprec; + } + } + return nullptr; +} + +inline void hazptr_tc_entry::invalidate(hazptr_rec* hprec) { + DCHECK_EQ(hprec, hprec_); + hprec_.store(nullptr, std::memory_order_release); + domain_.store(nullptr, std::memory_order_release); + hprec->tc_.store(nullptr, std::memory_order_relaxed); + hprec->release(); + DEBUG_PRINT(this << " " << hprec); +} + +inline void hazptr_tc_entry::evict() { + auto hprec = hprec_.load(std::memory_order_relaxed); + if (hprec) { + hazptr_tc_guard g(hazptr_tc_lock()); + hprec = hprec_.load(std::memory_order_relaxed); + if (hprec) { + invalidate(hprec); + } + } + DEBUG_PRINT(hprec); +} + +inline hazptr_tc::hazptr_tc() { + DEBUG_PRINT(this); +} + +inline hazptr_tc::~hazptr_tc() { + DEBUG_PRINT(this); + for (int i = 0; i < HAZPTR_TC_SIZE; ++i) { + tc_[i].evict(); + } +} + +inline hazptr_rec* hazptr_tc::get(hazptr_domain* domain) { + for (int i = 0; i < HAZPTR_TC_SIZE; ++i) { + auto hprec = tc_[i].get(domain); + if (hprec) { + DEBUG_PRINT(this << " " << domain << " " << hprec); + return hprec; + } + } + DEBUG_PRINT(this << " " << domain << " nullptr"); + return nullptr; +} + +inline bool hazptr_tc::put(hazptr_rec* hprec, hazptr_domain* domain) { + int other = HAZPTR_TC_SIZE; + for (int i = 0; i < HAZPTR_TC_SIZE; ++i) { + if (tc_[i].hprec_.load(std::memory_order_acquire) == nullptr) { + tc_[i].fill(hprec, domain); + DEBUG_PRINT(this << " " << i); + return true; + } + if (tc_[i].domain_.load(std::memory_order_relaxed) != domain) { + other = i; + } + } + if (other < HAZPTR_TC_SIZE) { + tc_[other].evict(); + tc_[other].fill(hprec, domain); + DEBUG_PRINT(this << " " << other); + return true; + } + return false; +} + +inline class hazptr_tc& hazptr_tc() { + static thread_local class hazptr_tc tc; + DEBUG_PRINT(&tc); + return tc; +} + +inline std::mutex& hazptr_tc_lock() { + static std::mutex m; + DEBUG_PRINT(&m); + return m; +} + } // namespace folly } // namespace hazptr -- 2.34.1