From 3c1182171f38b6506bd818e95ae24636b1cd0ca0 Mon Sep 17 00:00:00 2001 From: Maged Michael Date: Fri, 2 Jun 2017 11:16:04 -0700 Subject: [PATCH] Hazard pointers: Optimize memory order, add bulk reclamation control, add stats, add implementation switches, add benchmarks. Summary: Make the implementation partially performant by optimizing memory order and using asymmetric memory barrier for full fences. Also added more control for bulk reclamation implementation, stats, and quality of implementation switches. Reviewed By: djwatson Differential Revision: D5129614 fbshipit-source-id: c597edf711fdc8f16f80523bd8cae42c51fbe140 --- .../hazptr/bench/HazptrBench-Amb.cpp | 50 +++++ .../hazptr/bench/HazptrBench-NoAmb.cpp | 50 +++++ folly/experimental/hazptr/bench/HazptrBench.h | 138 ++++++++++++ folly/experimental/hazptr/debug.h | 6 +- folly/experimental/hazptr/hazptr-impl.h | 197 ++++++++++++++---- folly/experimental/hazptr/hazptr.h | 7 +- folly/experimental/hazptr/test/HazptrTest.cpp | 6 +- 7 files changed, 410 insertions(+), 44 deletions(-) create mode 100644 folly/experimental/hazptr/bench/HazptrBench-Amb.cpp create mode 100644 folly/experimental/hazptr/bench/HazptrBench-NoAmb.cpp create mode 100644 folly/experimental/hazptr/bench/HazptrBench.h diff --git a/folly/experimental/hazptr/bench/HazptrBench-Amb.cpp b/folly/experimental/hazptr/bench/HazptrBench-Amb.cpp new file mode 100644 index 00000000..70a887f9 --- /dev/null +++ b/folly/experimental/hazptr/bench/HazptrBench-Amb.cpp @@ -0,0 +1,50 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define HAZPTR_AMB true + +#include +#include +#include + +using namespace folly::hazptr; + +int nthreads; +int size; + +BENCHMARK(amb, iters) { + run_once(nthreads, size, iters); +} + +BENCHMARK(amb_dup, iters) { + run_once(nthreads, size, iters); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + gflags::ParseCommandLineFlags(&argc, &argv, true); + std::cout << "------------------------------------------------------ AMB\n"; + for (int i : nthr) { + nthreads = i; + for (int j : sizes) { + size = j; + std::cout << i << " threads -- " << j << "-item list" << std::endl; + bench("amb ", i, j, 0); + bench("amb - dup ", i, j, 0); + } + } + std::cout << "----------------------------------------------------------\n"; +} diff --git a/folly/experimental/hazptr/bench/HazptrBench-NoAmb.cpp b/folly/experimental/hazptr/bench/HazptrBench-NoAmb.cpp new file mode 100644 index 00000000..fada77fe --- /dev/null +++ b/folly/experimental/hazptr/bench/HazptrBench-NoAmb.cpp @@ -0,0 +1,50 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#define HAZPTR_AMB false + +#include +#include +#include + +using namespace folly::hazptr; + +int nthreads; +int size; + +BENCHMARK(no_amb, iters) { + run_once(nthreads, size, iters); +} + +BENCHMARK(no_amb_dup, iters) { + run_once(nthreads, size, iters); +} + +int main(int argc, char** argv) { + testing::InitGoogleTest(&argc, argv); + gflags::ParseCommandLineFlags(&argc, &argv, true); + std::cout << "--------------------------------------------------- No AMB\n"; + for (int i : nthr) { + nthreads = i; + for (int j : sizes) { + size = j; + std::cout << i << " threads -- " << j << "-item list" << std::endl; + bench("no amb ", i, j, 0); + bench("no amb - dup ", i, j, 0); + } + } + std::cout << "----------------------------------------------------------\n"; +} diff --git a/folly/experimental/hazptr/bench/HazptrBench.h b/folly/experimental/hazptr/bench/HazptrBench.h new file mode 100644 index 00000000..a545996e --- /dev/null +++ b/folly/experimental/hazptr/bench/HazptrBench.h @@ -0,0 +1,138 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include + +#include +#include + +namespace folly { +namespace hazptr { + +inline uint64_t run_once(int nthreads, int size, int ops) { + folly::BenchmarkSuspender susp; + SWMRListSet s; + std::atomic start{false}; + std::atomic started{0}; + + for (int i = 0; i < size; ++i) { + s.add(i); + } + + std::vector threads(nthreads); + for (int tid = 0; tid < nthreads; ++tid) { + threads[tid] = std::thread([&, tid] { + started.fetch_add(1); + while (!start.load()) + /* spin */; + + for (int j = tid; j < ops; j += nthreads) { + s.contains(j); + } + }); + } + + while (started.load() < nthreads) + /* spin */; + + // begin time measurement + auto tbegin = std::chrono::steady_clock::now(); + susp.dismiss(); + start.store(true); + + for (auto& t : threads) { + t.join(); + } + + susp.rehire(); + // end time measurement + uint64_t duration = 0; + auto tend = std::chrono::steady_clock::now(); + duration = std::chrono::duration_cast(tend - tbegin) + .count(); + return duration; +} + +inline uint64_t bench(std::string name, int nthreads, int size, uint64_t base) { + int reps = 10; + int ops = 100000; + uint64_t min = UINTMAX_MAX; + uint64_t max = 0; + uint64_t sum = 0; + + run_once(nthreads, size, ops); // sometimes first run is outlier + for (int r = 0; r < reps; ++r) { + uint64_t dur = run_once(nthreads, size, ops); + sum += dur; + min = std::min(min, dur); + max = std::max(max, dur); + } + + uint64_t avg = sum / reps; + uint64_t res = min; + std::cout << name; + std::cout << " " << std::setw(4) << max / ops << " ns"; + std::cout << " " << std::setw(4) << avg / ops << " ns"; + std::cout << " " << std::setw(4) << res / ops << " ns"; + if (base) { + std::cout << " " << std::setw(3) << 100 * base / res << "%"; + } + std::cout << std::endl; + return res; +} + +const int nthr[] = {1, 10}; +const int sizes[] = {10, 100}; +const std::string header = "Test_name, Max time, Avg time, Min time"; + +} // namespace folly { +} // namespace hazptr { + +/* +--------------------------------------------------- No AMB +1 threads -- 10-item list +no amb 210 ns 204 ns 202 ns +no amb - dup 213 ns 207 ns 203 ns +1 threads -- 100-item list +no amb 1862 ns 1810 ns 1778 ns +no amb - dup 1791 ns 1785 ns 1777 ns +10 threads -- 10-item list +no amb 227 ns 161 ns 143 ns +no amb - dup 145 ns 144 ns 143 ns +10 threads -- 100-item list +no amb 520 ns 518 ns 515 ns +no amb - dup 684 ns 536 ns 516 ns +---------------------------------------------------------- +------------------------------------------------------ AMB +1 threads -- 10-item list +amb 48 ns 46 ns 45 ns +amb - dup 47 ns 45 ns 45 ns +1 threads -- 100-item list +amb 242 ns 236 ns 234 ns +amb - dup 243 ns 238 ns 234 ns +10 threads -- 10-item list +amb 226 ns 130 ns 109 ns +amb - dup 161 ns 115 ns 109 ns +10 threads -- 100-item list +amb 192 ns 192 ns 191 ns +amb - dup 416 ns 324 ns 192 ns +---------------------------------------------------------- + */ diff --git a/folly/experimental/hazptr/debug.h b/folly/experimental/hazptr/debug.h index b723ae1d..671e6515 100644 --- a/folly/experimental/hazptr/debug.h +++ b/folly/experimental/hazptr/debug.h @@ -17,11 +17,13 @@ #include -#define DO_DEBUG true +#ifndef HAZPTR_DEBUG +#define HAZPTR_DEBUG false +#endif #define DEBUG_PRINT(...) \ do { \ - if (DO_DEBUG) { \ + if (HAZPTR_DEBUG) { \ VLOG(2) << __func__ << " --- " << __VA_ARGS__; \ } \ } while (false) diff --git a/folly/experimental/hazptr/hazptr-impl.h b/folly/experimental/hazptr/hazptr-impl.h index 50874df7..bcf1b5e8 100644 --- a/folly/experimental/hazptr/hazptr-impl.h +++ b/folly/experimental/hazptr/hazptr-impl.h @@ -19,6 +19,30 @@ #error "This should only be included by hazptr.h" #endif +/* quality of implementation switches */ + +// NOTE: The #ifndef pattern is prone to ODR violation. Its use for +// quality of implementation options is temporary. Eventually these +// options should be added to the API in future API extensions. + +#ifndef HAZPTR_AMB +#define HAZPTR_AMB true +#endif + +#ifndef HAZPTR_SCAN_MULT +#define HAZPTR_SCAN_MULT 2 +#endif + +#ifndef HAZPTR_SCAN_THRESHOLD +#define HAZPTR_SCAN_THRESHOLD 1000 +#endif + +/* stats switch */ +#ifndef HAZPTR_STATS +#define HAZPTR_STATS false +#endif + +#include #include #include @@ -26,10 +50,32 @@ namespace folly { namespace hazptr { +/** + * helper classes and functions + */ + +/** hazptr_stats */ + +class hazptr_stats; +hazptr_stats& hazptr_stats(); + +/** hazptr_mb */ + +class hazptr_mb { + public: + static void light(); + static void heavy(); +}; + +/** + * public functions + */ + /** hazptr_domain */ -constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept - : mr_(mr) {} +constexpr hazptr_domain::hazptr_domain(memory_resource* mr) noexcept : mr_(mr) { + hazptr_stats(); +} /** hazptr_obj_base */ @@ -85,7 +131,8 @@ inline bool hazptr_owner::try_protect(T*& ptr, const A& src) noexcept { "Return type of A::load() must be T*"); DEBUG_PRINT(this << " " << ptr << " " << &src); set(ptr); - T* p = src.load(); + /*** Full fence ***/ hazptr_mb::light(); + T* p = src.load(std::memory_order_acquire); if (p != ptr) { ptr = p; clear(); @@ -100,7 +147,7 @@ inline T* hazptr_owner::get_protected(const A& src) noexcept { static_assert( std::is_same().load()), T*>::value, "Return type of A::load() must be T*"); - T* p = src.load(); + T* p = src.load(std::memory_order_relaxed); while (!try_protect(p, src)) {} DEBUG_PRINT(this << " " << p << " " << &src); return p; @@ -133,14 +180,11 @@ inline void swap(hazptr_owner& lhs, hazptr_owner& rhs) noexcept { lhs.swap(rhs); } -//////////////////////////////////////////////////////////////////////////////// -// Non-template part of implementation //////////////////////////////////////////////////////////////////////////////// // [TODO]: // - Thread caching of hazptr_rec-s // - Private storage of retired objects // - Control of reclamation (when and by whom) -// - Optimized memory order /** Definition of default_hazptr_domain() */ inline hazptr_domain& default_hazptr_domain() { @@ -153,23 +197,24 @@ inline hazptr_domain& default_hazptr_domain() { inline void hazptr_rec::set(const void* p) noexcept { DEBUG_PRINT(this << " " << p); - hazptr_.store(p); + hazptr_.store(p, std::memory_order_release); } inline const void* hazptr_rec::get() const noexcept { - DEBUG_PRINT(this << " " << hazptr_.load()); - return hazptr_.load(); + auto p = hazptr_.load(std::memory_order_acquire); + DEBUG_PRINT(this << " " << p); + return p; } inline void hazptr_rec::clear() noexcept { DEBUG_PRINT(this); - hazptr_.store(nullptr); + hazptr_.store(nullptr, std::memory_order_release); } inline void hazptr_rec::release() noexcept { DEBUG_PRINT(this); clear(); - active_.store(false); + active_.store(false, std::memory_order_release); } /** hazptr_obj */ @@ -196,27 +241,25 @@ inline hazptr_domain::~hazptr_domain() { } { /* free all hazptr_rec-s */ hazptr_rec* next; - for (auto p = hazptrs_.load(); p; p = next) { + for (auto p = hazptrs_.load(std::memory_order_acquire); p; p = next) { next = p->next_; mr_->deallocate(static_cast(p), sizeof(hazptr_rec)); } } } -inline void hazptr_domain::try_reclaim() { - DEBUG_PRINT(this); - rcount_.exchange(0); - bulkReclaim(); -} - inline hazptr_rec* hazptr_domain::hazptrAcquire() { hazptr_rec* p; hazptr_rec* next; - for (p = hazptrs_.load(); p; p = next) { + for (p = hazptrs_.load(std::memory_order_acquire); p; p = next) { next = p->next_; - bool active = p->active_.load(); + bool active = p->active_.load(std::memory_order_acquire); if (!active) { - if (p->active_.compare_exchange_weak(active, true)) { + if (p->active_.compare_exchange_weak( + active, + true, + std::memory_order_release, + std::memory_order_relaxed)) { DEBUG_PRINT(this << " " << p); return p; } @@ -226,10 +269,14 @@ inline hazptr_rec* hazptr_domain::hazptrAcquire() { if (p == nullptr) { return nullptr; } - p->active_.store(true); + p->active_.store(true, std::memory_order_relaxed); + p->next_ = hazptrs_.load(std::memory_order_acquire); do { - p->next_ = hazptrs_.load(); - if (hazptrs_.compare_exchange_weak(p->next_, p)) { + if (hazptrs_.compare_exchange_weak( + p->next_, + p, + std::memory_order_release, + std::memory_order_acquire)) { break; } } while (true); @@ -245,14 +292,21 @@ inline void hazptr_domain::hazptrRelease(hazptr_rec* p) noexcept { inline int hazptr_domain::pushRetired(hazptr_obj* head, hazptr_obj* tail, int count) { - tail->next_ = retired_.load(); - while (!retired_.compare_exchange_weak(tail->next_, head)) {} + /*** Full fence ***/ hazptr_mb::light(); + tail->next_ = retired_.load(std::memory_order_acquire); + while (!retired_.compare_exchange_weak( + tail->next_, + head, + std::memory_order_release, + std::memory_order_acquire)) { + } return rcount_.fetch_add(count); } inline void hazptr_domain::objRetire(hazptr_obj* p) { auto rcount = pushRetired(p, p, 1) + 1; - if (rcount >= kScanThreshold * hcount_.load()) { + if (rcount >= HAZPTR_SCAN_THRESHOLD && + rcount >= HAZPTR_SCAN_MULT * hcount_.load(std::memory_order_acquire)) { tryBulkReclaim(); } } @@ -260,12 +314,13 @@ inline void hazptr_domain::objRetire(hazptr_obj* p) { inline void hazptr_domain::tryBulkReclaim() { DEBUG_PRINT(this); do { - auto hcount = hcount_.load(); - auto rcount = rcount_.load(); - if (rcount < kScanThreshold * hcount) { + auto hcount = hcount_.load(std::memory_order_acquire); + auto rcount = rcount_.load(std::memory_order_acquire); + if (rcount < HAZPTR_SCAN_THRESHOLD || rcount < HAZPTR_SCAN_MULT * hcount) { return; } - if (rcount_.compare_exchange_weak(rcount, 0)) { + if (rcount_.compare_exchange_weak( + rcount, 0, std::memory_order_release, std::memory_order_relaxed)) { break; } } while (true); @@ -274,11 +329,12 @@ inline void hazptr_domain::tryBulkReclaim() { inline void hazptr_domain::bulkReclaim() { DEBUG_PRINT(this); - auto p = retired_.exchange(nullptr); - auto h = hazptrs_.load(); - std::unordered_set hs; + /*** Full fence ***/ hazptr_mb::heavy(); + auto p = retired_.exchange(nullptr, std::memory_order_acquire); + auto h = hazptrs_.load(std::memory_order_acquire); + std::unordered_set hs; // TODO lock-free alternative for (; h; h = h->next_) { - hs.insert(h->hazptr_.load()); + hs.insert(h->hazptr_.load(std::memory_order_relaxed)); } int rcount = 0; hazptr_obj* retired = nullptr; @@ -303,5 +359,74 @@ inline void hazptr_domain::bulkReclaim() { } } +/** hazptr_stats */ + +class hazptr_stats { + public: + ~hazptr_stats(); + void light(); + void heavy(); + void seq_cst(); + + private: + std::atomic light_{0}; + std::atomic heavy_{0}; + std::atomic seq_cst_{0}; +}; + +inline hazptr_stats::~hazptr_stats() { + DEBUG_PRINT(this << " light " << light_.load()); + DEBUG_PRINT(this << " heavy " << heavy_.load()); + DEBUG_PRINT(this << " seq_cst " << seq_cst_.load()); +} + +inline void hazptr_stats::light() { + if (HAZPTR_STATS) { + /* atomic */ ++light_; + } +} + +inline void hazptr_stats::heavy() { + if (HAZPTR_STATS) { + /* atomic */ ++heavy_; + } +} + +inline void hazptr_stats::seq_cst() { + if (HAZPTR_STATS) { + /* atomic */ ++seq_cst_; + } +} + +inline class hazptr_stats& hazptr_stats() { + static class hazptr_stats stats_; + DEBUG_PRINT(&stats_); + return stats_; +} + +/** hazptr_mb */ + +inline void hazptr_mb::light() { + DEBUG_PRINT(""); + if (HAZPTR_AMB) { + folly::asymmetricLightBarrier(); + hazptr_stats().light(); + } else { + atomic_thread_fence(std::memory_order_seq_cst); + hazptr_stats().seq_cst(); + } +} + +inline void hazptr_mb::heavy() { + DEBUG_PRINT(""); + if (HAZPTR_AMB) { + folly::asymmetricHeavyBarrier(); + hazptr_stats().heavy(); + } else { + atomic_thread_fence(std::memory_order_seq_cst); + hazptr_stats().seq_cst(); + } +} + } // namespace folly } // namespace hazptr diff --git a/folly/experimental/hazptr/hazptr.h b/folly/experimental/hazptr/hazptr.h index d06cefcb..fdc9901f 100644 --- a/folly/experimental/hazptr/hazptr.h +++ b/folly/experimental/hazptr/hazptr.h @@ -53,10 +53,8 @@ class hazptr_domain { private: template friend class hazptr_obj_base; - template friend class hazptr_owner; - - /** Constant -- May be changed to parameter in the future */ - enum { kScanThreshold = 3 }; + template + friend class hazptr_owner; memory_resource* mr_; std::atomic hazptrs_ = {nullptr}; @@ -70,7 +68,6 @@ class hazptr_domain { int pushRetired(hazptr_obj* head, hazptr_obj* tail, int count); void tryBulkReclaim(); void bulkReclaim(); - void try_reclaim(); }; /** Get the default hazptr_domain */ diff --git a/folly/experimental/hazptr/test/HazptrTest.cpp b/folly/experimental/hazptr/test/HazptrTest.cpp index 20505262..45452b58 100644 --- a/folly/experimental/hazptr/test/HazptrTest.cpp +++ b/folly/experimental/hazptr/test/HazptrTest.cpp @@ -13,6 +13,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ +#define HAZPTR_DEBUG true +#define HAZPTR_STATS true +#define HAZPTR_SCAN_THRESHOLD 10 + #include #include #include @@ -26,7 +30,7 @@ #include -DEFINE_int32(num_threads, 1, "Number of threads"); +DEFINE_int32(num_threads, 5, "Number of threads"); DEFINE_int64(num_reps, 1, "Number of test reps"); DEFINE_int64(num_ops, 10, "Number of ops or pairs of ops per rep"); -- 2.34.1