From bd0686484d15d1f36b04c23c2624bac49059220a Mon Sep 17 00:00:00 2001
From: Dave Watson
Date: Mon, 3 Apr 2017 10:51:19 -0700
Subject: [PATCH] atomic_shared_ptr

Summary:
An (almost) lock-free atomic_shared_ptr, matching the proposed concurrency
TS interface. See notes at the top of the file.

Reviewed By: davidtgoldblatt

Differential Revision: D4716098

fbshipit-source-id: b9ca2443ba9e227ebb6f40807128073c6e14222a
---
 folly/AtomicStruct.h                               |  37 +-
 folly/Makefile.am                                  |   3 +
 folly/PackedSyncPtr.h                              |   5 +
 folly/detail/AtomicUtils.h                         |  36 ++
 folly/experimental/AtomicSharedPtr.h               | 403 ++++++++++++++++++
 .../detail/AtomicSharedPtr-detail.h                | 174 ++++++++
 .../test/AtomicSharedPtrCounted.h                  | 162 +++++++
 .../experimental/test/AtomicSharedPtrTest.cpp      | 173 ++++++++
 folly/test/DeterministicSchedule.h                 |  29 +-
 9 files changed, 1009 insertions(+), 13 deletions(-)
 create mode 100644 folly/detail/AtomicUtils.h
 create mode 100644 folly/experimental/AtomicSharedPtr.h
 create mode 100644 folly/experimental/detail/AtomicSharedPtr-detail.h
 create mode 100644 folly/experimental/test/AtomicSharedPtrCounted.h
 create mode 100644 folly/experimental/test/AtomicSharedPtrTest.cpp

diff --git a/folly/AtomicStruct.h b/folly/AtomicStruct.h
index 1fb151cb..d15d0d93 100644
--- a/folly/AtomicStruct.h
+++ b/folly/AtomicStruct.h
@@ -16,11 +16,12 @@
 
 #pragma once
 
-#include <stdint.h>
-#include <string.h>
 #include <atomic>
-#include <type_traits>
 #include <folly/Traits.h>
+#include <folly/detail/AtomicUtils.h>
+#include <stdint.h>
+#include <string.h>
+#include <type_traits>
 
 namespace folly {
 
@@ -75,10 +76,19 @@ class AtomicStruct {
   }
 
   bool compare_exchange_strong(
-      T& v0, T v1,
-      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+      T& v0,
+      T v1,
+      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+    return compare_exchange_strong(
+        v0, v1, mo, detail::default_failure_memory_order(mo));
+  }
+  bool compare_exchange_strong(
+      T& v0,
+      T v1,
+      std::memory_order success,
+      std::memory_order failure) noexcept {
     Raw d0 = encode(v0);
-    bool rv = data.compare_exchange_strong(d0, encode(v1), mo);
+    bool rv = data.compare_exchange_strong(d0, encode(v1), success, failure);
     if (!rv) {
       v0 = decode(d0);
     }
@@ -86,10 +96,19 @@ class AtomicStruct {
   }
 
   bool compare_exchange_weak(
-      T& v0, T v1,
-      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+      T& v0,
+      T v1,
+      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+    return compare_exchange_weak(
+        v0, v1, mo, detail::default_failure_memory_order(mo));
+  }
+  bool compare_exchange_weak(
+      T& v0,
+      T v1,
+      std::memory_order success,
+      std::memory_order failure) noexcept {
     Raw d0 = encode(v0);
-    bool rv = data.compare_exchange_weak(d0, encode(v1), mo);
+    bool rv = data.compare_exchange_weak(d0, encode(v1), success, failure);
     if (!rv) {
       v0 = decode(d0);
     }
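The new four-argument overloads let callers split the success and failure
orderings, with the old single-order form delegating to
detail::default_failure_memory_order. A minimal sketch of a caller, assuming a
trivially-copyable 8-byte struct (TaggedIndex and advance are invented for this
illustration, not part of the patch):

    #include <atomic>
    #include <cstdint>
    #include <folly/AtomicStruct.h>

    struct TaggedIndex {
      uint32_t index;
      uint32_t version; // 8 bytes total, so AtomicStruct packs it in one word
    };

    folly::AtomicStruct<TaggedIndex> slot;

    bool advance(uint32_t expected, uint32_t next) {
      TaggedIndex seen = slot.load(std::memory_order_acquire);
      while (seen.index == expected) {
        TaggedIndex want{next, seen.version + 1};
        // Publish with acq_rel; a failed CAS only re-reads, so acquire
        // suffices on the failure path.
        if (slot.compare_exchange_weak(
                seen,
                want,
                std::memory_order_acq_rel,
                std::memory_order_acquire)) {
          return true;
        }
      }
      return false;
    }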
"PackedSyncPtr should be only 8 bytes---something is " "messed up"); +template +std::ostream& operator<<(std::ostream& os, const PackedSyncPtr& ptr) { + os << "PackedSyncPtr(" << ptr.get() << ", " << ptr.extra() << ")"; + return os; +} } diff --git a/folly/detail/AtomicUtils.h b/folly/detail/AtomicUtils.h new file mode 100644 index 00000000..65d68cf6 --- /dev/null +++ b/folly/detail/AtomicUtils.h @@ -0,0 +1,36 @@ +/* + * Copyright 2017 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace folly { +namespace detail { + +inline std::memory_order default_failure_memory_order( + std::memory_order successMode) { + switch (successMode) { + case std::memory_order_acq_rel: + return std::memory_order_acquire; + case std::memory_order_release: + return std::memory_order_relaxed; + default: + return successMode; + } +} +} +} // namespace diff --git a/folly/experimental/AtomicSharedPtr.h b/folly/experimental/AtomicSharedPtr.h new file mode 100644 index 00000000..ad1f7d3d --- /dev/null +++ b/folly/experimental/AtomicSharedPtr.h @@ -0,0 +1,403 @@ +/* + * Copyright 2017-present Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +/* + * This is an implementation of the std::atomic_shared_ptr TS + * http://en.cppreference.com/w/cpp/experimental/atomic_shared_ptr + * https://isocpp.org/files/papers/N4162.pdf + * + * AFAIK, the only other implementation is Anthony Williams from + * Just::thread library: + * + * https://bitbucket.org/anthonyw/atomic_shared_ptr + * + * implementation details: + * + * Basically, three things need to be atomically exchanged to make this work: + * * the local count + * * the pointer to the control block + * * the aliased pointer, if any. + * + * The Williams version does it with DWcas: 32 bits for local count, 64 + * bits for control block ptr, and he changes the shared_ptr + * implementation to also store the aliased pointers using a linked list + * like structure, and provides 32-bit index accessors to them (like + * IndexedMemPool trick). + * + * This version instead stores the 48 bits of address, plus 16 bits of + * local count in a single 8byte pointer. This avoids 'lock cmpxchg16b', + * which is much slower than 'lock xchg' in the normal 'store' case. In + * the less-common aliased pointer scenaro, we just allocate it in a new + * block, and store a pointer to that instead. 
diff --git a/folly/experimental/AtomicSharedPtr.h b/folly/experimental/AtomicSharedPtr.h
new file mode 100644
index 00000000..ad1f7d3d
--- /dev/null
+++ b/folly/experimental/AtomicSharedPtr.h
@@ -0,0 +1,403 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <atomic>
+#include <thread>
+#include <folly/AtomicStruct.h>
+#include <folly/PackedSyncPtr.h>
+#include <folly/detail/AtomicUtils.h>
+#include <folly/experimental/detail/AtomicSharedPtr-detail.h>
+
+/*
+ * This is an implementation of the std::atomic_shared_ptr TS
+ * http://en.cppreference.com/w/cpp/experimental/atomic_shared_ptr
+ * https://isocpp.org/files/papers/N4162.pdf
+ *
+ * AFAIK, the only other implementation is Anthony Williams's, from the
+ * Just::Thread library:
+ *
+ * https://bitbucket.org/anthonyw/atomic_shared_ptr
+ *
+ * Implementation details:
+ *
+ * Basically, three things need to be atomically exchanged to make this work:
+ * * the local count
+ * * the pointer to the control block
+ * * the aliased pointer, if any.
+ *
+ * The Williams version does it with a DW CAS: 32 bits for the local
+ * count, 64 bits for the control block ptr; it also changes the
+ * shared_ptr implementation to store the aliased pointers in a
+ * linked-list-like structure, and provides 32-bit index accessors to
+ * them (like the IndexedMemPool trick).
+ *
+ * This version instead stores the 48 bits of address plus 16 bits of
+ * local count in a single 8-byte pointer.  This avoids 'lock cmpxchg16b',
+ * which is much slower than 'lock xchg' in the normal 'store' case.  In
+ * the less-common aliased-pointer scenario, we just allocate the aliased
+ * pointer in a new block and store a pointer to that instead.
+ *
+ * Note that even if we only wanted to use the 3 bits of pointer
+ * alignment, this trick should still work - any more than 4 concurrent
+ * accesses would have to go to an external count map instead (slower,
+ * but lots of concurrent access will be slow anyway due to bouncing
+ * cachelines).
+ *
+ * As a perf optimization, we currently batch up the local count and only
+ * move it to the global count every once in a while.  This means load()
+ * is usually only a single atomic operation, instead of 3.  For this
+ * trick to work, we probably need at least 8 bits to make batching
+ * worth it.
+ */
+
+// A note on noexcept: if the pointer is an aliased pointer,
+// store() will allocate.  Otherwise it is noexcept.
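The comment above compresses the key trick. As a standalone sketch (helper
names invented here, not from the patch), packing a 48-bit x86-64 user-space
address and a 16-bit local count into one 64-bit word - which PackedSyncPtr
does internally - looks like this:

    #include <cassert>
    #include <cstdint>

    uint64_t pack(void* p, uint16_t localCount) {
      auto raw = reinterpret_cast<uintptr_t>(p);
      assert((raw >> 48) == 0); // canonical user-space address assumed
      return raw | (static_cast<uint64_t>(localCount) << 48);
    }

    void* unpackPtr(uint64_t v) {
      return reinterpret_cast<void*>(v & ((uint64_t(1) << 48) - 1));
    }

    uint16_t unpackCount(uint64_t v) {
      return static_cast<uint16_t>(v >> 48);
    }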
+namespace folly {
+
+template <
+    typename T,
+    template <typename> class Atom = std::atomic,
+    typename CountedDetail = detail::shared_ptr_internals>
+class atomic_shared_ptr {
+  using SharedPtr = typename CountedDetail::template CountedPtr<T>;
+  using BasePtr = typename CountedDetail::counted_base;
+  using PackedPtr = folly::PackedSyncPtr<BasePtr>;
+
+ public:
+  atomic_shared_ptr() noexcept {
+    init();
+  }
+  explicit atomic_shared_ptr(SharedPtr foo) /* noexcept */
+      : atomic_shared_ptr() {
+    store(foo);
+  }
+  atomic_shared_ptr(const atomic_shared_ptr&) = delete;
+
+  ~atomic_shared_ptr() {
+    store(SharedPtr(nullptr));
+  }
+  void operator=(SharedPtr desired) /* noexcept */ {
+    store(desired);
+  }
+  void operator=(const atomic_shared_ptr&) = delete;
+
+  bool is_lock_free() const noexcept {
+    // Lock free unless more than EXTERNAL_OFFSET threads are
+    // contending and they all get unlucky and scheduled out during
+    // load().
+    //
+    // TODO: Could use a lock-free external map to fix this
+    // corner case.
+    return true;
+  }
+
+  SharedPtr load(std::memory_order order = std::memory_order_seq_cst) const
+      noexcept {
+    auto local = takeOwnedBase(order);
+    return get_shared_ptr(local, false);
+  }
+  /* implicit */ operator SharedPtr() const {
+    return load();
+  }
+
+  void store(
+      SharedPtr n,
+      std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
+    auto newptr = get_newptr(std::move(n));
+    auto old = ptr_.exchange(newptr, order);
+    release_external(old);
+  }
+
+  SharedPtr exchange(
+      SharedPtr n,
+      std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
+    auto newptr = get_newptr(std::move(n));
+    auto old = ptr_.exchange(newptr, order);
+
+    SharedPtr old_ptr;
+
+    if (old.get()) {
+      old_ptr = get_shared_ptr(old);
+      release_external(old);
+    }
+
+    return old_ptr;
+  }
+
+  bool compare_exchange_weak(
+      SharedPtr& expected,
+      const SharedPtr& n,
+      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+    return compare_exchange_weak(
+        expected, n, mo, detail::default_failure_memory_order(mo));
+  }
+  bool compare_exchange_weak(
+      SharedPtr& expected,
+      const SharedPtr& n,
+      std::memory_order success,
+      std::memory_order failure) /* noexcept */ {
+    auto newptr = get_newptr(n);
+    PackedPtr oldptr, expectedptr;
+
+    oldptr = takeOwnedBase(success);
+    if (!owners_eq(oldptr, CountedDetail::get_counted_base(expected))) {
+      expected = get_shared_ptr(oldptr, false);
+      release_external(newptr);
+      return false;
+    }
+    expectedptr = oldptr; // Need oldptr to release if failed
+    if (ptr_.compare_exchange_weak(expectedptr, newptr, success, failure)) {
+      if (oldptr.get()) {
+        release_external(oldptr, -1);
+      }
+      return true;
+    } else {
+      if (oldptr.get()) {
+        expected = get_shared_ptr(oldptr, false);
+      } else {
+        expected = SharedPtr(nullptr);
+      }
+      release_external(newptr);
+      return false;
+    }
+  }
+  bool compare_exchange_weak(
+      SharedPtr& expected,
+      SharedPtr&& desired,
+      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+    return compare_exchange_weak(
+        expected, desired, mo, detail::default_failure_memory_order(mo));
+  }
+  bool compare_exchange_weak(
+      SharedPtr& expected,
+      SharedPtr&& desired,
+      std::memory_order success,
+      std::memory_order failure) /* noexcept */ {
+    return compare_exchange_weak(expected, desired, success, failure);
+  }
+  bool compare_exchange_strong(
+      SharedPtr& expected,
+      const SharedPtr& n,
+      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+    return compare_exchange_strong(
+        expected, n, mo, detail::default_failure_memory_order(mo));
+  }
+  bool compare_exchange_strong(
+      SharedPtr& expected,
+      const SharedPtr& n,
+      std::memory_order success,
+      std::memory_order failure) /* noexcept */ {
+    auto local_expected = expected;
+    do {
+      if (compare_exchange_weak(expected, n, success, failure)) {
+        return true;
+      }
+    } while (local_expected == expected);
+
+    return false;
+  }
+  bool compare_exchange_strong(
+      SharedPtr& expected,
+      SharedPtr&& desired,
+      std::memory_order mo = std::memory_order_seq_cst) noexcept {
+    return compare_exchange_strong(
+        expected, desired, mo, detail::default_failure_memory_order(mo));
+  }
+  bool compare_exchange_strong(
+      SharedPtr& expected,
+      SharedPtr&& desired,
+      std::memory_order success,
+      std::memory_order failure) /* noexcept */ {
+    return compare_exchange_strong(expected, desired, success, failure);
+  }
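The public interface above mirrors the proposal's forms. A sketch of typical
read-mostly usage (Config and the globals are invented for this illustration,
not part of the patch):

    #include <folly/experimental/AtomicSharedPtr.h>

    struct Config { int timeoutMs = 100; };

    folly::atomic_shared_ptr<Config> gConfig;

    void writer() {
      gConfig.store(std::make_shared<Config>()); // one 'lock xchg'
    }

    std::shared_ptr<Config> reader() {
      return gConfig.load(); // usually a single atomic op (batched count)
    }

    void update(int newTimeout) {
      auto cur = gConfig.load();
      auto next = std::make_shared<Config>(*cur);
      next->timeoutMs = newTimeout;
      while (!gConfig.compare_exchange_weak(cur, next)) {
        next = std::make_shared<Config>(*cur); // cur was refreshed; rebuild
        next->timeoutMs = newTimeout;
      }
    }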
+
+ private:
+  // Matches packed_sync_pointer.  Must be > the max number of local
+  // counts.  This is the max number of threads that can access this
+  // atomic_shared_ptr at once before we start blocking.
+  static constexpr unsigned EXTERNAL_OFFSET{0x2000};
+  // Bit signifying the aliased constructor
+  static constexpr unsigned ALIASED_PTR{0x4000};
+
+  mutable AtomicStruct<PackedPtr, Atom> ptr_;
+
+  void add_external(BasePtr* res, int64_t c = 0) const {
+    assert(res);
+    CountedDetail::inc_shared_count(res, EXTERNAL_OFFSET + c);
+  }
+  void release_external(PackedPtr& res, int64_t c = 0) const {
+    if (!res.get()) {
+      return;
+    }
+    int64_t count = get_local_count(res) + c;
+    int64_t diff = EXTERNAL_OFFSET - count;
+    assert(diff >= 0);
+    CountedDetail::template release_shared<Atom>(res.get(), diff);
+  }
+  PackedPtr get_newptr(const SharedPtr& n) const {
+    BasePtr* newval;
+    unsigned count = 0;
+    if (!n) {
+      newval = nullptr;
+    } else {
+      newval = CountedDetail::get_counted_base(n);
+      if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
+        // This is an aliased shared_ptr.  Make an un-aliased one
+        // by wrapping it in *another* shared_ptr.
+        auto data = CountedDetail::template make_ptr<SharedPtr>(n);
+        newval = CountedDetail::get_counted_base(data);
+        count = ALIASED_PTR;
+        // (add_external must happen before data goes out of scope)
+        add_external(newval);
+      } else {
+        add_external(newval);
+      }
+    }
+
+    PackedPtr newptr;
+    newptr.init(newval, count);
+
+    return newptr;
+  }
+  PackedPtr get_newptr(SharedPtr&& n) const {
+    BasePtr* newval;
+    unsigned count = 0;
+    if (!n) {
+      newval = nullptr;
+    } else {
+      newval = CountedDetail::get_counted_base(n);
+      if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
+        // This is an aliased shared_ptr.  Make an un-aliased one
+        // by wrapping it in *another* shared_ptr.
+        auto data = CountedDetail::template make_ptr<SharedPtr>(std::move(n));
+        newval = CountedDetail::get_counted_base(data);
+        count = ALIASED_PTR;
+        CountedDetail::release_ptr(data);
+        add_external(newval, -1);
+      } else {
+        CountedDetail::release_ptr(n);
+        add_external(newval, -1);
+      }
+    }
+
+    PackedPtr newptr;
+    newptr.init(newval, count);
+
+    return newptr;
+  }
+  void init() {
+    PackedPtr data;
+    data.init();
+    ptr_.store(data);
+  }
+
+  unsigned int get_local_count(const PackedPtr& p) const {
+    return p.extra() & ~ALIASED_PTR;
+  }
+
+  // Check pointer equality considering wrapped aliased pointers.
+  bool owners_eq(PackedPtr& p1, BasePtr* p2) {
+    bool aliased1 = p1.extra() & ALIASED_PTR;
+    if (aliased1) {
+      auto p1a = CountedDetail::template get_shared_ptr_from_counted_base<
+          SharedPtr>(p1.get(), false);
+      return CountedDetail::get_counted_base(p1a) == p2;
+    }
+    return p1.get() == p2;
+  }
+
+  SharedPtr get_shared_ptr(const PackedPtr& p, bool inc = true) const {
+    bool aliased = p.extra() & ALIASED_PTR;
+
+    auto res = CountedDetail::template get_shared_ptr_from_counted_base<T>(
+        p.get(), inc);
+    if (aliased) {
+      auto aliasedp =
+          CountedDetail::template get_shared_ptr_from_counted_base<SharedPtr>(
+              p.get());
+      res = *aliasedp;
+    }
+    return res;
+  }
+
+  /* Get a reference to the pointer, either from the local batch or
+   * from the global count.
+   *
+   * The return value is the base ptr and the previous local count,
+   * which may be needed for a later compare_and_swap.
+   */
+  PackedPtr takeOwnedBase(std::memory_order order) const noexcept {
+    PackedPtr local, newlocal;
+    local = ptr_.load(std::memory_order_acquire);
+    while (true) {
+      if (!local.get()) {
+        return local;
+      }
+      newlocal = local;
+      if (get_local_count(newlocal) + 1 > EXTERNAL_OFFSET) {
+        // Spin in the rare case we have more than EXTERNAL_OFFSET
+        // threads trying to access at once.
+        std::this_thread::yield();
+        // Force DeterministicSchedule to choose a different thread
+        local = ptr_.load(std::memory_order_acquire);
+      } else {
+        newlocal.setExtra(newlocal.extra() + 1);
+        assert(get_local_count(newlocal) > 0);
+        if (ptr_.compare_exchange_weak(local, newlocal, order)) {
+          break;
+        }
+      }
+    }
+
+    // Check if we need to push a batch from local -> global
+    auto batchcount = EXTERNAL_OFFSET / 2;
+    if (get_local_count(newlocal) > batchcount) {
+      CountedDetail::inc_shared_count(newlocal.get(), batchcount);
+      putOwnedBase(newlocal.get(), batchcount, order);
+    }
+
+    return newlocal;
+  }
+
+  void putOwnedBase(BasePtr* p, unsigned int count, std::memory_order mo) const
+      noexcept {
+    PackedPtr local = ptr_.load(std::memory_order_acquire);
+    while (true) {
+      if (local.get() != p) {
+        break;
+      }
+      auto newlocal = local;
+      if (get_local_count(local) > count) {
+        newlocal.setExtra(local.extra() - count);
+      } else {
+        // Otherwise it may be the same pointer, but someone else won
+        // the compare_exchange below and the local count was already
+        // pushed global.  We decrement the global count directly
+        // instead of the local one.
+        break;
+      }
+      if (ptr_.compare_exchange_weak(local, newlocal, mo)) {
+        return;
+      }
+    }
+
+    CountedDetail::template release_shared<Atom>(p, count);
+  }
+};
+
+} // namespace folly
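The count bookkeeping above amounts to a slab scheme: store() front-loads
EXTERNAL_OFFSET references into the control block, each load() hands one of
them out by bumping only the packed local count, and release_external()
returns the remainder when the pointer is replaced. A toy model of the
arithmetic (not folly code; names invented here):

    #include <cstdint>

    constexpr int64_t kExternalOffset = 0x2000; // mirrors EXTERNAL_OFFSET

    // store() reserved kExternalOffset refs up front; each load() handed one
    // out via the packed local count.  On replacement, the unconsumed
    // remainder goes back to the control block's use count.
    int64_t refsToRelease(int64_t localCount) {
      return kExternalOffset - localCount; // 'diff' in release_external()
    }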
diff --git a/folly/experimental/detail/AtomicSharedPtr-detail.h b/folly/experimental/detail/AtomicSharedPtr-detail.h
new file mode 100644
index 00000000..bd416cb0
--- /dev/null
+++ b/folly/experimental/detail/AtomicSharedPtr-detail.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+#include <atomic>
+#include <memory>
+
+namespace folly {
+namespace detail {
+
+class shared_ptr_internals {
+ public:
+  template <typename T, typename... Args>
+  static std::shared_ptr<T> make_ptr(Args&&... args) {
+    return std::make_shared<T>(std::forward<Args>(args)...);
+  }
+  typedef std::__shared_count<std::_S_atomic> shared_count;
+  typedef std::_Sp_counted_base<std::_S_atomic> counted_base;
+  template <typename T>
+  using CountedPtr = std::shared_ptr<T>;
+  template <typename T>
+  static counted_base* get_counted_base(const std::shared_ptr<T>& bar) {
+    // reinterpret_pointer_cast<const void>
+    // Not quite C++ legal, but explicit template instantiation access to
+    // private members requires the full type name (i.e.
+    // shared_ptr<const void>, not shared_ptr<T>)
+    const std::shared_ptr<const void>& ptr(
+        reinterpret_cast<const std::shared_ptr<const void>&>(bar));
+    return (ptr.*fieldPtr(access_shared_ptr{})).*fieldPtr(access_base{});
+  }
+
+  static void inc_shared_count(counted_base* base, long count) {
+    __gnu_cxx::__atomic_add_dispatch(
+        &(base->*fieldPtr(access_use_count{})), count);
+  }
+
+  template <typename Atom>
+  static void release_shared(counted_base* base, long count) {
+    // If count == 1, this is equivalent to base->_M_release()
+    if (__gnu_cxx::__exchange_and_add_dispatch(
+            &(base->*fieldPtr(access_use_count{})), -count) == count) {
+      base->_M_dispose();
+
+      if (__gnu_cxx::__exchange_and_add_dispatch(
+              &(base->*fieldPtr(access_weak_count{})), -1) == 1) {
+        base->_M_destroy();
+      }
+    }
+  }
+
+  template <typename T>
+  static T* get_shared_ptr(counted_base* base) {
+    // See if this was a make_shared allocation
+    auto inplace = base->_M_get_deleter(typeid(std::_Sp_make_shared_tag));
+    if (inplace) {
+      return (T*)inplace;
+    }
+    // Could also be a _Sp_counted_deleter, but the layout is the same
+    auto ptr =
+        static_cast<std::_Sp_counted_ptr<const void*, std::_S_atomic>*>(base);
+    return (T*)(ptr->*fieldPtr(access_counted_ptr_ptr{}));
+  }
+
+  template <typename T>
+  static T* release_ptr(std::shared_ptr<T>& p) {
+    auto res = p.get();
+    std::shared_ptr<const void>& ptr(
+        reinterpret_cast<std::shared_ptr<const void>&>(p));
+    ptr.*fieldPtr(access_shared_ptr_ptr{}) = nullptr;
+    (ptr.*fieldPtr(access_refcount{})).*fieldPtr(access_base{}) = nullptr;
+    return res;
+  }
+
+  template <typename T>
+  static std::shared_ptr<T> get_shared_ptr_from_counted_base(
+      counted_base* base,
+      bool inc = true) {
+    if (!base) {
+      return nullptr;
+    }
+    std::shared_ptr<const void> newp;
+    if (inc) {
+      inc_shared_count(base, 1);
+    }
+    newp.*fieldPtr(access_shared_ptr_ptr{}) =
+        get_shared_ptr<const void>(base); // _M_ptr
+    (newp.*fieldPtr(access_refcount{})).*fieldPtr(access_base{}) = base;
+    // reinterpret_pointer_cast<T>
+    auto res = reinterpret_cast<std::shared_ptr<T>*>(&newp);
+    return std::move(*res);
+  }
+
+ private:
+  /* Accessors for private members using explicit template instantiation */
+  struct access_shared_ptr {
+    typedef shared_count std::__shared_ptr<const void, std::_S_atomic>::*type;
+    friend type fieldPtr(access_shared_ptr);
+  };
+
+  struct access_base {
+    typedef counted_base* shared_count::*type;
+    friend type fieldPtr(access_base);
+  };
+
+  struct access_use_count {
+    typedef _Atomic_word counted_base::*type;
+    friend type fieldPtr(access_use_count);
+  };
+
+  struct access_weak_count {
+    typedef _Atomic_word counted_base::*type;
+    friend type fieldPtr(access_weak_count);
+  };
+
+  struct access_counted_ptr_ptr {
+    typedef const void* std::_Sp_counted_ptr<const void*, std::_S_atomic>::*
+        type;
+    friend type fieldPtr(access_counted_ptr_ptr);
+  };
+
+  struct access_shared_ptr_ptr {
+    typedef const void* std::__shared_ptr<const void, std::_S_atomic>::*type;
+    friend type fieldPtr(access_shared_ptr_ptr);
+  };
+
+  struct access_refcount {
+    typedef shared_count std::__shared_ptr<const void, std::_S_atomic>::*type;
+    friend type fieldPtr(access_refcount);
+  };
+
+  template <typename Tag, typename Tag::type M>
+  struct Rob {
+    friend typename Tag::type fieldPtr(Tag) {
+      return M;
+    }
+  };
+};
+
+template struct shared_ptr_internals::Rob<
+    shared_ptr_internals::access_shared_ptr,
+    &std::__shared_ptr<const void, std::_S_atomic>::_M_refcount>;
+template struct shared_ptr_internals::Rob<
+    shared_ptr_internals::access_base,
+    &shared_ptr_internals::shared_count::_M_pi>;
+template struct shared_ptr_internals::Rob<
+    shared_ptr_internals::access_use_count,
+    &shared_ptr_internals::counted_base::_M_use_count>;
+template struct shared_ptr_internals::Rob<
+    shared_ptr_internals::access_weak_count,
+    &shared_ptr_internals::counted_base::_M_weak_count>;
+template struct shared_ptr_internals::Rob<
+    shared_ptr_internals::access_counted_ptr_ptr,
+    &std::_Sp_counted_ptr<const void*, std::_S_atomic>::_M_ptr>;
+template struct shared_ptr_internals::Rob<
+    shared_ptr_internals::access_shared_ptr_ptr,
+    &std::__shared_ptr<const void, std::_S_atomic>::_M_ptr>;
+template struct shared_ptr_internals::Rob<
+    shared_ptr_internals::access_refcount,
+    &std::__shared_ptr<const void, std::_S_atomic>::_M_refcount>;
+
+} // namespace detail
+} // namespace folly
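The private-member access pattern above ("Rob") relies on a known loophole:
explicit template instantiations may name private members, and the friend
function defined inside Rob leaks the resulting member pointer out via ADL.
A self-contained toy version (types invented here, same structure as the
header's accessors):

    #include <iostream>

    class Locked {
      int secret_ = 42;
    };

    struct access_secret {
      typedef int Locked::*type;
      friend type fieldPtr(access_secret);
    };

    template <typename Tag, typename Tag::type M>
    struct Rob {
      friend typename Tag::type fieldPtr(Tag) { return M; }
    };

    // Explicit instantiation: naming &Locked::secret_ is legal here even
    // though it is private ([temp.explicit]).
    template struct Rob<access_secret, &Locked::secret_>;

    int main() {
      Locked l;
      std::cout << l.*fieldPtr(access_secret{}) << "\n"; // prints 42
    }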
diff --git a/folly/experimental/test/AtomicSharedPtrCounted.h b/folly/experimental/test/AtomicSharedPtrCounted.h
new file mode 100644
index 00000000..35ec7400
--- /dev/null
+++ b/folly/experimental/test/AtomicSharedPtrCounted.h
@@ -0,0 +1,162 @@
+/*
+ * Copyright 2017-present Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+struct counted_shared_tag {};
+template