2 * Copyright 2017-present Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include <folly/AtomicStruct.h>
19 #include <folly/PackedSyncPtr.h>
20 #include <folly/concurrency/detail/AtomicSharedPtr-detail.h>
21 #include <folly/synchronization/detail/AtomicUtils.h>
26 * This is an implementation of the std::atomic_shared_ptr TS
27 * http://en.cppreference.com/w/cpp/experimental/atomic_shared_ptr
28 * https://isocpp.org/files/papers/N4162.pdf
30 * AFAIK, the only other implementation is Anthony Williams's, from the
31 * Just::thread library:
33 * https://bitbucket.org/anthonyw/atomic_shared_ptr
35 * implementation details:
37 * Basically, three things need to be atomically exchanged to make this work:
39 * * the pointer to the control block
40 * * the aliased pointer, if any.
42 * The Williams version does it with DWcas: 32 bits for local count, 64
43 * bits for control block ptr, and he changes the shared_ptr
44 * implementation to also store the aliased pointers using a linked list
45 * like structure, and provides 32-bit index accessors to them (like
46 * IndexedMemPool trick).
48 * This version instead stores the 48 bits of address, plus 16 bits of
49 * local count in a single 8byte pointer. This avoids 'lock cmpxchg16b',
50 * which is much slower than 'lock xchg' in the normal 'store' case. In
51 * the less-common aliased pointer scenario, we just allocate it in a new
52 * block, and store a pointer to that instead.
54 * Note that even if we only want to use the 3 bits of pointer alignment,
55 * this trick should still work - Any more than 4 concurrent accesses
56 * will have to go to an external map count instead (slower, but lots of
57 * concurrent access will be slow anyway due to bouncing cachelines).
59 * As a perf optimization, we currently batch up local count and only
60 * move it global every once in a while. This means load() is usually
61 * only a single atomic operation, instead of 3. For this trick to work,
62 * we probably need at least 8 bits to make batching worth it.
65 // A note on noexcept: If the pointer is an aliased pointer,
66 // store() will allocate. Otherwise it is noexcept.
// Template parameters: Atom is the atomic wrapper type (overridable,
// e.g. for deterministic-schedule testing); CountedDetail abstracts the
// shared_ptr control-block internals used throughout this class.
// NOTE(review): the leading `template <typename T, ...` line is outside
// this excerpt.
71 template <typename> class Atom = std::atomic,
72 typename CountedDetail = detail::shared_ptr_internals>
73 class atomic_shared_ptr {
// SharedPtr: the user-visible smart-pointer type for T.
74 using SharedPtr = typename CountedDetail::template CountedPtr<T>;
// BasePtr: the refcounted control-block type.
75 using BasePtr = typename CountedDetail::counted_base;
// PackedPtr: 48-bit pointer + 16-bit extra (local count / flag bits).
76 using PackedPtr = folly::PackedSyncPtr<BasePtr>;
// Default constructor (body elided in this excerpt).
79 atomic_shared_ptr() noexcept {
// Construct holding `foo`. Not noexcept: store() may allocate when the
// incoming pointer is an aliased shared_ptr (see get_newptr()).
82 explicit atomic_shared_ptr(SharedPtr foo) /* noexcept */
83 : atomic_shared_ptr() {
84 store(std::move(foo));
// Copying an atomic_shared_ptr is not meaningful; deleted.
86 atomic_shared_ptr(const atomic_shared_ptr<T>&) = delete;
// Destructor releases the held pointer by storing null.
88 ~atomic_shared_ptr() {
89 store(SharedPtr(nullptr));
// Assign a new value; same allocation caveat as store().
91 void operator=(SharedPtr desired) /* noexcept */ {
92 store(std::move(desired));
94 void operator=(const atomic_shared_ptr<T>&) = delete;
// Reports whether operations are lock-free. Per the comment below this
// is conditional: under extreme contention (more than EXTERNAL_OFFSET
// concurrent accessors) takeOwnedBase() falls back to a yield loop.
// (Return statement elided in this excerpt.)
96 bool is_lock_free() const noexcept {
97 // lock free unless more than EXTERNAL_OFFSET threads are
98 // contending and they all get unlucky and scheduled out during
101 // TODO: Could use a lock-free external map to fix this
// Atomically load the current value. Takes a batched local reference
// via takeOwnedBase(), then wraps it as a SharedPtr without an extra
// refcount increment (the `false` argument) — the local count taken in
// takeOwnedBase() already accounts for this reference.
106 SharedPtr load(std::memory_order order = std::memory_order_seq_cst) const
108 auto local = takeOwnedBase(order);
109 return get_shared_ptr(local, false);
// Implicit conversion, equivalent to load() (body elided in excerpt).
112 /* implicit */ operator SharedPtr() const {
// store(): atomically replace the held value with `n`.
// Not noexcept: get_newptr() allocates when `n` is aliased.
// (The declaration's first line is elided in this excerpt.)
118 std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
119 auto newptr = get_newptr(std::move(n));
// Single atomic xchg of the packed pointer — this is why the packed
// 48+16-bit representation is used instead of a DWCAS.
120 auto old = ptr_.exchange(newptr, order);
// Drop the external reference(s) held by the previous value.
121 release_external(old);
// exchange(): like store(), but returns the previous value as a
// SharedPtr. (Leading declaration lines elided in this excerpt.)
126 std::memory_order order = std::memory_order_seq_cst) /* noexcept */ {
127 auto newptr = get_newptr(std::move(n));
128 auto old = ptr_.exchange(newptr, order);
// Wrap the old control block as a SharedPtr to hand back to the caller,
// then release the external count this object held on it.
133 old_ptr = get_shared_ptr(old);
134 release_external(old);
// Convenience overload: derives the failure memory order from `mo`.
// (Parameter lines for `expected`/`n` elided in this excerpt.)
140 bool compare_exchange_weak(
143 std::memory_order mo = std::memory_order_seq_cst) noexcept {
144 return compare_exchange_weak(
145 expected, n, mo, detail::default_failure_memory_order(mo));
// Main CAS implementation. Not noexcept: get_newptr(n) may allocate
// for aliased pointers.
147 bool compare_exchange_weak(
150 std::memory_order success,
151 std::memory_order failure) /* noexcept */ {
152 auto newptr = get_newptr(n);
153 PackedPtr oldptr, expectedptr;
// Take a batched local reference on the current value so it cannot be
// freed while we compare it against `expected`.
155 oldptr = takeOwnedBase(success);
// owners_eq() compares control blocks, unwrapping the aliased-pointer
// wrapper if present.
156 if (!owners_eq(oldptr, CountedDetail::get_counted_base(expected))) {
// Mismatch: report the current value via `expected` (no extra inc —
// the local ref from takeOwnedBase() covers it) and undo newptr's
// external count.
157 expected = get_shared_ptr(oldptr, false);
158 release_external(newptr);
161 expectedptr = oldptr; // Need oldptr to release if failed
162 if (ptr_.compare_exchange_weak(expectedptr, newptr, success, failure)) {
// Success: release the old value's external count, minus the one
// local reference we took above (hence -1).
164 release_external(oldptr, -1);
// Failure path (surrounding control flow elided in this excerpt):
// report the observed value and release the unused newptr.
169 expected = get_shared_ptr(oldptr, false);
171 expected = SharedPtr(nullptr);
173 release_external(newptr);
// SharedPtr&& overloads forwarding to the lvalue versions.
// (Parameter lines elided in this excerpt.)
177 bool compare_exchange_weak(
180 std::memory_order mo = std::memory_order_seq_cst) noexcept {
181 return compare_exchange_weak(
182 expected, desired, mo, detail::default_failure_memory_order(mo));
184 bool compare_exchange_weak(
187 std::memory_order success,
188 std::memory_order failure) /* noexcept */ {
189 return compare_exchange_weak(expected, desired, success, failure);
// Convenience overload: derives the failure order from `mo`.
// (Parameter lines elided in this excerpt.)
191 bool compare_exchange_strong(
194 std::memory_order mo = std::memory_order_seq_cst) noexcept {
195 return compare_exchange_strong(
196 expected, n, mo, detail::default_failure_memory_order(mo));
// Strong CAS: loop on the weak version until it either succeeds or
// fails for a real reason (i.e. `expected` actually changed), filtering
// out spurious weak-CAS failures.
198 bool compare_exchange_strong(
201 std::memory_order success,
202 std::memory_order failure) /* noexcept */ {
// Snapshot `expected` so we can detect whether a weak failure was
// spurious (value unchanged) or genuine (value changed).
203 auto local_expected = expected;
205 if (compare_exchange_weak(expected, n, success, failure)) {
// Retry only while the weak failure left `expected` unchanged.
208 } while (local_expected == expected);
// Forwarding overloads (parameter lines elided in this excerpt).
212 bool compare_exchange_strong(
215 std::memory_order mo = std::memory_order_seq_cst) noexcept {
216 return compare_exchange_strong(
217 expected, desired, mo, detail::default_failure_memory_order(mo));
219 bool compare_exchange_strong(
222 std::memory_order success,
223 std::memory_order failure) /* noexcept */ {
224 return compare_exchange_strong(expected, desired, success, failure);
228 // Matches packed_sync_pointer. Must be > max number of local
229 // counts. This is the max number of threads that can access this
230 // atomic_shared_ptr at once before we start blocking.
231 static constexpr unsigned EXTERNAL_OFFSET{0x2000};
232 // Bit signifying aliased constructor
233 static constexpr unsigned ALIASED_PTR{0x4000};
// The single word of state: control-block pointer packed with a 16-bit
// extra field holding the local count and the ALIASED_PTR flag.
// mutable because load() (a const member) mutates the local count.
235 mutable AtomicStruct<PackedPtr, Atom> ptr_;
// Bump the control block's shared count by EXTERNAL_OFFSET (+ c) so
// that up to EXTERNAL_OFFSET local references can be handed out without
// touching the global count again.
237 void add_external(BasePtr* res, int64_t c = 0) const {
239 CountedDetail::inc_shared_count(res, EXTERNAL_OFFSET + c);
// Inverse of add_external(): return the unused portion of the external
// batch (EXTERNAL_OFFSET minus the local references actually taken,
// adjusted by c) to the global count.
241 void release_external(PackedPtr& res, int64_t c = 0) const {
245 int64_t count = get_local_count(res) + c;
246 int64_t diff = EXTERNAL_OFFSET - count;
248 CountedDetail::template release_shared<T>(res.get(), diff);
// Build a PackedPtr from a copied SharedPtr. This is where the
// allocation noted in the class comment happens: an aliased shared_ptr
// (stored pointer != the control block's managed pointer) cannot be
// represented in a single packed word, so it is wrapped in another
// shared_ptr whose control block is stored instead.
250 PackedPtr get_newptr(const SharedPtr& n) const {
256 newval = CountedDetail::get_counted_base(n);
257 if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
258 // This is an aliased sharedptr. Make an un-aliased one
259 // by wrapping in *another* shared_ptr.
260 auto data = CountedDetail::template make_ptr<SharedPtr>(n);
261 newval = CountedDetail::get_counted_base(data);
263 // (add external must happen before data goes out of scope)
264 add_external(newval);
// Non-aliased path: just pre-charge the external batch.
266 add_external(newval);
// Pack pointer + initial extra bits. (Computation of `count` — which
// presumably includes the ALIASED_PTR flag — is elided in this excerpt.)
271 newptr.init(newval, count);
// Move overload of get_newptr(): same aliasing logic as the copy
// overload, but consumes `n`'s reference. release_ptr() drops the
// moved-from handle and add_external(..., -1) charges the batch minus
// the one reference we just absorbed.
275 PackedPtr get_newptr(SharedPtr&& n) const {
281 newval = CountedDetail::get_counted_base(n);
282 if (n.get() != CountedDetail::template get_shared_ptr<T>(newval)) {
283 // This is an aliased sharedptr. Make an un-aliased one
284 // by wrapping in *another* shared_ptr.
285 auto data = CountedDetail::template make_ptr<SharedPtr>(std::move(n));
286 newval = CountedDetail::get_counted_base(data);
288 CountedDetail::release_ptr(data);
289 add_external(newval, -1);
// Non-aliased path: consume `n`'s reference directly.
291 CountedDetail::release_ptr(n);
292 add_external(newval, -1);
// Pack pointer + extra bits (computation of `count` elided in excerpt).
297 newptr.init(newval, count);
// Extract the local reference count from the packed extra bits,
// masking off the ALIASED_PTR flag.
307 unsigned int get_local_count(const PackedPtr& p) const {
308 return p.extra() & ~ALIASED_PTR;
311 // Check pointer equality considering wrapped aliased pointers.
312 bool owners_eq(PackedPtr& p1, BasePtr* p2) {
313 bool aliased1 = p1.extra() & ALIASED_PTR;
// Aliased case: unwrap one level — the stored control block owns a
// wrapper shared_ptr; compare the wrapped pointer's control block.
315 auto p1a = CountedDetail::template get_shared_ptr_from_counted_base<T>(
317 return CountedDetail::get_counted_base(p1a) == p2;
// Non-aliased case: direct control-block comparison.
319 return p1.get() == p2;
// Reconstruct a SharedPtr from a packed control-block pointer.
// `inc` controls whether an extra refcount increment is taken
// (callers pass false when a local reference already covers it).
// Handles the aliased case by unwrapping the wrapper shared_ptr.
322 SharedPtr get_shared_ptr(const PackedPtr& p, bool inc = true) const {
323 bool aliased = p.extra() & ALIASED_PTR;
325 auto res = CountedDetail::template get_shared_ptr_from_counted_base<T>(
329 CountedDetail::template get_shared_ptr_from_counted_base<SharedPtr>(
336 /* Get a reference to the pointer, either from the local batch or
337 * from the global count.
339 * return is the base ptr, and the previous local count, if it is
340 * needed for compare_and_swap later.
// CAS loop: bump the packed local count by one to claim a reference.
// (The loop header and some control-flow lines are elided in this
// excerpt.)
342 PackedPtr takeOwnedBase(std::memory_order order) const noexcept {
343 PackedPtr local, newlocal;
344 local = ptr_.load(std::memory_order_acquire)_;
350 if (get_local_count(newlocal) + 1 > EXTERNAL_OFFSET) {
351 // spinlock in the rare case we have more than
352 // EXTERNAL_OFFSET threads trying to access at once.
353 std::this_thread::yield();
354 // Force DeterministicSchedule to choose a different thread
355 local = ptr_.load(std::memory_order_acquire);
// Claim one reference from the pre-charged external batch.
357 newlocal.setExtra(newlocal.extra() + 1);
358 assert(get_local_count(newlocal) > 0);
359 if (ptr_.compare_exchange_weak(local, newlocal, order)) {
365 // Check if we need to push a batch from local -> global
366 auto batchcount = EXTERNAL_OFFSET / 2;
367 if (get_local_count(newlocal) > batchcount) {
// Move batchcount references to the global shared count, then try to
// subtract them from the local count via putOwnedBase().
368 CountedDetail::inc_shared_count(newlocal.get(), batchcount);
369 putOwnedBase(newlocal.get(), batchcount, order);
// Return `count` previously-batched local references for control block
// `p`. If the stored pointer is still `p` and has enough local count,
// subtract via CAS; otherwise (pointer changed, or another thread
// already flushed the batch) release `count` directly against the
// global shared count. (Some control-flow lines elided in excerpt.)
375 void putOwnedBase(BasePtr* p, unsigned int count, std::memory_order mo) const
377 PackedPtr local = ptr_.load(std::memory_order_acquire);
// Stored value no longer refers to `p`: fall through to the global
// release path.
379 if (local.get() != p) {
382 auto newlocal = local;
383 if (get_local_count(local) > count) {
384 newlocal.setExtra(local.extra() - count);
386 // Otherwise it may be the same pointer, but someone else won
387 // the compare_exchange below, local count was already made
388 // global. We decrement the global count directly instead of
392 if (ptr_.compare_exchange_weak(local, newlocal, mo)) {
// Fallback: return the batch straight to the global count.
397 CountedDetail::template release_shared<T>(p, count);