From 8091d7199edecafa678d009b82f04c36dd8ce9a7 Mon Sep 17 00:00:00 2001
From: Yedidya Feldblum
Date: Wed, 20 Dec 2017 11:41:29 -0800
Subject: [PATCH] Kill FOLLY_ALIGNED etc

Summary:
[Folly] Kill `FOLLY_ALIGNED` etc.

`alignas` is standardized as of C++11. Let us just use that.

Replace:
* `FOLLY_ALIGNED` with `alignas`
* `FOLLY_ALIGNED_MAX` with `alignas(folly::max_align_v)`
* `FOLLY_ALIGN_TO_AVOID_FALSE_SHARING` with `alignas(folly::hardware_destructive_interference_size)`

Because where `alignas` may be placed is more restrictive than where attributes may be placed, we also need to move these directives in some cases on top of doing the replacement.

Reviewed By: Orvid

Differential Revision: D6555167

fbshipit-source-id: 4b05b570bace3f8c0fe810b6dd58781dd45757f4
---
 folly/Conv.cpp | 8 ++++----
 folly/Conv.h | 2 +-
 folly/IndexedMemPool.h | 7 ++++---
 folly/MPMCQueue.h | 11 ++++++-----
 folly/Portability.h | 8 --------
 folly/ProducerConsumerQueue.h | 6 ++++--
 folly/SharedMutex.h | 15 +++++++--------
 folly/TokenBucket.h | 2 +-
 folly/concurrency/CacheLocality.h | 6 ------
 folly/concurrency/UnboundedQueue.h | 3 +--
 .../detail/ConcurrentHashMap-detail.h | 2 +-
 folly/executors/IOThreadPoolExecutor.h | 3 ++-
 folly/executors/ThreadPoolExecutor.h | 3 ++-
 .../experimental/flat_combining/FlatCombining.h | 17 +++++++----------
 .../flat_combining/test/FlatCombiningExamples.h | 3 +--
 folly/experimental/hazptr/hazptr-impl.h | 3 +--
 16 files changed, 42 insertions(+), 57 deletions(-)

diff --git a/folly/Conv.cpp b/folly/Conv.cpp
index 8e2f5752..b15d44d9 100644
--- a/folly/Conv.cpp
+++ b/folly/Conv.cpp
@@ -86,7 +86,7 @@ template <> const char *const MaxString<__uint128_t>::value =
 // still not overflow uint16_t.
 constexpr int32_t OOR = 10000;
 
-FOLLY_ALIGNED(16) constexpr uint16_t shift1[] = {
+alignas(16) constexpr uint16_t shift1[] = {
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 10
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 20
@@ -115,7 +115,7 @@ FOLLY_ALIGNED(16) constexpr uint16_t shift1[] = {
   OOR, OOR, OOR, OOR, OOR, OOR  // 250
 };
 
-FOLLY_ALIGNED(16) constexpr uint16_t shift10[] = {
+alignas(16) constexpr uint16_t shift10[] = {
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 10
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 20
@@ -144,7 +144,7 @@ FOLLY_ALIGNED(16) constexpr uint16_t shift10[] = {
   OOR, OOR, OOR, OOR, OOR, OOR  // 250
 };
 
-FOLLY_ALIGNED(16) constexpr uint16_t shift100[] = {
+alignas(16) constexpr uint16_t shift100[] = {
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 10
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 20
@@ -173,7 +173,7 @@ FOLLY_ALIGNED(16) constexpr uint16_t shift100[] = {
   OOR, OOR, OOR, OOR, OOR, OOR  // 250
 };
 
-FOLLY_ALIGNED(16) constexpr uint16_t shift1000[] = {
+alignas(16) constexpr uint16_t shift1000[] = {
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 0-9
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 10
   OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR, OOR,  // 20
diff --git a/folly/Conv.h b/folly/Conv.h
index a545816d..b37e1aa0 100644
--- a/folly/Conv.h
+++ b/folly/Conv.h
@@ -303,7 +303,7 @@ inline uint32_t digits10(uint64_t v) {
   // 10^i, defined for i 0 through 19.
   // This is 20 * 8 == 160 bytes, which fits neatly into 5 cache lines
   // (assuming a cache line size of 64).
-  static const uint64_t powersOf10[20] FOLLY_ALIGNED(64) = {
+  alignas(64) static const uint64_t powersOf10[20] = {
       1,
       10,
       100,
diff --git a/folly/IndexedMemPool.h b/folly/IndexedMemPool.h
index 82ae6295..cab5cac1 100644
--- a/folly/IndexedMemPool.h
+++ b/folly/IndexedMemPool.h
@@ -351,7 +351,7 @@ struct IndexedMemPool : boost::noncopyable {
     }
   };
 
-  struct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING LocalList {
+  struct alignas(hardware_destructive_interference_size) LocalList {
     AtomicStruct<TaggedPtr, Atom> head;
 
     LocalList() : head(TaggedPtr{}) {}
@@ -377,7 +377,7 @@ struct IndexedMemPool : boost::noncopyable {
 
   /// raw storage, only 1..min(size_,actualCapacity_) (inclusive) are
   /// actually constructed. Note that slots_[0] is not constructed or used
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING Slot* slots_;
+  alignas(hardware_destructive_interference_size) Slot* slots_;
 
   /// use AccessSpreader to find your list. We use stripes instead of
   /// thread-local to avoid the need to grow or shrink on thread start
@@ -386,7 +386,8 @@ struct IndexedMemPool : boost::noncopyable {
 
   /// this is the head of a list of node chained by globalNext, that are
   /// themselves each the head of a list chained by localNext
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING AtomicStruct<TaggedPtr, Atom> globalHead_;
+  alignas(hardware_destructive_interference_size)
+      AtomicStruct<TaggedPtr, Atom> globalHead_;
 
   ///////////// private methods
 
diff --git a/folly/MPMCQueue.h b/folly/MPMCQueue.h
index 0db70fa0..932ae8fc 100644
--- a/folly/MPMCQueue.h
+++ b/folly/MPMCQueue.h
@@ -981,7 +981,7 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
   };
 
   /// The maximum number of items in the queue at once
-  size_t FOLLY_ALIGN_TO_AVOID_FALSE_SHARING capacity_;
+  alignas(hardware_destructive_interference_size) size_t capacity_;
 
   /// Anonymous union for use when Dynamic = false and true, respectively
   union {
@@ -1014,18 +1014,19 @@ class MPMCQueueBase<Derived<T, Atom, Dynamic>> : boost::noncopyable {
   Atom<size_t> dcapacity_;
 
   /// Enqueuers get tickets from here
-  Atom<uint64_t> FOLLY_ALIGN_TO_AVOID_FALSE_SHARING pushTicket_;
+  alignas(hardware_destructive_interference_size) Atom<uint64_t> pushTicket_;
 
   /// Dequeuers get tickets from here
-  Atom<uint64_t> FOLLY_ALIGN_TO_AVOID_FALSE_SHARING popTicket_;
+  alignas(hardware_destructive_interference_size) Atom<uint64_t> popTicket_;
 
   /// This is how many times we will spin before using FUTEX_WAIT when
   /// the queue is full on enqueue, adaptively computed by occasionally
   /// spinning for longer and smoothing with an exponential moving average
-  Atom<uint32_t> FOLLY_ALIGN_TO_AVOID_FALSE_SHARING pushSpinCutoff_;
+  alignas(
+      hardware_destructive_interference_size) Atom<uint32_t> pushSpinCutoff_;
 
   /// The adaptive spin cutoff when the queue is empty on dequeue
-  Atom<uint32_t> FOLLY_ALIGN_TO_AVOID_FALSE_SHARING popSpinCutoff_;
+  alignas(hardware_destructive_interference_size) Atom<uint32_t> popSpinCutoff_;
 
   /// Alignment doesn't prevent false sharing at the end of the struct,
   /// so fill out the last cache line
diff --git a/folly/Portability.h b/folly/Portability.h
index 11eb8f82..b41893b5 100644
--- a/folly/Portability.h
+++ b/folly/Portability.h
@@ -34,14 +34,6 @@ constexpr bool kHasUnalignedAccess = false;
 // compiler specific attribute translation
 // msvc should come first, so if clang is in msvc mode it gets the right defines
 
-#if defined(__clang__) || defined(__GNUC__)
-# define FOLLY_ALIGNED(size) __attribute__((__aligned__(size)))
-#elif defined(_MSC_VER)
-# define FOLLY_ALIGNED(size) __declspec(align(size))
-#else
-# error Cannot define FOLLY_ALIGNED on this platform
-#endif
-
 // NOTE: this will only do checking in msvc with versions that support /analyze
 #if _MSC_VER
 # ifdef _USE_ATTRIBUTES_FOR_SAL
diff --git a/folly/ProducerConsumerQueue.h b/folly/ProducerConsumerQueue.h
index 2a8f04c6..b020da84 100644
--- a/folly/ProducerConsumerQueue.h
+++ b/folly/ProducerConsumerQueue.h
@@ -177,8 +177,10 @@ struct ProducerConsumerQueue {
   const uint32_t size_;
   T* const records_;
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> readIndex_;
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<unsigned int> writeIndex_;
+  alignas(hardware_destructive_interference_size)
+      std::atomic<unsigned int> readIndex_;
+  alignas(hardware_destructive_interference_size)
+      std::atomic<unsigned int> writeIndex_;
 
   char pad1_[hardware_destructive_interference_size - sizeof(writeIndex_)];
 };
diff --git a/folly/SharedMutex.h b/folly/SharedMutex.h
index b6a1a263..ce160f80 100644
--- a/folly/SharedMutex.h
+++ b/folly/SharedMutex.h
@@ -738,9 +738,8 @@ class SharedMutexImpl {
   typedef Atom<uintptr_t> DeferredReaderSlot;
 
  private:
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING static DeferredReaderSlot deferredReaders
-      [kMaxDeferredReaders *
-       kDeferredSeparationFactor];
+  alignas(hardware_destructive_interference_size) static DeferredReaderSlot
+      deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor];
 
   // Performs an exclusive lock, waiting for state_ & waitMask to be
   // zero first
@@ -1350,11 +1349,11 @@ template <
     typename Tag_,
     template <typename> class Atom,
     bool BlockImmediately>
-typename SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
-    DeferredReaderSlot
-    SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
-        deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor] =
-        {};
+alignas(hardware_destructive_interference_size)
+    typename SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+        DeferredReaderSlot
+        SharedMutexImpl<ReaderPriority, Tag_, Atom, BlockImmediately>::
+            deferredReaders[kMaxDeferredReaders * kDeferredSeparationFactor] = {};
 
 template <
     bool ReaderPriority,
diff --git a/folly/TokenBucket.h b/folly/TokenBucket.h
index 2abbfe37..7c18ea2c 100644
--- a/folly/TokenBucket.h
+++ b/folly/TokenBucket.h
@@ -221,7 +221,7 @@ class ParameterizedDynamicTokenBucket {
     return true;
   }
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING std::atomic<double> zeroTime_;
+  alignas(hardware_destructive_interference_size) std::atomic<double> zeroTime_;
 };
 
 /**
diff --git a/folly/concurrency/CacheLocality.h b/folly/concurrency/CacheLocality.h
index 420f5334..cd6c06e2 100644
--- a/folly/concurrency/CacheLocality.h
+++ b/folly/concurrency/CacheLocality.h
@@ -118,12 +118,6 @@ struct CacheLocality {
   static CacheLocality uniform(size_t numCpus);
 };
 
-// TODO replace with alignas(hardware_destructive_interference_size)
-
-/// An attribute that will cause a variable or field to be aligned so that
-/// it doesn't have false sharing with anything at a smaller memory address.
-#define FOLLY_ALIGN_TO_AVOID_FALSE_SHARING FOLLY_ALIGNED(128)
-
 /// Knows how to derive a function pointer to the VDSO implementation of
 /// getcpu(2), if available
 struct Getcpu {
diff --git a/folly/concurrency/UnboundedQueue.h b/folly/concurrency/UnboundedQueue.h
index 221e73ce..1aa2f416 100644
--- a/folly/concurrency/UnboundedQueue.h
+++ b/folly/concurrency/UnboundedQueue.h
@@ -648,8 +648,7 @@ class UnboundedQueue {
     Atom<Segment*> next_;
     const Ticket min_;
     bool marked_; // used for iterative deletion
-    FOLLY_ALIGNED(Align)
-    Entry b_[SegmentSize];
+    alignas(Align) Entry b_[SegmentSize];
 
    public:
     explicit Segment(const Ticket t)
diff --git a/folly/concurrency/detail/ConcurrentHashMap-detail.h b/folly/concurrency/detail/ConcurrentHashMap-detail.h
index 51c1375b..99b1ee49 100644
--- a/folly/concurrency/detail/ConcurrentHashMap-detail.h
+++ b/folly/concurrency/detail/ConcurrentHashMap-detail.h
@@ -197,7 +197,7 @@ template <
     typename Allocator = std::allocator<uint8_t>,
     template <typename> class Atom = std::atomic,
     class Mutex = std::mutex>
-class FOLLY_ALIGNED(64) ConcurrentHashMapSegment {
+class alignas(64) ConcurrentHashMapSegment {
   enum class InsertType {
     DOES_NOT_EXIST, // insert/emplace operations. If key exists, return false.
     MUST_EXIST, // assign operations. If key does not exist, return false.
diff --git a/folly/executors/IOThreadPoolExecutor.h b/folly/executors/IOThreadPoolExecutor.h
index b7c6688d..1d270cf5 100644
--- a/folly/executors/IOThreadPoolExecutor.h
+++ b/folly/executors/IOThreadPoolExecutor.h
@@ -73,7 +73,8 @@ class IOThreadPoolExecutor : public ThreadPoolExecutor, public IOExecutor {
   folly::EventBaseManager* getEventBaseManager();
 
  private:
-  struct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING IOThread : public Thread {
+  struct alignas(hardware_destructive_interference_size) IOThread
+      : public Thread {
     IOThread(IOThreadPoolExecutor* pool)
         : Thread(pool), shouldRun(true), pendingTasks(0) {}
     std::atomic<bool> shouldRun;
diff --git a/folly/executors/ThreadPoolExecutor.h b/folly/executors/ThreadPoolExecutor.h
index 0e5bcc17..38154a86 100644
--- a/folly/executors/ThreadPoolExecutor.h
+++ b/folly/executors/ThreadPoolExecutor.h
@@ -129,7 +129,8 @@ class ThreadPoolExecutor : public virtual folly::Executor {
 
   struct TaskStatsCallbackRegistry;
 
-  struct FOLLY_ALIGN_TO_AVOID_FALSE_SHARING Thread : public ThreadHandle {
+  struct alignas(hardware_destructive_interference_size) Thread
+      : public ThreadHandle {
     explicit Thread(ThreadPoolExecutor* pool)
         : id(nextId++),
           handle(),
diff --git a/folly/experimental/flat_combining/FlatCombining.h b/folly/experimental/flat_combining/FlatCombining.h
index bf19e235..bf6cc4e3 100644
--- a/folly/experimental/flat_combining/FlatCombining.h
+++ b/folly/experimental/flat_combining/FlatCombining.h
@@ -112,8 +112,8 @@ class FlatCombining {
  public:
   /// Combining request record.
   class Rec {
-    FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
-    folly::SaturatingSemaphore<false, Atom> valid_;
+    alignas(hardware_destructive_interference_size)
+        folly::SaturatingSemaphore<false, Atom> valid_;
     folly::SaturatingSemaphore<false, Atom> done_;
     folly::SaturatingSemaphore<false, Atom> disconnected_;
     size_t index_;
@@ -421,23 +421,20 @@ class FlatCombining {
   const uint64_t kDefaultNumRecs = 64;
   const uint64_t kIdleThreshold = 10;
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
-  Mutex m_;
+  alignas(hardware_destructive_interference_size) Mutex m_;
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
-  folly::SaturatingSemaphore<true, Atom> pending_;
+  alignas(hardware_destructive_interference_size)
+      folly::SaturatingSemaphore<true, Atom> pending_;
   Atom<bool> shutdown_{false};
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
-  uint32_t numRecs_;
+  alignas(hardware_destructive_interference_size) uint32_t numRecs_;
   uint32_t maxOps_;
   Atom<size_t> recs_;
   bool dedicated_;
   std::thread combiner_;
   Pool recsPool_;
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
-  uint64_t uncombined_ = 0;
+  alignas(hardware_destructive_interference_size) uint64_t uncombined_ = 0;
   uint64_t combined_ = 0;
   uint64_t passes_ = 0;
   uint64_t sessions_ = 0;
diff --git a/folly/experimental/flat_combining/test/FlatCombiningExamples.h b/folly/experimental/flat_combining/test/FlatCombiningExamples.h
index fe6ab184..9e4b1f52 100644
--- a/folly/experimental/flat_combining/test/FlatCombiningExamples.h
+++ b/folly/experimental/flat_combining/test/FlatCombiningExamples.h
@@ -25,8 +25,7 @@
 
 namespace folly {
 
-struct Line {
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
+struct alignas(hardware_destructive_interference_size) Line {
   uint64_t val_;
 };
 
diff --git a/folly/experimental/hazptr/hazptr-impl.h b/folly/experimental/hazptr/hazptr-impl.h
index 07564a43..07e6891d 100644
--- a/folly/experimental/hazptr/hazptr-impl.h
+++ b/folly/experimental/hazptr/hazptr-impl.h
@@ -265,12 +265,11 @@ inline bool hazptr_obj_base_refcounted<T, D>::release_ref() {
  *  hazptr_rec
  */
 
-class hazptr_rec {
+class alignas(hardware_destructive_interference_size) hazptr_rec {
   friend class hazptr_domain;
   friend class hazptr_holder;
   friend struct hazptr_tc_entry;
 
-  FOLLY_ALIGN_TO_AVOID_FALSE_SHARING
   std::atomic<const void*> hazptr_{nullptr};
   hazptr_rec* next_{nullptr};
   std::atomic<bool> active_{false};
-- 
2.34.1
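
For context, the sketch below (not part of the patch) shows the shape of the replacement at a typical use site: the old attribute-style macro could trail the type, whereas `alignas` is more constrained in where it may appear, so the patch moves the specifier to the front of each declaration. The struct, its member names, and the local `hardware_destructive_interference_size` constant are made up for illustration; folly supplies the real constant as `folly::hardware_destructive_interference_size`, and the removed macro hard-coded the same idea via `FOLLY_ALIGNED(128)`.

#include <atomic>
#include <cstddef>
#include <cstdint>

// Stand-in for folly::hardware_destructive_interference_size (illustrative
// value only; the removed FOLLY_ALIGN_TO_AVOID_FALSE_SHARING used 128).
constexpr std::size_t hardware_destructive_interference_size = 128;

// Before (attribute-style macro, allowed to trail the type):
//   std::atomic<uint64_t> FOLLY_ALIGN_TO_AVOID_FALSE_SHARING pushCount_;
// After (alignas leads the member declaration, as in the diffs above):
struct Counters {
  alignas(hardware_destructive_interference_size) std::atomic<uint64_t> pushCount_{0};
  alignas(hardware_destructive_interference_size) std::atomic<uint64_t> popCount_{0};
};

// Each member now starts on its own 128-byte boundary, so the two counters
// cannot land on the same cache line and the alignment propagates to the
// enclosing type.
static_assert(alignof(Counters) == hardware_destructive_interference_size,
              "alignas on members raises the struct's alignment");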