From ec06f66c17f2469fc62221259c66d6bb417f7692 Mon Sep 17 00:00:00 2001 From: Elizabeth Smith Date: Thu, 17 Apr 2014 07:49:10 -0700 Subject: [PATCH] abstract thread_local support Summary: change from using __thread to using the FOLLY_TLS macro; this will allow abstraction over gcc and msvc implementations of thread local (__thread and __declspec(thread)) which have the same semantics and will also allow drop-in replacement of thread_local when compiler support for the feature is complete. This doesn't do anything about apple, however, which still has broken __thread support. This doesn't actually change any implementation for now, simply allows for correct compilation. Test Plan: fbmake runtests Reviewed By: delong.j@fb.com FB internal diff: D1278726 --- folly/Portability.h | 12 ++++++++++++ folly/ThreadLocal.h | 3 ++- folly/detail/CacheLocality.cpp | 2 +- folly/detail/CacheLocality.h | 4 ++-- folly/detail/MemoryIdler.cpp | 4 ++-- folly/detail/ThreadLocalDetail.h | 5 +++-- .../exception_tracer/ExceptionTracerLib.cpp | 6 +++--- folly/test/CacheLocalityTest.cpp | 2 +- folly/test/DeterministicSchedule.cpp | 7 ++++--- folly/test/DeterministicSchedule.h | 4 ++-- folly/test/MPMCQueueTest.cpp | 4 ++-- folly/test/ThreadCachedIntTest.cpp | 4 ++-- 12 files changed, 36 insertions(+), 21 deletions(-) diff --git a/folly/Portability.h b/folly/Portability.h index 0a8989af..d3edde9e 100644 --- a/folly/Portability.h +++ b/folly/Portability.h @@ -95,6 +95,18 @@ struct MaxAlign { char c; } __attribute__((aligned)); # endif #endif +/* Platform specific TLS support + * gcc implements __thread + * msvc implements __declspec(thread) + * the semantics are the same (but remember __thread is broken on apple) + */ +#if defined(_MSC_VER) +# define FOLLY_TLS __declspec(thread) +#elif defined(__GNUC__) || defined(__clang__) +# define FOLLY_TLS __thread +#else +# error cannot define platform specific thread local storage +#endif // Define to 1 if you have the `preadv' and `pwritev' functions,
respectively #if !defined(FOLLY_HAVE_PREADV) && !defined(FOLLY_HAVE_PWRITEV) diff --git a/folly/ThreadLocal.h b/folly/ThreadLocal.h index 4ecc757a..bae58411 100644 --- a/folly/ThreadLocal.h +++ b/folly/ThreadLocal.h @@ -128,7 +128,8 @@ class ThreadLocal { * NOTE: Apple platforms don't support the same semantics for __thread that * Linux does (and it's only supported at all on i386). For these, use * pthread_setspecific()/pthread_getspecific() for the per-thread - * storage. + * storage. Windows (MSVC and GCC) does support the same semantics + * with __declspec(thread) */ template diff --git a/folly/detail/CacheLocality.cpp b/folly/detail/CacheLocality.cpp index e3364dd5..a5b93939 100644 --- a/folly/detail/CacheLocality.cpp +++ b/folly/detail/CacheLocality.cpp @@ -230,7 +230,7 @@ template<> std::atomic SequentialThreadId::prevId(0); template<> -__thread size_t SequentialThreadId::currentId(0); +FOLLY_TLS size_t SequentialThreadId::currentId(0); /////////////// AccessSpreader diff --git a/folly/detail/CacheLocality.h b/folly/detail/CacheLocality.h index 1a0a65f6..66a57889 100644 --- a/folly/detail/CacheLocality.h +++ b/folly/detail/CacheLocality.h @@ -26,6 +26,7 @@ #include #include #include "folly/Likely.h" +#include "folly/Portability.h" namespace folly { namespace detail { @@ -172,8 +173,7 @@ struct SequentialThreadId { private: static Atom prevId; - // TODO: switch to thread_local - static __thread size_t currentId; + static FOLLY_TLS size_t currentId; }; template class Atom, size_t kMaxCpus> diff --git a/folly/detail/MemoryIdler.cpp b/folly/detail/MemoryIdler.cpp index 2d026e48..979d9329 100644 --- a/folly/detail/MemoryIdler.cpp +++ b/folly/detail/MemoryIdler.cpp @@ -90,8 +90,8 @@ void MemoryIdler::flushLocalMallocCaches() { #ifdef __x86_64__ static const size_t s_pageSize = sysconf(_SC_PAGESIZE); -static __thread uintptr_t tls_stackLimit; -static __thread size_t tls_stackSize; +static FOLLY_TLS uintptr_t tls_stackLimit; +static FOLLY_TLS size_t tls_stackSize; 
static void fetchStackLimits() { pthread_attr_t attr; diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h index 52e6212a..423b0f2d 100644 --- a/folly/detail/ThreadLocalDetail.h +++ b/folly/detail/ThreadLocalDetail.h @@ -169,7 +169,7 @@ struct StaticMeta { } #if !__APPLE__ - static __thread ThreadEntry threadEntry_; + static FOLLY_TLS ThreadEntry threadEntry_; #endif static StaticMeta* inst_; @@ -412,7 +412,8 @@ struct StaticMeta { }; #if !__APPLE__ -template __thread ThreadEntry StaticMeta::threadEntry_ = {0}; +template +FOLLY_TLS ThreadEntry StaticMeta::threadEntry_ = {0}; #endif template StaticMeta* StaticMeta::inst_ = nullptr; diff --git a/folly/experimental/exception_tracer/ExceptionTracerLib.cpp b/folly/experimental/exception_tracer/ExceptionTracerLib.cpp index 8105a156..e13774af 100644 --- a/folly/experimental/exception_tracer/ExceptionTracerLib.cpp +++ b/folly/experimental/exception_tracer/ExceptionTracerLib.cpp @@ -42,9 +42,9 @@ using namespace folly::exception_tracer; namespace { -__thread bool invalid; -__thread StackTraceStack activeExceptions; -__thread StackTraceStack caughtExceptions; +FOLLY_TLS bool invalid; +FOLLY_TLS StackTraceStack activeExceptions; +FOLLY_TLS StackTraceStack caughtExceptions; pthread_once_t initialized = PTHREAD_ONCE_INIT; extern "C" { diff --git a/folly/test/CacheLocalityTest.cpp b/folly/test/CacheLocalityTest.cpp index 3a739a00..338c9c47 100644 --- a/folly/test/CacheLocalityTest.cpp +++ b/folly/test/CacheLocalityTest.cpp @@ -327,7 +327,7 @@ TEST(SequentialThreadId, Simple) { EXPECT_EQ(cpu, again); } -static __thread unsigned testingCpu = 0; +static FOLLY_TLS unsigned testingCpu = 0; static int testingGetcpu(unsigned* cpu, unsigned* node, void* unused) { if (cpu != nullptr) { diff --git a/folly/test/DeterministicSchedule.cpp b/folly/test/DeterministicSchedule.cpp index 3136cf30..a19c977e 100644 --- a/folly/test/DeterministicSchedule.cpp +++ b/folly/test/DeterministicSchedule.cpp @@ -25,8 +25,8 @@ 
namespace folly { namespace test { -__thread sem_t* DeterministicSchedule::tls_sem; -__thread DeterministicSchedule* DeterministicSchedule::tls_sched; +FOLLY_TLS sem_t* DeterministicSchedule::tls_sem; +FOLLY_TLS DeterministicSchedule* DeterministicSchedule::tls_sched; // access is protected by futexLock static std::unordered_map*, @@ -335,7 +335,8 @@ test::DeterministicAtomic SequentialThreadId::prevId(0); template<> -__thread size_t SequentialThreadId::currentId(0); +FOLLY_TLS size_t + SequentialThreadId::currentId(0); template<> const AccessSpreader diff --git a/folly/test/DeterministicSchedule.h b/folly/test/DeterministicSchedule.h index b73d7c5a..a344ac9e 100644 --- a/folly/test/DeterministicSchedule.h +++ b/folly/test/DeterministicSchedule.h @@ -129,8 +129,8 @@ class DeterministicSchedule : boost::noncopyable { static int getRandNumber(int n); private: - static __thread sem_t* tls_sem; - static __thread DeterministicSchedule* tls_sched; + static FOLLY_TLS sem_t* tls_sem; + static FOLLY_TLS DeterministicSchedule* tls_sched; std::function scheduler_; std::vector sems_; diff --git a/folly/test/MPMCQueueTest.cpp b/folly/test/MPMCQueueTest.cpp index 5bb2fa7b..81dce263 100644 --- a/folly/test/MPMCQueueTest.cpp +++ b/folly/test/MPMCQueueTest.cpp @@ -418,8 +418,8 @@ enum LifecycleEvent { MAX_LIFECYCLE_EVENT }; -static __thread int lc_counts[MAX_LIFECYCLE_EVENT]; -static __thread int lc_prev[MAX_LIFECYCLE_EVENT]; +static FOLLY_TLS int lc_counts[MAX_LIFECYCLE_EVENT]; +static FOLLY_TLS int lc_prev[MAX_LIFECYCLE_EVENT]; static int lc_outstanding() { return lc_counts[DEFAULT_CONSTRUCTOR] + lc_counts[COPY_CONSTRUCTOR] + diff --git a/folly/test/ThreadCachedIntTest.cpp b/folly/test/ThreadCachedIntTest.cpp index 15c0f96b..a84a82fd 100644 --- a/folly/test/ThreadCachedIntTest.cpp +++ b/folly/test/ThreadCachedIntTest.cpp @@ -152,8 +152,8 @@ ThreadLocal globalTL64Baseline; ThreadLocal globalTL32Baseline; std::atomic globalInt64Baseline(0); std::atomic globalInt32Baseline(0); 
-__thread int64_t global__thread64; -__thread int32_t global__thread32; +FOLLY_TLS int64_t global__thread64; +FOLLY_TLS int32_t global__thread32; // Alternate lock-free implementation. Achieves about the same performance, // but uses about 20x more memory than ThreadCachedInt with 24 threads. -- 2.34.1