Summary:
Change from using __thread to using the FOLLY_TLS macro. This allows abstraction over the gcc and msvc implementations of thread-local storage (__thread and __declspec(thread)), which have the same semantics, and will also allow a drop-in replacement with thread_local once compiler support for that feature is complete. This doesn't do anything about Apple, however, which still has broken __thread support.
This doesn't actually change any implementation for now; it simply allows for correct compilation.
Test Plan: fbmake runtests
Reviewed By: delong.j@fb.com
FB internal diff:
D1278726
# endif
#endif
+/* Platform specific TLS support
+ * gcc implements __thread
+ * msvc implements __declspec(thread)
+ * the semantics are the same (but remember __thread is broken on apple)
+ */
+#if defined(_MSC_VER)
+# define FOLLY_TLS __declspec(thread)
+#elif defined(__GNUC__) || defined(__clang__)
+# define FOLLY_TLS __thread
+#else
+# error cannot define platform specific thread local storage
+#endif
// Define to 1 if you have the `preadv' and `pwritev' functions, respectively
#if !defined(FOLLY_HAVE_PREADV) && !defined(FOLLY_HAVE_PWRITEV)
* NOTE: Apple platforms don't support the same semantics for __thread that
* Linux does (and it's only supported at all on i386). For these, use
* pthread_setspecific()/pthread_getspecific() for the per-thread
- * storage.
+ * storage. Windows (MSVC and GCC) does support the same semantics
+ * with __declspec(thread)
*/
template<class T, class Tag=void>
std::atomic<size_t> SequentialThreadId<std::atomic>::prevId(0);
template<>
-__thread size_t SequentialThreadId<std::atomic>::currentId(0);
+FOLLY_TLS size_t SequentialThreadId<std::atomic>::currentId(0);
/////////////// AccessSpreader
#include <type_traits>
#include <vector>
#include "folly/Likely.h"
+#include "folly/Portability.h"
namespace folly { namespace detail {
private:
static Atom<size_t> prevId;
- // TODO: switch to thread_local
- static __thread size_t currentId;
+ static FOLLY_TLS size_t currentId;
};
template <template<typename> class Atom, size_t kMaxCpus>
#ifdef __x86_64__
static const size_t s_pageSize = sysconf(_SC_PAGESIZE);
-static __thread uintptr_t tls_stackLimit;
-static __thread size_t tls_stackSize;
+static FOLLY_TLS uintptr_t tls_stackLimit;
+static FOLLY_TLS size_t tls_stackSize;
static void fetchStackLimits() {
pthread_attr_t attr;
}
#if !__APPLE__
- static __thread ThreadEntry threadEntry_;
+ static FOLLY_TLS ThreadEntry threadEntry_;
#endif
static StaticMeta<Tag>* inst_;
};
#if !__APPLE__
-template <class Tag> __thread ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
+template <class Tag>
+FOLLY_TLS ThreadEntry StaticMeta<Tag>::threadEntry_ = {0};
#endif
template <class Tag> StaticMeta<Tag>* StaticMeta<Tag>::inst_ = nullptr;
namespace {
-__thread bool invalid;
-__thread StackTraceStack activeExceptions;
-__thread StackTraceStack caughtExceptions;
+FOLLY_TLS bool invalid;
+FOLLY_TLS StackTraceStack activeExceptions;
+FOLLY_TLS StackTraceStack caughtExceptions;
pthread_once_t initialized = PTHREAD_ONCE_INIT;
extern "C" {
EXPECT_EQ(cpu, again);
}
-static __thread unsigned testingCpu = 0;
+static FOLLY_TLS unsigned testingCpu = 0;
static int testingGetcpu(unsigned* cpu, unsigned* node, void* unused) {
if (cpu != nullptr) {
namespace folly { namespace test {
-__thread sem_t* DeterministicSchedule::tls_sem;
-__thread DeterministicSchedule* DeterministicSchedule::tls_sched;
+FOLLY_TLS sem_t* DeterministicSchedule::tls_sem;
+FOLLY_TLS DeterministicSchedule* DeterministicSchedule::tls_sched;
// access is protected by futexLock
static std::unordered_map<detail::Futex<DeterministicAtomic>*,
SequentialThreadId<test::DeterministicAtomic>::prevId(0);
template<>
-__thread size_t SequentialThreadId<test::DeterministicAtomic>::currentId(0);
+FOLLY_TLS size_t
+ SequentialThreadId<test::DeterministicAtomic>::currentId(0);
template<>
const AccessSpreader<test::DeterministicAtomic>
static int getRandNumber(int n);
private:
- static __thread sem_t* tls_sem;
- static __thread DeterministicSchedule* tls_sched;
+ static FOLLY_TLS sem_t* tls_sem;
+ static FOLLY_TLS DeterministicSchedule* tls_sched;
std::function<int(int)> scheduler_;
std::vector<sem_t*> sems_;
MAX_LIFECYCLE_EVENT
};
-static __thread int lc_counts[MAX_LIFECYCLE_EVENT];
-static __thread int lc_prev[MAX_LIFECYCLE_EVENT];
+static FOLLY_TLS int lc_counts[MAX_LIFECYCLE_EVENT];
+static FOLLY_TLS int lc_prev[MAX_LIFECYCLE_EVENT];
static int lc_outstanding() {
return lc_counts[DEFAULT_CONSTRUCTOR] + lc_counts[COPY_CONSTRUCTOR] +
ThreadLocal<int32_t> globalTL32Baseline;
std::atomic<int64_t> globalInt64Baseline(0);
std::atomic<int32_t> globalInt32Baseline(0);
-__thread int64_t global__thread64;
-__thread int32_t global__thread32;
+FOLLY_TLS int64_t global__thread64;
+FOLLY_TLS int32_t global__thread32;
// Alternate lock-free implementation. Achieves about the same performance,
// but uses about 20x more memory than ThreadCachedInt with 24 threads.