From ea10c90474c682e76c4a147d96734704462f99dc Mon Sep 17 00:00:00 2001 From: Peter Griess Date: Sat, 12 Oct 2013 04:11:59 -0700 Subject: [PATCH] Handle platforms that don't support __thread. Summary: - Apple platforms either have different __thread behavior than Linux or don't support __thread at all: on i386, __thread values are zeroed out before the destructor provided to pthread_key_create(3) is called; on ARM, __thread isn't supported at all. To handle this, use pthread_getspecific(3) to manage the array of IDs on these platforms only. Test Plan: - fbconfig -r folly && fbmake runtests - ./configure && make check on Ubuntu/FC/Mac Reviewed By: simpkins@fb.com FB internal diff: D1008923 --- folly/ThreadLocal.h | 5 +++ folly/detail/ThreadLocalDetail.h | 76 +++++++++++++++++++++++--------- 2 files changed, 60 insertions(+), 21 deletions(-) diff --git a/folly/ThreadLocal.h b/folly/ThreadLocal.h index e2ef58b0..4c901391 100644 --- a/folly/ThreadLocal.h +++ b/folly/ThreadLocal.h @@ -124,6 +124,11 @@ class ThreadLocal { * We use a single global pthread_key_t per Tag to manage object destruction and * memory cleanup upon thread exit because there is a finite number of * pthread_key_t's available per machine. + * + * NOTE: Apple platforms don't support the same semantics for __thread that + * Linux does (and it's only supported at all on i386). For these, use + * pthread_setspecific()/pthread_getspecific() for the per-thread + * storage. 
*/ template diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h index 7dbe6e7f..5e578ab9 100644 --- a/folly/detail/ThreadLocalDetail.h +++ b/folly/detail/ThreadLocalDetail.h @@ -168,7 +168,9 @@ struct StaticMeta { t->next = t->prev = t; } +#if !__APPLE__ static __thread ThreadEntry threadEntry_; +#endif static StaticMeta* inst_; StaticMeta() : nextId_(1) { @@ -185,6 +187,21 @@ struct StaticMeta { LOG(FATAL) << "StaticMeta lives forever!"; } + static ThreadEntry* getThreadEntry() { +#if !__APPLE__ + return &threadEntry_; +#else + ThreadEntry* threadEntry = + static_cast(pthread_getspecific(inst_->pthreadKey_)); + if (!threadEntry) { + threadEntry = new ThreadEntry(); + int ret = pthread_setspecific(inst_->pthreadKey_, threadEntry); + checkPosixError(ret, "pthread_setspecific failed"); + } + return threadEntry; +#endif + } + static void preFork(void) { instance().lock_.lock(); // Make sure it's created } @@ -194,28 +211,39 @@ struct StaticMeta { } static void onForkChild(void) { + // only the current thread survives inst_->head_.next = inst_->head_.prev = &inst_->head_; - inst_->push_back(&threadEntry_); // only the current thread survives + inst_->push_back(getThreadEntry()); inst_->lock_.unlock(); } static void onThreadExit(void* ptr) { auto & meta = instance(); +#if !__APPLE__ + ThreadEntry* threadEntry = getThreadEntry(); + DCHECK_EQ(ptr, &meta); - // We wouldn't call pthread_setspecific unless we actually called get() - DCHECK_NE(threadEntry_.elementsCapacity, 0); + DCHECK_GT(threadEntry->elementsCapacity, 0); +#else + ThreadEntry* threadEntry = static_cast(ptr); +#endif { std::lock_guard g(meta.lock_); - meta.erase(&threadEntry_); - // No need to hold the lock any longer; threadEntry_ is private to this + meta.erase(threadEntry); + // No need to hold the lock any longer; the ThreadEntry is private to this // thread now that it's been removed from meta. 
} - FOR_EACH_RANGE(i, 0, threadEntry_.elementsCapacity) { - threadEntry_.elements[i].dispose(TLPDestructionMode::THIS_THREAD); + FOR_EACH_RANGE(i, 0, threadEntry->elementsCapacity) { + threadEntry->elements[i].dispose(TLPDestructionMode::THIS_THREAD); } - free(threadEntry_.elements); - threadEntry_.elements = NULL; + free(threadEntry->elements); + threadEntry->elements = NULL; pthread_setspecific(meta.pthreadKey_, NULL); + +#if __APPLE__ + // Allocated in getThreadEntry(); free it + delete threadEntry; +#endif } static int create() { @@ -270,21 +298,22 @@ struct StaticMeta { } /** - * Reserve enough space in the threadEntry_.elements for the item + * Reserve enough space in the ThreadEntry::elements for the item * @id to fit in. */ static void reserve(int id) { - size_t prevCapacity = threadEntry_.elementsCapacity; + auto& meta = instance(); + ThreadEntry* threadEntry = getThreadEntry(); + size_t prevCapacity = threadEntry->elementsCapacity; // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent // very slow start. size_t newCapacity = static_cast((id + 5) * 1.7); assert(newCapacity > prevCapacity); - auto& meta = instance(); ElementWrapper* reallocated = nullptr; // Need to grow. Note that we can't call realloc, as elements is // still linked in meta, so another thread might access invalid memory - // after realloc succeeds. We'll copy by hand and update threadEntry_ + // after realloc succeeds. We'll copy by hand and update our ThreadEntry // under the lock. if (usingJEMalloc()) { bool success = false; @@ -299,7 +328,7 @@ struct StaticMeta { // always expand our allocation to the real size. 
if (prevCapacity * sizeof(ElementWrapper) >= jemallocMinInPlaceExpandable) { - success = (rallocm(reinterpret_cast(&threadEntry_.elements), + success = (rallocm(reinterpret_cast(&threadEntry->elements), &realByteSize, newByteSize, 0, @@ -341,7 +370,7 @@ struct StaticMeta { std::lock_guard g(meta.lock_); if (prevCapacity == 0) { - meta.push_back(&threadEntry_); + meta.push_back(threadEntry); } if (reallocated) { @@ -351,31 +380,36 @@ struct StaticMeta { * destructing a ThreadLocal and writing to the elements vector * of this thread. */ - memcpy(reallocated, threadEntry_.elements, + memcpy(reallocated, threadEntry->elements, sizeof(ElementWrapper) * prevCapacity); using std::swap; - swap(reallocated, threadEntry_.elements); + swap(reallocated, threadEntry->elements); } - threadEntry_.elementsCapacity = newCapacity; + threadEntry->elementsCapacity = newCapacity; } free(reallocated); +#if !__APPLE__ if (prevCapacity == 0) { pthread_setspecific(meta.pthreadKey_, &meta); } +#endif } static ElementWrapper& get(int id) { - if (UNLIKELY(threadEntry_.elementsCapacity <= id)) { + ThreadEntry* threadEntry = getThreadEntry(); + if (UNLIKELY(threadEntry->elementsCapacity <= id)) { reserve(id); - assert(threadEntry_.elementsCapacity > id); + assert(threadEntry->elementsCapacity > id); } - return threadEntry_.elements[id]; + return threadEntry->elements[id]; } }; +#if !__APPLE__ template __thread ThreadEntry StaticMeta::threadEntry_ = {0}; +#endif template StaticMeta* StaticMeta::inst_ = nullptr; } // namespace threadlocal_detail -- 2.34.1