From 206dce5b9e700e02026fbff304433d066c90ab3c Mon Sep 17 00:00:00 2001 From: Philip Pronin Date: Tue, 1 Nov 2016 21:24:55 -0700 Subject: [PATCH] fix race between StaticMetaBase::destroy() and StaticMetaBase::onThreadExit() Summary: We would like to guarantee that after `folly::ThreadLocal<>` dtor returns no per-thread instances are longer alive. Currently this is not a case: * T1 is excuting `StaticMetaBase::onThreadExit()`, it acquired all per-thread instances and erased them from meta under `accessAllThreadsLock_`. * T2 destroys `folly::ThreadLocal<>`, it executes `StaticMetaBase::destroy()`, collects all per-thread instances (thus missing the ones being destroyed by T1), destroys them and returns. * T1 executes dtor of per-thread instances, after parent `folly::ThreadLocal<>` dtor is finished. Reviewed By: ot Differential Revision: D4109820 fbshipit-source-id: d547b8cc77c9871126538c38644c2e98ddccf220 --- folly/ThreadCachedInt.h | 15 +------ folly/detail/ThreadLocalDetail.cpp | 64 +++++++++++++++++++----------- 2 files changed, 42 insertions(+), 37 deletions(-) diff --git a/folly/ThreadCachedInt.h b/folly/ThreadCachedInt.h index 10ccbbc5..0627e878 100644 --- a/folly/ThreadCachedInt.h +++ b/folly/ThreadCachedInt.h @@ -47,7 +47,7 @@ class ThreadCachedInt : boost::noncopyable { void increment(IntT inc) { auto cache = cache_.get(); - if (UNLIKELY(cache == nullptr || cache->parent_ == nullptr)) { + if (UNLIKELY(cache == nullptr)) { cache = new IntCache(*this); cache_.reset(cache); } @@ -122,15 +122,6 @@ class ThreadCachedInt : boost::noncopyable { target_.store(newVal, std::memory_order_release); } - // This is a little tricky - it's possible that our IntCaches are still alive - // in another thread and will get destroyed after this destructor runs, so we - // need to make sure we signal that this parent is dead. - ~ThreadCachedInt() { - for (auto& cache : cache_.accessAllThreads()) { - cache.parent_ = nullptr; - } - } - private: std::atomic target_; std::atomic cacheSize_; @@ -173,9 +164,7 @@ class ThreadCachedInt : boost::noncopyable { } ~IntCache() { - if (parent_) { - flush(); - } + flush(); } }; }; diff --git a/folly/detail/ThreadLocalDetail.cpp b/folly/detail/ThreadLocalDetail.cpp index 6be41ed4..be0cbf60 100644 --- a/folly/detail/ThreadLocalDetail.cpp +++ b/folly/detail/ThreadLocalDetail.cpp @@ -98,38 +98,54 @@ uint32_t StaticMetaBase::allocate(EntryID* ent) { void StaticMetaBase::destroy(EntryID* ent) { try { auto& meta = *this; + // Elements in other threads that use this id. std::vector elements; + { - std::lock_guard g(meta.lock_); - uint32_t id = ent->value.exchange(kEntryIDInvalid); - if (id == kEntryIDInvalid) { - return; + SharedMutex::WriteHolder wlock; + if (meta.strict_) { + /* + * In strict mode, the logic guarantees per-thread instances are + * destroyed by the moment ThreadLocal<> dtor returns. + * In order to achieve that, we should wait until concurrent + * onThreadExit() calls (that might acquire ownership over per-thread + * instances in order to destroy them) are finished. + */ + wlock = SharedMutex::WriteHolder(meta.accessAllThreadsLock_); } - for (ThreadEntry* e = meta.head_.next; e != &meta.head_; e = e->next) { - if (id < e->elementsCapacity && e->elements[id].ptr) { - elements.push_back(e->elements[id]); - - /* - * Writing another thread's ThreadEntry from here is fine; - * the only other potential reader is the owning thread -- - * from onThreadExit (which grabs the lock, so is properly - * synchronized with us) or from get(), which also grabs - * the lock if it needs to resize the elements vector. - * - * We can't conflict with reads for a get(id), because - * it's illegal to call get on a thread local that's - * destructing. - */ - e->elements[id].ptr = nullptr; - e->elements[id].deleter1 = nullptr; - e->elements[id].ownsDeleter = false; + { + std::lock_guard g(meta.lock_); + uint32_t id = ent->value.exchange(kEntryIDInvalid); + if (id == kEntryIDInvalid) { + return; + } + + for (ThreadEntry* e = meta.head_.next; e != &meta.head_; e = e->next) { + if (id < e->elementsCapacity && e->elements[id].ptr) { + elements.push_back(e->elements[id]); + + /* + * Writing another thread's ThreadEntry from here is fine; + * the only other potential reader is the owning thread -- + * from onThreadExit (which grabs the lock, so is properly + * synchronized with us) or from get(), which also grabs + * the lock if it needs to resize the elements vector. + * + * We can't conflict with reads for a get(id), because + * it's illegal to call get on a thread local that's + * destructing. + */ + e->elements[id].ptr = nullptr; + e->elements[id].deleter1 = nullptr; + e->elements[id].ownsDeleter = false; + } } + meta.freeIds_.push_back(id); } - meta.freeIds_.push_back(id); } - // Delete elements outside the lock + // Delete elements outside the locks. for (ElementWrapper& elem : elements) { elem.dispose(TLPDestructionMode::ALL_THREADS); } -- 2.34.1