From: Ben Maurer Date: Mon, 12 Oct 2015 21:35:54 +0000 (-0700) Subject: SIOF-proof thread local X-Git-Tag: deprecate-dynamic-initializer~341 X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=2bebe62f4abfe60242af8dc71a3d6b12f1416c8b;p=folly.git SIOF-proof thread local Summary: Right now ThreadLocal & friends don't operate correctly when used as a static variable (which is the idiomatic way to use them). The TLS id is allocated in the static constructor so anybody who uses the ID prior to first use would use an invalid ID. This makes ThreadLocal unusable for core code such as per-thread reference counting. This diff allocates the ID on first use. By making the invalid ID maxint we avoid adding any extra branches in the fast path. We can then make the constructor a constexpr meaning that initialization will happen prior to any code running. Reviewed By: @meyering Differential Revision: D2457989 fb-gh-sync-id: 21d0c0d00c638fbbd36148d14d4c891f66f83706 --- diff --git a/folly/ThreadLocal.h b/folly/ThreadLocal.h index fee6fac4..a0d93fb4 100644 --- a/folly/ThreadLocal.h +++ b/folly/ThreadLocal.h @@ -59,7 +59,7 @@ template class ThreadLocalPtr; template class ThreadLocal { public: - ThreadLocal() = default; + constexpr ThreadLocal() {} T* get() const { T* ptr = tlp_.get(); @@ -134,18 +134,19 @@ class ThreadLocal { template class ThreadLocalPtr { + private: + typedef threadlocal_detail::StaticMeta StaticMeta; public: - ThreadLocalPtr() : id_(threadlocal_detail::StaticMeta::create()) { } + constexpr ThreadLocalPtr() : id_() {} - ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : id_(other.id_) { - other.id_ = 0; + ThreadLocalPtr(ThreadLocalPtr&& other) noexcept : + id_(std::move(other.id_)) { } ThreadLocalPtr& operator=(ThreadLocalPtr&& other) { assert(this != &other); destroy(); - id_ = other.id_; - other.id_ = 0; + id_ = std::move(other.id_); return *this; } @@ -154,7 +155,8 @@ class ThreadLocalPtr { } T* get() const { - return static_cast(threadlocal_detail::StaticMeta::get(id_).ptr); + threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); + return static_cast(w.ptr); } T* operator->() const { @@ -166,15 +168,14 @@ class ThreadLocalPtr { } T* release() { - threadlocal_detail::ElementWrapper& w = - threadlocal_detail::StaticMeta::get(id_); + threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); return static_cast(w.release()); } void reset(T* newPtr = nullptr) { - threadlocal_detail::ElementWrapper& w = - threadlocal_detail::StaticMeta::get(id_); + threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); + if (w.ptr != newPtr) { w.dispose(TLPDestructionMode::THIS_THREAD); w.set(newPtr); @@ -194,8 +195,7 @@ class ThreadLocalPtr { */ template void reset(T* newPtr, Deleter deleter) { - threadlocal_detail::ElementWrapper& w = - threadlocal_detail::StaticMeta::get(id_); + threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_); if (w.ptr != newPtr) { w.dispose(TLPDestructionMode::THIS_THREAD); w.set(newPtr, deleter); @@ -330,21 +330,19 @@ class ThreadLocalPtr { Accessor accessAllThreads() const { static_assert(!std::is_same::value, "Must use a unique Tag to use the accessAllThreads feature"); - return Accessor(id_); + return Accessor(id_.getOrAllocate()); } private: void destroy() { - if (id_) { - threadlocal_detail::StaticMeta::destroy(id_); - } + StaticMeta::destroy(&id_); } // non-copyable ThreadLocalPtr(const ThreadLocalPtr&) = delete; ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete; - uint32_t id_; // every instantiation has a unique id + mutable typename StaticMeta::EntryID id_; }; } // namespace folly diff --git a/folly/detail/ThreadLocalDetail.h b/folly/detail/ThreadLocalDetail.h index bd8658cf..d3d62ad8 100644 --- a/folly/detail/ThreadLocalDetail.h +++ b/folly/detail/ThreadLocalDetail.h @@ -161,6 +161,8 @@ struct ThreadEntry { ThreadEntry* prev; }; + + // Held in a singleton to track our global instances. // We have one of these per "Tag", by default one for the whole system // (Tag=void). @@ -170,6 +172,50 @@ struct ThreadEntry { // StaticMeta; you can specify multiple Tag types to break that lock. template struct StaticMeta { + // Represents an ID of a thread local object. Initially set to the maximum + // uint. This representation allows us to avoid a branch in accessing TLS data + // (because if you test capacity > id if id = maxint then the test will always + // fail). It allows us to keep a constexpr constructor and avoid SIOF. + class EntryID { + public: + static constexpr uint32_t kInvalid = std::numeric_limits::max(); + std::atomic value; + + constexpr EntryID() : value(kInvalid) { + } + + EntryID(EntryID&& other) noexcept : value(other.value.load()) { + other.value = kInvalid; + } + + EntryID& operator=(EntryID&& other) { + assert(this != &other); + value = other.value.load(); + other.value = kInvalid; + return *this; + } + + EntryID(const EntryID& other) = delete; + EntryID& operator=(const EntryID& other) = delete; + + uint32_t getOrInvalid() { + // It's OK for this to be relaxed, even though we're effectively doing + // double checked locking in using this value. We only care about the + // uniqueness of IDs, getOrAllocate does not modify any other memory + // this thread will use. + return value.load(std::memory_order_relaxed); + } + + uint32_t getOrAllocate() { + uint32_t id = getOrInvalid(); + if (id != kInvalid) { + return id; + } + // The lock inside allocate ensures that a single value is allocated + return instance().allocate(this); + } + }; + static StaticMeta& instance() { // Leak it on exit, there's only one per process and we don't have to // worry about synchronization with exiting threads. @@ -303,26 +349,40 @@ struct StaticMeta { #endif } - static uint32_t create() { + static uint32_t allocate(EntryID* ent) { uint32_t id; auto & meta = instance(); std::lock_guard g(meta.lock_); + + id = ent->value.load(); + if (id != EntryID::kInvalid) { + return id; + } + if (!meta.freeIds_.empty()) { id = meta.freeIds_.back(); meta.freeIds_.pop_back(); } else { id = meta.nextId_++; } + + uint32_t old_id = ent->value.exchange(id); + DCHECK_EQ(old_id, EntryID::kInvalid); return id; } - static void destroy(uint32_t id) { + static void destroy(EntryID* ent) { try { auto & meta = instance(); // Elements in other threads that use this id. std::vector elements; { std::lock_guard g(meta.lock_); + uint32_t id = ent->value.exchange(EntryID::kInvalid); + if (id == EntryID::kInvalid) { + return; + } + for (ThreadEntry* e = meta.head_.next; e != &meta.head_; e = e->next) { if (id < e->elementsCapacity && e->elements[id].ptr) { elements.push_back(e->elements[id]); @@ -358,13 +418,18 @@ struct StaticMeta { * Reserve enough space in the ThreadEntry::elements for the item * @id to fit in. */ - static void reserve(uint32_t id) { + static void reserve(EntryID* id) { auto& meta = instance(); ThreadEntry* threadEntry = getThreadEntry(); size_t prevCapacity = threadEntry->elementsCapacity; + + uint32_t idval = id->getOrAllocate(); + if (prevCapacity > idval) { + return; + } // Growth factor < 2, see folly/docs/FBVector.md; + 5 to prevent // very slow start. - size_t newCapacity = static_cast((id + 5) * 1.7); + size_t newCapacity = static_cast((idval + 5) * 1.7); assert(newCapacity > prevCapacity); ElementWrapper* reallocated = nullptr; @@ -444,10 +509,14 @@ struct StaticMeta { #endif } - static ElementWrapper& get(uint32_t id) { + static ElementWrapper& get(EntryID* ent) { ThreadEntry* threadEntry = getThreadEntry(); + uint32_t id = ent->getOrInvalid(); + // if id is invalid, it is equal to uint32_t's max value. + // x <= max value is always true if (UNLIKELY(threadEntry->elementsCapacity <= id)) { - reserve(id); + reserve(ent); + id = ent->getOrInvalid(); assert(threadEntry->elementsCapacity > id); } return threadEntry->elements[id]; diff --git a/folly/test/ThreadLocalTest.cpp b/folly/test/ThreadLocalTest.cpp index b6ab9cdf..a319c2f6 100644 --- a/folly/test/ThreadLocalTest.cpp +++ b/folly/test/ThreadLocalTest.cpp @@ -538,6 +538,21 @@ TEST(ThreadLocal, Fork2) { } } +// clang is unable to compile this code unless in c++14 mode. +#if __cplusplus >= 201402L +namespace { +// This will fail to compile unless ThreadLocal{Ptr} has a constexpr +// default constructor. This ensures that ThreadLocal is safe to use in +// static constructors without worrying about initialization order +class ConstexprThreadLocalCompile { + ThreadLocal a_; + ThreadLocalPtr b_; + + constexpr ConstexprThreadLocalCompile() {} +}; +} +#endif + // Simple reference implementation using pthread_get_specific template class PThreadGetSpecific {