2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Improved thread local storage for non-trivial types (similar speed as
19 * pthread_getspecific but only consumes a single pthread_key_t, and 4x faster
20 * than boost::thread_specific_ptr).
22 * Also includes an accessor interface to walk all the thread local child
23 * objects of a parent. accessAllThreads() initializes an accessor which holds
24 * a global lock *that blocks all creation and destruction of ThreadLocal
25 * objects with the same Tag* and can be used as an iterable container.
26 * accessAllThreads() can race with destruction of thread-local elements. We
27 * provide a strict mode which is dangerous because it requires the access lock
28 * to be held while destroying thread-local elements which could cause
29 * deadlocks. We gate this mode behind the AccessModeStrict template parameter.
31 * Intended use is for frequent write, infrequent read data access patterns such as counters.
34 * There are two classes here - ThreadLocal and ThreadLocalPtr. ThreadLocalPtr
35 * has semantics similar to boost::thread_specific_ptr. ThreadLocal is a thin
36 * wrapper around ThreadLocalPtr that manages allocation automatically.
38 * @author Spencer Ahrens (sahrens)
43 #include <boost/iterator/iterator_facade.hpp>
44 #include <folly/Likely.h>
45 #include <folly/Portability.h>
46 #include <folly/ScopeGuard.h>
47 #include <folly/SharedMutex.h>
48 #include <type_traits>
52 enum class TLPDestructionMode {
56 struct AccessModeStrict {};
59 #include <folly/detail/ThreadLocalDetail.h>
63 template <class T, class Tag, class AccessMode>
66 template <class T, class Tag = void, class AccessMode = void>
69 constexpr ThreadLocal() : constructor_([]() {
73 explicit ThreadLocal(std::function<T*()> constructor) :
74 constructor_(constructor) {
79 if (LIKELY(ptr != nullptr)) {
83 // separated new item creation out to speed up the fast path.
87 T* operator->() const {
91 T& operator*() const {
95 void reset(T* newPtr = nullptr) {
99 typedef typename ThreadLocalPtr<T, Tag, AccessMode>::Accessor Accessor;
100 Accessor accessAllThreads() const {
101 return tlp_.accessAllThreads();
105 ThreadLocal(ThreadLocal&&) = default;
106 ThreadLocal& operator=(ThreadLocal&&) = default;
110 ThreadLocal(const ThreadLocal&) = delete;
111 ThreadLocal& operator=(const ThreadLocal&) = delete;
114 auto ptr = constructor_();
119 mutable ThreadLocalPtr<T, Tag, AccessMode> tlp_;
120 std::function<T*()> constructor_;
124 * The idea here is that __thread is faster than pthread_getspecific, so we
125 * keep a __thread array of pointers to objects (ThreadEntry::elements) where
126 * each array has an index for each unique instance of the ThreadLocalPtr
127 * object. Each ThreadLocalPtr object has a unique id that is an index into
128 * these arrays so we can fetch the correct object from thread local storage very efficiently.
131 * In order to prevent unbounded growth of the id space and thus huge
132 * ThreadEntry::elements arrays, for example due to continuous creation and
133 * destruction of ThreadLocalPtr objects, we keep a set of all active
134 * instances. When an instance is destroyed we remove it from the active
135 * set and insert the id into freeIds_ for reuse. These operations require a
136 * global mutex, but only happen at construction and destruction time.
138 * We use a single global pthread_key_t per Tag to manage object destruction and
139 * memory cleanup upon thread exit because there is a finite number of
140 * pthread_key_t's available per machine.
142 * NOTE: Apple platforms don't support the same semantics for __thread that
143 * Linux does (and it's only supported at all on i386). For these, use
144 * pthread_setspecific()/pthread_getspecific() for the per-thread
145 * storage. Windows (MSVC and GCC) does support the same semantics
146 * with __declspec(thread).
149 template <class T, class Tag = void, class AccessMode = void>
150 class ThreadLocalPtr {
152 typedef threadlocal_detail::StaticMeta<Tag, AccessMode> StaticMeta;
155 constexpr ThreadLocalPtr() : id_() {}
157 ThreadLocalPtr(ThreadLocalPtr&& other) noexcept :
158 id_(std::move(other.id_)) {
161 ThreadLocalPtr& operator=(ThreadLocalPtr&& other) {
162 assert(this != &other);
164 id_ = std::move(other.id_);
173 threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
174 return static_cast<T*>(w.ptr);
177 T* operator->() const {
181 T& operator*() const {
186 threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
188 return static_cast<T*>(w.release());
191 void reset(T* newPtr = nullptr) {
192 auto guard = makeGuard([&] { delete newPtr; });
193 threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
195 w.dispose(TLPDestructionMode::THIS_THREAD);
200 explicit operator bool() const {
201 return get() != nullptr;
205 * reset() that transfers ownership from a smart pointer
210 typename = typename std::enable_if<
211 std::is_convertible<SourceT*, T*>::value>::type>
212 void reset(std::unique_ptr<SourceT, Deleter> source) {
213 auto deleter = [delegate = source.get_deleter()](
214 T * ptr, TLPDestructionMode) {
217 reset(source.release(), deleter);
221 * reset() that transfers ownership from a smart pointer with the default
226 typename = typename std::enable_if<
227 std::is_convertible<SourceT*, T*>::value>::type>
228 void reset(std::unique_ptr<SourceT> source) {
229 reset(source.release());
233 * reset() with a custom deleter:
234 * deleter(T* ptr, TLPDestructionMode mode)
235 * "mode" is ALL_THREADS if we're destructing this ThreadLocalPtr (and thus
236 * deleting pointers for all threads), and THIS_THREAD if we're only deleting
237 * the member for one thread (because of thread exit or reset()).
238 * Invoking the deleter must not throw.
240 template <class Deleter>
241 void reset(T* newPtr, const Deleter& deleter) {
242 auto guard = makeGuard([&] {
244 deleter(newPtr, TLPDestructionMode::THIS_THREAD);
247 threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
248 w.dispose(TLPDestructionMode::THIS_THREAD);
250 w.set(newPtr, deleter);
253 // Holds a global lock for iteration through all thread local child objects.
254 // Can be used as an iterable container.
255 // Use accessAllThreads() to obtain one.
257 friend class ThreadLocalPtr<T, Tag, AccessMode>;
259 threadlocal_detail::StaticMetaBase& meta_;
260 SharedMutex* accessAllThreadsLock_;
266 friend class Iterator;
268 // The iterators obtained from Accessor are bidirectional iterators.
269 class Iterator : public boost::iterator_facade<
272 boost::bidirectional_traversal_tag> { // traversal
273 friend class Accessor;
274 friend class boost::iterator_core_access;
275 const Accessor* accessor_;
276 threadlocal_detail::ThreadEntry* e_;
288 T& dereference() const {
289 return *static_cast<T*>(e_->elements[accessor_->id_].ptr);
292 bool equal(const Iterator& other) const {
293 return (accessor_->id_ == other.accessor_->id_ &&
297 explicit Iterator(const Accessor* accessor)
298 : accessor_(accessor),
299 e_(&accessor_->meta_.head_) {
303 return (e_->elements &&
304 accessor_->id_ < e_->elementsCapacity &&
305 e_->elements[accessor_->id_].ptr);
308 void incrementToValid() {
309 for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->next) { }
312 void decrementToValid() {
313 for (; e_ != &accessor_->meta_.head_ && !valid(); e_ = e_->prev) { }
321 Iterator begin() const {
322 return ++Iterator(this);
325 Iterator end() const {
326 return Iterator(this);
329 Accessor(const Accessor&) = delete;
330 Accessor& operator=(const Accessor&) = delete;
332 Accessor(Accessor&& other) noexcept
333 : meta_(other.meta_),
334 accessAllThreadsLock_(other.accessAllThreadsLock_),
338 other.accessAllThreadsLock_ = nullptr;
339 other.lock_ = nullptr;
342 Accessor& operator=(Accessor&& other) noexcept {
343 // Each Tag has its own unique meta, and accessors with different Tags
344 // have different types. So either *this is empty, or this and other
345 // have the same tag. But if they have the same tag, they have the same
346 // meta (and lock), so they'd both hold the lock at the same time,
347 // which is impossible, which leaves only one possible scenario --
348 // *this is empty. Assert it.
349 assert(&meta_ == &other.meta_);
350 assert(lock_ == nullptr);
352 swap(accessAllThreadsLock_, other.accessAllThreadsLock_);
353 swap(lock_, other.lock_);
354 swap(id_, other.id_);
358 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
359 accessAllThreadsLock_(nullptr),
364 explicit Accessor(uint32_t id)
365 : meta_(threadlocal_detail::StaticMeta<Tag, AccessMode>::instance()),
366 accessAllThreadsLock_(&meta_.accessAllThreadsLock_),
367 lock_(&meta_.lock_) {
368 accessAllThreadsLock_->lock();
376 DCHECK(accessAllThreadsLock_ != nullptr);
377 accessAllThreadsLock_->unlock();
380 accessAllThreadsLock_ = nullptr;
385 // accessor allows a client to iterate through all thread local child
386 // elements of this ThreadLocal instance. Holds a global lock for each <Tag>
387 Accessor accessAllThreads() const {
388 static_assert(!std::is_same<Tag, void>::value,
389 "Must use a unique Tag to use the accessAllThreads feature");
390 return Accessor(id_.getOrAllocate(StaticMeta::instance()));
395 StaticMeta::instance().destroy(&id_);
399 ThreadLocalPtr(const ThreadLocalPtr&) = delete;
400 ThreadLocalPtr& operator=(const ThreadLocalPtr&) = delete;
402 mutable typename StaticMeta::EntryID id_;