}
// Looks up the ElementWrapper that StaticMeta yields for id_ and returns its
// ptr member cast to T*. Nothing is modified here.
T* get() const {
- threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
+ threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
return static_cast<T*>(w.ptr);
}
}
// Forwards to ElementWrapper::release() on the wrapper looked up for id_ and
// returns the result cast to T*.
// NOTE(review): presumably the wrapper gives up ownership here, making the
// caller responsible for the returned object -- confirm against ElementWrapper.
T* release() {
- threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
+ threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
return static_cast<T*>(w.release());
}
void reset(T* newPtr = nullptr) {
auto guard = makeGuard([&] { delete newPtr; });
- threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
+ threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
w.dispose(TLPDestructionMode::THIS_THREAD);
guard.dismiss();
deleter(newPtr, TLPDestructionMode::THIS_THREAD);
}
});
- threadlocal_detail::ElementWrapper& w = StaticMeta::instance().get(&id_);
+ threadlocal_detail::ElementWrapper& w = StaticMeta::get(&id_);
w.dispose(TLPDestructionMode::THIS_THREAD);
guard.dismiss();
w.set(newPtr, deleter);
*/
void reserve(EntryID* id);
- ElementWrapper& get(EntryID* ent);
+ ElementWrapper& getElement(EntryID* ent);
static void initAtFork();
static void registerAtFork(
return *instance;
}
- ElementWrapper& get(EntryID* ent) {
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+ // Eliminate as many branches as possible on the lookup path:
+ // one branch on capacityCache_, vs. the three that instance().getElement() goes through:
+ // 1) instance() static initializer
+ // 2) getThreadEntry null check
+ // 3) elementsCapacity size check.
+ // capacityCache_ starts at 0 and is refreshed in getSlow() only after 1) and 2) have run, so "id < capacityCache_" cannot hold before they have.
+ FOLLY_ALWAYS_INLINE static ElementWrapper& get(EntryID* ent) {
+ uint32_t id = ent->getOrInvalid(); // an invalid id is uint32_t's max value, so it takes the getSlow() branch unless the cached capacity exceeds that
+ if (UNLIKELY(capacityCache_ <= id)) {
+ return getSlow(ent);
+ } else {
+ return threadEntryCache_->elements[id]; // reached only when capacityCache_ > id, i.e. after getSlow() has stored a non-zero capacity
+ }
+ }
+
+ static ElementWrapper& getSlow(EntryID* ent) { // slow path of get(): taken whenever capacityCache_ <= id
+ ElementWrapper& res = instance().getElement(ent);
+ // Refresh capacityCache_ from the ThreadEntry returned by getThreadEntry() so that later get() calls with an id below it take the single-branch path
+ capacityCache_ = getThreadEntry()->elementsCapacity;
+ return res;
+ }
+#else
+ static ElementWrapper& get(EntryID* ent) { // variant used when FOLLY_TLD_USE_FOLLY_TLS is not defined: no cached members, every lookup goes through instance().getElement()
+ return instance().getElement(ent);
+ }
+#endif
+
+ ElementWrapper& getElement(EntryID* ent) {
ThreadEntry* threadEntry = getThreadEntry();
uint32_t id = ent->getOrInvalid();
// if id is invalid, it is equal to uint32_t's max value.
// Returns the ThreadEntry obtained from instance().threadEntry_().
// With FOLLY_TLD_USE_FOLLY_TLS the result is fetched once, when the cached
// pointer is still null, and returned from the cache afterwards; without it
// every call goes through instance().threadEntry_().
// In this change the cache moves from a function-local static
// (threadEntryCache) to the class-level static threadEntryCache_, which the
// fast path of get() also reads.
inline static ThreadEntry* getThreadEntry() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
- static FOLLY_TLS ThreadEntry* threadEntryCache{nullptr};
- if (UNLIKELY(threadEntryCache == nullptr)) {
- threadEntryCache = instance().threadEntry_();
+ if (UNLIKELY(threadEntryCache_ == nullptr)) {
+ threadEntryCache_ = instance().threadEntry_();
}
- return threadEntryCache;
+ return threadEntryCache_;
#else
return instance().threadEntry_();
#endif
}
instance().lock_.unlock();
}
+
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+ static FOLLY_TLS ThreadEntry* threadEntryCache_; // cached result of instance().threadEntry_(); filled lazily in getThreadEntry() (FOLLY_TLS: presumably one copy per thread -- confirm)
+ static FOLLY_TLS size_t capacityCache_; // copy of the ThreadEntry's elementsCapacity, refreshed in getSlow() and compared against the id in get()
+#endif
};
+#ifdef FOLLY_TLD_USE_FOLLY_TLS
+template <class Tag, class AccessMode>
+FOLLY_TLS ThreadEntry* StaticMeta<Tag, AccessMode>::threadEntryCache_{nullptr}; // null until getThreadEntry() stores instance().threadEntry_() in it
+template <class Tag, class AccessMode>
+FOLLY_TLS size_t StaticMeta<Tag, AccessMode>::capacityCache_{0}; // 0 makes "capacityCache_ <= id" true for every id, so the first get() goes through getSlow()
+#endif
} // namespace threadlocal_detail
} // namespace folly
============================================================================
folly/test/ThreadLocalBenchmark.cpp relative time/iter iters/s
============================================================================
-BM_mt_tlp 2.30ns 434.53M
-BM_mt_pthread_get_specific 2.69ns 371.75M
-BM_mt_boost_tsp 11.66ns 85.78M
+BM_mt_tlp 1.92ns 520.02M
+BM_mt_pthread_get_specific 2.69ns 372.15M
+BM_mt_boost_tsp 11.81ns 84.67M
----------------------------------------------------------------------------
-BM_mt_tlp_multi 12.46ns 80.25M
-BM_mt_pthread_get_specific_multi 16.58ns 60.32M
-BM_mt_boost_tsp_multi 70.85ns 14.12M
+BM_mt_tlp_multi 7.53ns 132.79M
+BM_mt_pthread_get_specific_multi 15.80ns 63.29M
+BM_mt_boost_tsp_multi 71.70ns 13.95M
----------------------------------------------------------------------------
============================================================================
*/