/*
 * Copyright 2017 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#pragma once

#include <atomic>
#include <functional>
#include <limits>
#include <mutex>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

#include <glog/logging.h>

#include <folly/Assume.h>
#include <folly/Exception.h>
#include <folly/Foreach.h>
#include <folly/Function.h>
#include <folly/Likely.h>
#include <folly/Malloc.h>
#include <folly/MicroSpinLock.h>
#include <folly/Portability.h>
#include <folly/ScopeGuard.h>
#include <folly/SharedMutex.h>
#include <folly/portability/PThread.h>

#include <folly/detail/StaticSingletonManager.h>

// In general, emutls cleanup is not guaranteed to play nice with the way
// StaticMeta mixes direct pthread calls and the use of __thread. This has
// caused problems on multiple platforms so don't use __thread there.
//
// XXX: Ideally we would instead determine if emutls is in use at runtime as it
// is possible to configure glibc on Linux to use emutls regardless.
#if !FOLLY_MOBILE && !defined(__APPLE__) && !defined(_MSC_VER)
#define FOLLY_TLD_USE_FOLLY_TLS 1
#else
#undef FOLLY_TLD_USE_FOLLY_TLS
#endif

namespace folly {
namespace threadlocal_detail {

/**
 * POD wrapper around an element (a void*) and an associated deleter.
 * This must be POD, as we memset() it to 0 and memcpy() it around.
 */
struct ElementWrapper {
  using DeleterFunType = void(void*, TLPDestructionMode);
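
  // Note: two deleter representations share one slot (see the union below).
  // deleter1 is a plain function pointer, used for the default "just delete
  // the object" case; deleter2 is a heap-allocated std::function for
  // user-supplied deleters. ownsDeleter records which member is active and
  // whether it must be freed in cleanup().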

  bool dispose(TLPDestructionMode mode) {
    if (ptr == nullptr) {
      return false;
    }

    DCHECK(deleter1 != nullptr);
    ownsDeleter ? (*deleter2)(ptr, mode) : (*deleter1)(ptr, mode);
    cleanup();
    return true;
  }

  template <class Ptr>
  void set(Ptr p) {
    auto guard = makeGuard([&] { delete p; });
    DCHECK(ptr == nullptr);
    DCHECK(deleter1 == nullptr);
    if (p) {
      ptr = p;
      deleter1 = [](void* pt, TLPDestructionMode) {
        delete static_cast<Ptr>(pt);
      };
      ownsDeleter = false;
      guard.dismiss();
    }
  }

  template <class Ptr, class Deleter>
  void set(Ptr p, const Deleter& d) {
    auto guard = makeGuard([&] {
      if (p) {
        d(p, TLPDestructionMode::THIS_THREAD);
      }
    });
    DCHECK(ptr == nullptr);
    DCHECK(deleter2 == nullptr);
    if (p) {
      ptr = p;
      auto d2 = d; // gcc-4.8 doesn't decay types correctly in lambda captures
      deleter2 = new std::function<DeleterFunType>(
          [d2](void* pt, TLPDestructionMode mode) {
            d2(static_cast<Ptr>(pt), mode);
          });
      ownsDeleter = true;
      guard.dismiss();
    }
  }

  void cleanup() {
    if (ownsDeleter) {
      delete deleter2;
    }
    ptr = nullptr;
    deleter1 = nullptr;
    ownsDeleter = false;
  }

  void* ptr;
  union {
    DeleterFunType* deleter1;
    std::function<DeleterFunType>* deleter2;
  };
  bool ownsDeleter;
};
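
// A minimal sketch of the intended ElementWrapper lifecycle (illustrative
// only; the real call sites are in ThreadLocalPtr and StaticMeta):
//
//   ElementWrapper w;
//   std::memset(&w, 0, sizeof(w));  // POD: all-zero is the empty state
//   w.set(new int(42));             // stores the pointer + default deleter
//   w.dispose(TLPDestructionMode::THIS_THREAD);  // deletes the int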

struct StaticMetaBase;

/**
 * Per-thread entry. Each thread using a StaticMeta object has one.
 * This is written from the owning thread only (under the lock), read
 * from the owning thread (no lock necessary), and read from other threads
 * (under the lock).
 */
struct ThreadEntry {
  ElementWrapper* elements{nullptr};
  size_t elementsCapacity{0};
  ThreadEntry* next{nullptr};
  ThreadEntry* prev{nullptr};
  StaticMetaBase* meta{nullptr};
};
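
// The next/prev pointers link each ThreadEntry into a doubly-linked list of
// all live entries for its StaticMeta, anchored at StaticMetaBase::head_;
// see push_back()/erase() below.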

constexpr uint32_t kEntryIDInvalid = std::numeric_limits<uint32_t>::max();

struct PthreadKeyUnregisterTester;

/**
 * We want to disable onThreadExit call at the end of shutdown, we don't care
 * about leaking memory at that point.
 *
 * Otherwise if ThreadLocal is used in a shared library, onThreadExit may be
 * called after dlclose().
 *
 * This class has one single static instance; however since it's so widely
 * used, directly or indirectly, by so many classes, we need to take care to
 * avoid problems stemming from the Static Initialization/Destruction Order
 * Fiascos. Therefore this class needs to be constexpr-constructible, so as
 * to avoid the need for this to participate in init/destruction order.
 */
class PthreadKeyUnregister {
 public:
  static constexpr size_t kMaxKeys = 1UL << 16;

  ~PthreadKeyUnregister() {
    // If static constructor priorities are not supported then
    // ~PthreadKeyUnregister logic is not safe.
#if !defined(__APPLE__) && !defined(_MSC_VER)
    MSLGuard lg(lock_);
    while (size_) {
      pthread_key_delete(keys_[--size_]);
    }
#endif
  }

  static void registerKey(pthread_key_t key) {
    instance_.registerKeyImpl(key);
  }

 private:
  /**
   * Only one global instance should exist, hence this is private.
   * See also the important note at the top of this class about `constexpr`
   * usage.
   */
  constexpr PthreadKeyUnregister() : lock_(), size_(0), keys_() {}
  friend struct folly::threadlocal_detail::PthreadKeyUnregisterTester;

  void registerKeyImpl(pthread_key_t key) {
    MSLGuard lg(lock_);
    if (size_ == kMaxKeys) {
      throw std::logic_error("pthread_key limit has already been reached");
    }
    keys_[size_++] = key;
  }

  MicroSpinLock lock_;
  size_t size_;
  pthread_key_t keys_[kMaxKeys];

  static PthreadKeyUnregister instance_;
};
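
// Typical use (a sketch; the real call site is StaticMetaBase's
// constructor): create a pthread key, then immediately hand it to
// PthreadKeyUnregister so it is deleted on shutdown:
//
//   pthread_key_t key;
//   int ret = pthread_key_create(&key, &StaticMetaBase::onThreadExit);
//   checkPosixError(ret, "pthread_key_create failed");
//   PthreadKeyUnregister::registerKey(key);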

struct StaticMetaBase {
  // Represents an ID of a thread local object. Initially set to the maximum
  // uint. This representation allows us to avoid a branch in accessing TLS
  // data (if you test capacity > id and id = maxint, the test always fails).
  // It also lets us keep a constexpr constructor and avoid SIOF.
  class EntryID {
   public:
    std::atomic<uint32_t> value;

    constexpr EntryID() : value(kEntryIDInvalid) {}

    EntryID(EntryID&& other) noexcept : value(other.value.load()) {
      other.value = kEntryIDInvalid;
    }

    EntryID& operator=(EntryID&& other) {
      assert(this != &other);
      value = other.value.load();
      other.value = kEntryIDInvalid;
      return *this;
    }

    EntryID(const EntryID& other) = delete;
    EntryID& operator=(const EntryID& other) = delete;

    uint32_t getOrInvalid() {
      // It's OK for this to be relaxed, even though we're effectively doing
      // double checked locking in using this value. We only care about the
      // uniqueness of IDs, getOrAllocate does not modify any other memory
      // this thread will use.
      return value.load(std::memory_order_relaxed);
    }

    uint32_t getOrAllocate(StaticMetaBase& meta) {
      uint32_t id = getOrInvalid();
      if (id != kEntryIDInvalid) {
        return id;
      }
      // The lock inside allocate ensures that a single value is allocated
      return meta.allocate(this);
    }
  };
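
  // Worked example of the kEntryIDInvalid trick (illustrative): a freshly
  // constructed EntryID holds UINT32_MAX, so the fast-path check
  //   if (threadEntry->elementsCapacity <= id) { /* slow path */ }
  // always sends an unallocated ID to the slow path; no separate
  // "is this ID valid?" branch is needed.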

  StaticMetaBase(ThreadEntry* (*threadEntry)(), bool strict);

  [[noreturn]] ~StaticMetaBase() {
    folly::assume_unreachable();
  }

  void push_back(ThreadEntry* t) {
    t->next = &head_;
    t->prev = head_.prev;
    head_.prev->next = t;
    head_.prev = t;
  }

  void erase(ThreadEntry* t) {
    t->next->prev = t->prev;
    t->prev->next = t->next;
    t->next = t->prev = t;
  }
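
  // push_back/erase maintain a circular doubly-linked list of all live
  // ThreadEntry objects, anchored at the sentinel head_ (an empty list has
  // head_.next == head_.prev == &head_). erase() points a removed entry at
  // itself, so erasing it a second time is harmless.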

  static void onThreadExit(void* ptr);

  uint32_t allocate(EntryID* ent);

  void destroy(EntryID* ent);

  /**
   * Reserve enough space in the ThreadEntry::elements for the item
   * @id to fit in.
   */
  void reserve(EntryID* id);

  ElementWrapper& get(EntryID* ent);

  static void initAtFork();
  static void registerAtFork(
      folly::Function<void()> prepare,
      folly::Function<void()> parent,
      folly::Function<void()> child);

  uint32_t nextId_;
  std::vector<uint32_t> freeIds_;
  std::mutex lock_;
  SharedMutex accessAllThreadsLock_;
  pthread_key_t pthreadKey_;
  ThreadEntry head_;
  ThreadEntry* (*threadEntry_)();
  bool strict_;
};

// Held in a singleton to track our global instances.
// We have one of these per "Tag", by default one for the whole system.
//
// Creating and destroying ThreadLocalPtr objects, as well as thread exit
// for threads that use ThreadLocalPtr objects, collide on a lock inside
// StaticMeta; you can specify multiple Tag types to break that lock.
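//
// For example (a sketch; the Tag template parameter on ThreadLocalPtr is the
// public way to select a StaticMeta instance; Widget and MySubsystemTag are
// hypothetical names):
//
//   struct MySubsystemTag {};
//   folly::ThreadLocalPtr<Widget, MySubsystemTag> widgetTls;
//
// ThreadLocalPtr objects with distinct tags contend on distinct StaticMeta
// locks.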
template <class Tag, class AccessMode>
struct StaticMeta : StaticMetaBase {
  StaticMeta()
      : StaticMetaBase(
            &StaticMeta::getThreadEntrySlow,
            std::is_same<AccessMode, AccessModeStrict>::value) {
    registerAtFork(
        /*prepare*/ &StaticMeta::preFork,
        /*parent*/ &StaticMeta::onForkParent,
        /*child*/ &StaticMeta::onForkChild);
  }

  static StaticMeta<Tag, AccessMode>& instance() {
    // Leak it on exit, there's only one per process and we don't have to
    // worry about synchronization with exiting threads.
    /* library-local */ static auto instance =
        detail::createGlobal<StaticMeta<Tag, AccessMode>, void>();
    return *instance;
  }

  ElementWrapper& get(EntryID* ent) {
    ThreadEntry* threadEntry = getThreadEntry();
    uint32_t id = ent->getOrInvalid();
    // if id is invalid, it is equal to uint32_t's max value.
    // x <= max value is always true
    if (UNLIKELY(threadEntry->elementsCapacity <= id)) {
      reserve(ent);
      id = ent->getOrInvalid();
      assert(threadEntry->elementsCapacity > id);
    }
    return threadEntry->elements[id];
  }

  static ThreadEntry* getThreadEntrySlow() {
    auto& meta = instance();
    auto key = meta.pthreadKey_;
    ThreadEntry* threadEntry =
        static_cast<ThreadEntry*>(pthread_getspecific(key));
    if (!threadEntry) {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
      static FOLLY_TLS ThreadEntry threadEntrySingleton;
      threadEntry = &threadEntrySingleton;
#else
      threadEntry = new ThreadEntry();
#endif
      threadEntry->meta = &meta;
      int ret = pthread_setspecific(key, threadEntry);
      checkPosixError(ret, "pthread_setspecific failed");
    }
    return threadEntry;
  }

  inline static ThreadEntry* getThreadEntry() {
#ifdef FOLLY_TLD_USE_FOLLY_TLS
    static FOLLY_TLS ThreadEntry* threadEntryCache{nullptr};
    if (UNLIKELY(threadEntryCache == nullptr)) {
      threadEntryCache = instance().threadEntry_();
    }
    return threadEntryCache;
#else
    return instance().threadEntry_();
#endif
  }
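
  // Fork handling: preFork/onForkParent/onForkChild are registered via
  // registerAtFork() in the constructor above. The prepare handler acquires
  // lock_ so that no other thread can hold it across fork(); the parent and
  // child handlers then release it on their respective sides.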
  static void preFork(void) {
    instance().lock_.lock(); // Make sure it's created
  }

  static void onForkParent(void) { instance().lock_.unlock(); }

  static void onForkChild(void) {
    // only the current thread survives
    instance().head_.next = instance().head_.prev = &instance().head_;
    ThreadEntry* threadEntry = getThreadEntry();
    // If this thread was in the list before the fork, add it back.
    if (threadEntry->elementsCapacity != 0) {
      instance().push_back(threadEntry);
    }
    instance().lock_.unlock();
  }
};

} // namespace threadlocal_detail
} // namespace folly