/*
 * Copyright 2014 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <folly/ThreadLocal.h>

#include <pthread.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

#include <array>
#include <atomic>
#include <chrono>
#include <condition_variable>
#include <map>
#include <memory>
#include <mutex>
#include <set>
#include <thread>
#include <unordered_map>

#include <boost/thread/tss.hpp>
#include <gflags/gflags.h>
#include <glog/logging.h>
#include <gtest/gtest.h>

#include <folly/Benchmark.h>
#include <folly/Foreach.h>

using namespace folly;
49 static void customDeleter(Widget* w, TLPDestructionMode mode) {
50 totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000;
54 int Widget::totalVal_ = 0;
56 TEST(ThreadLocalPtr, BasicDestructor) {
57 Widget::totalVal_ = 0;
58 ThreadLocalPtr<Widget> w;
60 w.reset(new Widget());
63 EXPECT_EQ(10, Widget::totalVal_);
66 TEST(ThreadLocalPtr, CustomDeleter1) {
67 Widget::totalVal_ = 0;
69 ThreadLocalPtr<Widget> w;
71 w.reset(new Widget(), Widget::customDeleter);
74 EXPECT_EQ(10, Widget::totalVal_);
76 EXPECT_EQ(10, Widget::totalVal_);
79 TEST(ThreadLocalPtr, resetNull) {
80 ThreadLocalPtr<int> tl;
83 EXPECT_TRUE(static_cast<bool>(tl));
84 EXPECT_EQ(*tl.get(), 4);
89 TEST(ThreadLocalPtr, TestRelease) {
90 Widget::totalVal_ = 0;
91 ThreadLocalPtr<Widget> w;
92 std::unique_ptr<Widget> wPtr;
93 std::thread([&w, &wPtr]() {
94 w.reset(new Widget());
97 wPtr.reset(w.release());
99 EXPECT_EQ(0, Widget::totalVal_);
101 EXPECT_EQ(10, Widget::totalVal_);
104 // Test deleting the ThreadLocalPtr object
105 TEST(ThreadLocalPtr, CustomDeleter2) {
106 Widget::totalVal_ = 0;
109 std::condition_variable cv;
115 State state = State::START;
117 ThreadLocalPtr<Widget> w;
118 t = std::thread([&]() {
119 w.reset(new Widget(), Widget::customDeleter);
122 // Notify main thread that we're done
124 std::unique_lock<std::mutex> lock(mutex);
129 // Wait for main thread to allow us to exit
131 std::unique_lock<std::mutex> lock(mutex);
132 while (state != State::EXIT) {
138 // Wait for main thread to start (and set w.get()->val_)
140 std::unique_lock<std::mutex> lock(mutex);
141 while (state != State::DONE) {
146 // Thread started but hasn't exited yet
147 EXPECT_EQ(0, Widget::totalVal_);
149 // Destroy ThreadLocalPtr<Widget> (by letting it go out of scope)
152 EXPECT_EQ(1010, Widget::totalVal_);
154 // Allow thread to exit
156 std::unique_lock<std::mutex> lock(mutex);
162 EXPECT_EQ(1010, Widget::totalVal_);
165 TEST(ThreadLocal, BasicDestructor) {
166 Widget::totalVal_ = 0;
167 ThreadLocal<Widget> w;
168 std::thread([&w]() { w->val_ += 10; }).join();
169 EXPECT_EQ(10, Widget::totalVal_);
172 TEST(ThreadLocal, SimpleRepeatDestructor) {
173 Widget::totalVal_ = 0;
175 ThreadLocal<Widget> w;
179 ThreadLocal<Widget> w;
182 EXPECT_EQ(20, Widget::totalVal_);
185 TEST(ThreadLocal, InterleavedDestructors) {
186 Widget::totalVal_ = 0;
187 std::unique_ptr<ThreadLocal<Widget>> w;
189 const int wVersionMax = 2;
192 auto th = std::thread([&]() {
193 int wVersionPrev = 0;
196 std::lock_guard<std::mutex> g(lock);
197 if (wVersion > wVersionMax) {
200 if (wVersion > wVersionPrev) {
201 // We have a new version of w, so it should be initialized to zero
202 EXPECT_EQ((*w)->val_, 0);
206 std::lock_guard<std::mutex> g(lock);
207 wVersionPrev = wVersion;
212 FOR_EACH_RANGE(i, 0, wVersionMax) {
215 std::lock_guard<std::mutex> g(lock);
217 w.reset(new ThreadLocal<Widget>());
221 std::lock_guard<std::mutex> g(lock);
222 if (thIter > thIterPrev) {
228 std::lock_guard<std::mutex> g(lock);
229 wVersion = wVersionMax + 1;
232 EXPECT_EQ(wVersionMax * 10, Widget::totalVal_);
235 class SimpleThreadCachedInt {
238 ThreadLocal<int,NewTag> val_;
247 for (const auto& i : val_.accessAllThreads()) {
254 TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
255 const int kNumThreads = 10;
256 SimpleThreadCachedInt stci;
257 std::atomic<bool> run(true);
258 std::atomic<int> totalAtomic(0);
259 std::vector<std::thread> threads;
260 for (int i = 0; i < kNumThreads; ++i) {
261 threads.push_back(std::thread([&,i]() {
263 totalAtomic.fetch_add(1);
264 while (run.load()) { usleep(100); }
267 while (totalAtomic.load() != kNumThreads) { usleep(100); }
268 EXPECT_EQ(kNumThreads, stci.read());
270 for (auto& t : threads) {
275 TEST(ThreadLocal, resetNull) {
277 tl.reset(new int(4));
278 EXPECT_EQ(*tl.get(), 4);
280 EXPECT_EQ(*tl.get(), 0);
281 tl.reset(new int(5));
282 EXPECT_EQ(*tl.get(), 5);
289 folly::ThreadLocal<int, Tag> tl;
293 TEST(ThreadLocal, Movable1) {
296 EXPECT_TRUE(a.tl.get() != b.tl.get());
300 EXPECT_TRUE(a.tl.get() != b.tl.get());
303 TEST(ThreadLocal, Movable2) {
304 std::map<int, Foo> map;
312 for (auto& m : map) {
313 tls.insert(m.second.tl.get());
316 // Make sure that we have 4 different instances of *tl
317 EXPECT_EQ(4, tls.size());
// Number of uint64_t words in each FillObject's pattern buffer.
constexpr size_t kFillObjectSize = 300;

// Total FillObjects destroyed across all threads (checked by the Stress test).
std::atomic<uint64_t> gDestroyed;
/**
 * Fill a chunk of memory with a unique-ish pattern that includes the thread id
 * (so deleting one of these from another thread would cause a failure).
 *
 * Verify it explicitly and on destruction.
 */
334 explicit FillObject(uint64_t idx) : idx_(idx) {
336 for (size_t i = 0; i < kFillObjectSize; ++i) {
343 for (size_t i = 0; i < kFillObjectSize; ++i) {
344 CHECK_EQ(v, data_[i]);
353 uint64_t val() const {
354 return (idx_ << 40) | uint64_t(pthread_self());
358 uint64_t data_[kFillObjectSize];
#if FOLLY_HAVE_STD__THIS_THREAD__SLEEP_FOR
// Many threads churning many ThreadLocalPtr slots concurrently: every
// FillObject must be destroyed by its creating thread (checked in ~FillObject)
// and the total destruction count must add up exactly.
TEST(ThreadLocal, Stress) {
  constexpr size_t numFillObjects = 250;
  std::array<ThreadLocalPtr<FillObject>, numFillObjects> objects;

  constexpr size_t numThreads = 32;
  constexpr size_t numReps = 20;

  std::vector<std::thread> threads;
  threads.reserve(numThreads);

  for (size_t i = 0; i < numThreads; ++i) {
    threads.emplace_back([&objects] {
      for (size_t rep = 0; rep < numReps; ++rep) {
        for (size_t i = 0; i < objects.size(); ++i) {
          objects[i].reset(new FillObject(rep * objects.size() + i));
          std::this_thread::sleep_for(std::chrono::microseconds(100));
        }
        for (size_t i = 0; i < objects.size(); ++i) {
          objects[i].reset(nullptr);
        }
      }
    });
  }

  for (auto& t : threads) {
    t.join();
  }

  EXPECT_EQ(numFillObjects * numThreads * numReps, gDestroyed);
}
#endif  // FOLLY_HAVE_STD__THIS_THREAD__SLEEP_FOR
// Yes, threads and fork don't mix
// (http://cppwisdom.quora.com/Why-threads-and-fork-dont-mix) but if you're
// stupid or desperate enough to try, we shouldn't stand in your way.
402 HoldsOne() : value_(1) { }
403 // Do an actual access to catch the buggy case where this == nullptr
404 int value() const { return value_; }
409 struct HoldsOneTag {};
411 ThreadLocal<HoldsOne, HoldsOneTag> ptr;
415 for (auto& p : ptr.accessAllThreads()) {
423 TEST(ThreadLocal, Fork) {
424 EXPECT_EQ(1, ptr->value()); // ensure created
425 EXPECT_EQ(1, totalValue());
426 // Spawn a new thread
429 bool started = false;
430 std::condition_variable startedCond;
431 bool stopped = false;
432 std::condition_variable stoppedCond;
434 std::thread t([&] () {
435 EXPECT_EQ(1, ptr->value()); // ensure created
437 std::unique_lock<std::mutex> lock(mutex);
439 startedCond.notify_all();
442 std::unique_lock<std::mutex> lock(mutex);
444 stoppedCond.wait(lock);
450 std::unique_lock<std::mutex> lock(mutex);
452 startedCond.wait(lock);
456 EXPECT_EQ(2, totalValue());
461 int v = totalValue();
463 // exit successfully if v == 1 (one thread)
464 // diagnostic error code otherwise :)
470 } else if (pid > 0) {
473 EXPECT_EQ(pid, waitpid(pid, &status, 0));
474 EXPECT_TRUE(WIFEXITED(status));
475 EXPECT_EQ(0, WEXITSTATUS(status));
477 EXPECT_TRUE(false) << "fork failed";
480 EXPECT_EQ(2, totalValue());
483 std::unique_lock<std::mutex> lock(mutex);
485 stoppedCond.notify_all();
490 EXPECT_EQ(1, totalValue());
493 struct HoldsOneTag2 {};
495 TEST(ThreadLocal, Fork2) {
496 // A thread-local tag that was used in the parent from a *different* thread
497 // (but not the forking thread) would cause the child to hang in a
498 // ThreadLocalPtr's object destructor. Yeah.
499 ThreadLocal<HoldsOne, HoldsOneTag2> p;
501 // use tag in different thread
502 std::thread t([&p] { p.get(); });
508 ThreadLocal<HoldsOne, HoldsOneTag2> q;
512 } else if (pid > 0) {
514 EXPECT_EQ(pid, waitpid(pid, &status, 0));
515 EXPECT_TRUE(WIFEXITED(status));
516 EXPECT_EQ(0, WEXITSTATUS(status));
518 EXPECT_TRUE(false) << "fork failed";
// Simple reference implementation using pthread_getspecific, used as the
// baseline in the benchmarks below.
template<typename T>
class PThreadGetSpecific {
 public:
  PThreadGetSpecific() : key_(0) {
    // OnThreadExit is invoked by pthreads on thread exit for non-null values.
    pthread_key_create(&key_, OnThreadExit);
  }

  // Release the key so the slot isn't leaked (values set by still-live
  // threads are then no longer reclaimed — acceptable for these globals).
  ~PThreadGetSpecific() {
    pthread_key_delete(key_);
  }

  // Current thread's value, or nullptr if never set.
  T* get() const {
    return static_cast<T*>(pthread_getspecific(key_));
  }

  // Replace (and destroy) the current thread's value.
  void reset(T* t) {
    delete get();
    pthread_setspecific(key_, t);
  }

  static void OnThreadExit(void* obj) {
    delete static_cast<T*>(obj);
  }

 private:
  pthread_key_t key_;
};
545 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
548 BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) { \
549 const int itersPerThread = iters / FLAGS_numThreads; \
550 std::vector<std::thread> threads; \
551 for (int i = 0; i < FLAGS_numThreads; ++i) { \
552 threads.push_back(std::thread([&]() { \
553 var.reset(new int(0)); \
554 for (int i = 0; i < itersPerThread; ++i) { \
559 for (auto& t : threads) { \
564 ThreadLocalPtr<int> tlp;
566 PThreadGetSpecific<int> pthread_get_specific;
567 REG(pthread_get_specific);
568 boost::thread_specific_ptr<int> boost_tsp;
570 BENCHMARK_DRAW_LINE();
572 int main(int argc, char** argv) {
573 testing::InitGoogleTest(&argc, argv);
574 gflags::ParseCommandLineFlags(&argc, &argv, true);
575 gflags::SetCommandLineOptionWithMode(
576 "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT
578 if (FLAGS_benchmark) {
579 folly::runBenchmarks();
581 return RUN_ALL_TESTS();
/*
Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches

Benchmark                               Iters   Total t    t/iter iter/sec
------------------------------------------------------------------------------
*       BM_mt_tlp                   100000000  39.88 ms  398.8 ps  2.335 G
 +5.91% BM_mt_pthread_get_specific  100000000  42.23 ms  422.3 ps  2.205 G
 + 295% BM_mt_boost_tsp             100000000  157.8 ms  1.578 ns  604.5 M
------------------------------------------------------------------------------
*/