2 * Copyright 2012 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include "folly/ThreadLocal.h"
20 #include <unordered_map>
24 #include <condition_variable>
26 #include <boost/thread/tss.hpp>
27 #include <gtest/gtest.h>
28 #include <gflags/gflags.h>
29 #include <glog/logging.h>
30 #include "folly/Benchmark.h"
32 using namespace folly;
41 static void customDeleter(Widget* w, TLPDestructionMode mode) {
42 totalVal_ += (mode == TLPDestructionMode::ALL_THREADS) * 1000;
46 int Widget::totalVal_ = 0;
48 TEST(ThreadLocalPtr, BasicDestructor) {
49 Widget::totalVal_ = 0;
50 ThreadLocalPtr<Widget> w;
52 w.reset(new Widget());
55 EXPECT_EQ(10, Widget::totalVal_);
58 TEST(ThreadLocalPtr, CustomDeleter1) {
59 Widget::totalVal_ = 0;
61 ThreadLocalPtr<Widget> w;
63 w.reset(new Widget(), Widget::customDeleter);
66 EXPECT_EQ(10, Widget::totalVal_);
68 EXPECT_EQ(10, Widget::totalVal_);
71 // Test deleting the ThreadLocalPtr object
72 TEST(ThreadLocalPtr, CustomDeleter2) {
73 Widget::totalVal_ = 0;
76 std::condition_variable cv;
82 State state = State::START;
84 ThreadLocalPtr<Widget> w;
85 t = std::thread([&]() {
86 w.reset(new Widget(), Widget::customDeleter);
89 // Notify main thread that we're done
91 std::unique_lock<std::mutex> lock(mutex);
96 // Wait for main thread to allow us to exit
98 std::unique_lock<std::mutex> lock(mutex);
99 while (state != State::EXIT) {
105 // Wait for the spawned thread to start (and set w.get()->val_)
107 std::unique_lock<std::mutex> lock(mutex);
108 while (state != State::DONE) {
113 // Thread started but hasn't exited yet
114 EXPECT_EQ(0, Widget::totalVal_);
116 // Destroy ThreadLocalPtr<Widget> (by letting it go out of scope)
119 EXPECT_EQ(1010, Widget::totalVal_);
121 // Allow thread to exit
123 std::unique_lock<std::mutex> lock(mutex);
129 EXPECT_EQ(1010, Widget::totalVal_);
132 TEST(ThreadLocal, BasicDestructor) {
133 Widget::totalVal_ = 0;
134 ThreadLocal<Widget> w;
135 std::thread([&w]() { w->val_ += 10; }).join();
136 EXPECT_EQ(10, Widget::totalVal_);
139 TEST(ThreadLocal, SimpleRepeatDestructor) {
140 Widget::totalVal_ = 0;
142 ThreadLocal<Widget> w;
146 ThreadLocal<Widget> w;
149 EXPECT_EQ(20, Widget::totalVal_);
152 TEST(ThreadLocal, InterleavedDestructors) {
153 Widget::totalVal_ = 0;
154 ThreadLocal<Widget>* w = NULL;
156 const int wVersionMax = 2;
159 auto th = std::thread([&]() {
160 int wVersionPrev = 0;
163 std::lock_guard<std::mutex> g(lock);
164 if (wVersion > wVersionMax) {
167 if (wVersion > wVersionPrev) {
168 // We have a new version of w, so it should be initialized to zero
169 EXPECT_EQ((*w)->val_, 0);
173 std::lock_guard<std::mutex> g(lock);
174 wVersionPrev = wVersion;
179 FOR_EACH_RANGE(i, 0, wVersionMax) {
182 std::lock_guard<std::mutex> g(lock);
185 w = new ThreadLocal<Widget>();
189 std::lock_guard<std::mutex> g(lock);
190 if (thIter > thIterPrev) {
196 std::lock_guard<std::mutex> g(lock);
197 wVersion = wVersionMax + 1;
200 EXPECT_EQ(wVersionMax * 10, Widget::totalVal_);
203 class SimpleThreadCachedInt {
206 ThreadLocal<int,NewTag> val_;
215 for (const auto& i : val_.accessAllThreads()) {
222 TEST(ThreadLocalPtr, AccessAllThreadsCounter) {
223 const int kNumThreads = 10;
224 SimpleThreadCachedInt stci;
225 std::atomic<bool> run(true);
226 std::atomic<int> totalAtomic(0);
227 std::vector<std::thread> threads;
228 for (int i = 0; i < kNumThreads; ++i) {
229 threads.push_back(std::thread([&,i]() {
231 totalAtomic.fetch_add(1);
232 while (run.load()) { usleep(100); }
235 while (totalAtomic.load() != kNumThreads) { usleep(100); }
236 EXPECT_EQ(kNumThreads, stci.read());
238 for (auto& t : threads) {
243 TEST(ThreadLocal, resetNull) {
245 tl.reset(new int(4));
246 EXPECT_EQ(*tl.get(), 4);
248 EXPECT_EQ(*tl.get(), 0);
249 tl.reset(new int(5));
250 EXPECT_EQ(*tl.get(), 5);
257 folly::ThreadLocal<int, Tag> tl;
261 TEST(ThreadLocal, Movable1) {
264 EXPECT_TRUE(a.tl.get() != b.tl.get());
268 EXPECT_TRUE(a.tl.get() != b.tl.get());
271 TEST(ThreadLocal, Movable2) {
272 std::map<int, Foo> map;
280 for (auto& m : map) {
281 tls.insert(m.second.tl.get());
284 // Make sure that we have 4 different instances of *tl
285 EXPECT_EQ(4, tls.size());
288 // Simple reference implementation using pthread_getspecific
290 class PThreadGetSpecific {
292 PThreadGetSpecific() : key_(0) {
293 pthread_key_create(&key_, OnThreadExit);
297 return static_cast<T*>(pthread_getspecific(key_));
302 pthread_setspecific(key_, t);
304 static void OnThreadExit(void* obj) {
305 delete static_cast<T*>(obj);
311 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
314 BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) { \
315 const int itersPerThread = iters / FLAGS_numThreads; \
316 std::vector<std::thread> threads; \
317 for (int i = 0; i < FLAGS_numThreads; ++i) { \
318 threads.push_back(std::thread([&]() { \
319 var.reset(new int(0)); \
320 for (int i = 0; i < itersPerThread; ++i) { \
325 for (auto& t : threads) { \
330 ThreadLocalPtr<int> tlp;
332 PThreadGetSpecific<int> pthread_get_specific;
333 REG(pthread_get_specific);
334 boost::thread_specific_ptr<int> boost_tsp;
336 BENCHMARK_DRAW_LINE();
338 int main(int argc, char** argv) {
339 testing::InitGoogleTest(&argc, argv);
340 google::ParseCommandLineFlags(&argc, &argv, true);
341 google::SetCommandLineOptionWithMode(
342 "bm_max_iters", "100000000", google::SET_FLAG_IF_DEFAULT
344 if (FLAGS_benchmark) {
345 folly::runBenchmarks();
347 return RUN_ALL_TESTS();
351 Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
353 Benchmark Iters Total t t/iter iter/sec
354 ------------------------------------------------------------------------------
355 * BM_mt_tlp 100000000 39.88 ms 398.8 ps 2.335 G
356 +5.91% BM_mt_pthread_get_specific 100000000 42.23 ms 422.3 ps 2.205 G
357 + 295% BM_mt_boost_tsp 100000000 157.8 ms 1.578 ns 604.5 M
358 ------------------------------------------------------------------------------