2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #include <folly/ThreadLocal.h>
19 #include <sys/types.h>
23 #include <condition_variable>
29 #include <boost/thread/tss.hpp>
30 #include <glog/logging.h>
32 #include <folly/Benchmark.h>
33 #include <folly/experimental/io/FsUtil.h>
34 #include <folly/portability/GFlags.h>
36 using namespace folly;
38 // Simple reference implementation using pthread_getspecific
// Minimal thread-local pointer wrapper built directly on a POSIX
// thread-specific data key; it is one of the implementations exercised by the
// benchmarks in this file.
// NOTE(review): this excerpt does not show the declaration of T (presumably a
// template parameter), the declaration of key_, or the signature of the member
// that encloses the pthread_setspecific() call -- confirm against the full file.
40 class PThreadGetSpecific {
// Creates the key and passes OnThreadExit as the key's destructor callback.
// NOTE(review): the return value of pthread_key_create() is ignored, so a
// failed key creation would go unnoticed here.
42 PThreadGetSpecific() : key_(0) { pthread_key_create(&key_, OnThreadExit); }
// Returns the calling thread's value for this key, cast back to T*.
44 T* get() const { return static_cast<T*>(pthread_getspecific(key_)); }
// Stores t as the calling thread's value for this key.
48 pthread_setspecific(key_, t);
// Destructor callback handed to pthread_key_create(): deletes obj as a T.
50 static void OnThreadExit(void* obj) { delete static_cast<T*>(obj); }
// --numThreads (default 8): the number of worker threads started by the
// benchmarks in this file, and the divisor used to split the requested
// iteration count between those threads.
//
// The backslash-continued lines after the flag form a benchmark body named
// BM_mt_<var> (presumably the expansion of the REG(var) macro invoked further
// down; its #define line is not visible in this excerpt). The body divides
// iters by FLAGS_numThreads and starts that many threads; each thread first
// points var at a freshly allocated int(0) and then loops itersPerThread times.
// NOTE(review): the integer division drops any remainder, and a --numThreads
// value of 0 would divide by zero.
56 DEFINE_int32(numThreads, 8, "Number simultaneous threads for benchmarks.");
59 BENCHMARK(FB_CONCATENATE(BM_mt_, var), iters) { \
60 const int itersPerThread = iters / FLAGS_numThreads; \
61 std::vector<std::thread> threads; \
62 for (int i = 0; i < FLAGS_numThreads; ++i) { \
63 threads.push_back(std::thread([&]() { \
64 var.reset(new int(0)); \
65 for (int j = 0; j < itersPerThread; ++j) { \
70 for (auto& t : threads) { \
75 ThreadLocalPtr<int> tlp;
// The int-valued objects declared here are the thread-local implementations
// being compared: ThreadLocalPtr (tlp, above), the pthread-key wrapper
// PThreadGetSpecific, and boost::thread_specific_ptr.
// REG(x) presumably instantiates the BM_mt_<x> benchmark for object x.
// NOTE(review): only the REG call for pthread_get_specific is visible in this
// excerpt; confirm the other two objects are registered in the full file.
77 PThreadGetSpecific<int> pthread_get_specific;
78 REG(pthread_get_specific);
79 boost::thread_specific_ptr<int> boost_tsp;
// Presumably emits the dashed separator row seen in the recorded results.
81 BENCHMARK_DRAW_LINE();
// Shared driver for the *_multi benchmarks, parameterized on the thread-local
// wrapper type TL. It divides iters by FLAGS_numThreads and starts that many
// threads; each thread loops itersPerThread times, reading field a and
// accumulating into field b through var.get().
// NOTE(review): the declaration of var is not visible in this excerpt
// (presumably one or more TL instances) -- confirm against the full file.
// NOTE(review): the integer division drops any remainder, and a --numThreads
// value of 0 would divide by zero. iters is unsigned while the flag is a signed
// int32, so a negative flag value would presumably be converted to a large
// unsigned divisor rather than rejected.
88 template <typename TL>
89 void run_multi(uint32_t iters) {
90 const int itersPerThread = iters / FLAGS_numThreads;
91 std::vector<std::thread> threads;
93 for (int i = 0; i < FLAGS_numThreads; ++i) {
// The lambda captures by reference, so it reads itersPerThread (and var) from
// the enclosing scope.
94 threads.push_back(std::thread([&]() {
96 for (int j = 0; j < itersPerThread; ++j) {
// Each statement below performs two get() calls on var.
98 var.get()->b += var.get()->a;
100 var.get()->b += var.get()->a;
// Second pass over the started threads (loop body not visible in this excerpt;
// presumably joins them).
104 for (auto& t : threads) {
// Benchmark: runs the run_multi driver with ThreadLocalPtr<foo> as the
// thread-local wrapper type.
109 BENCHMARK(BM_mt_tlp_multi, iters) {
110 run_multi<ThreadLocalPtr<foo>>(iters);
// Benchmark: runs the run_multi driver with the pthread-key wrapper
// PThreadGetSpecific<foo> as the thread-local wrapper type.
112 BENCHMARK(BM_mt_pthread_get_specific_multi, iters) {
113 run_multi<PThreadGetSpecific<foo>>(iters);
// Benchmark: runs the run_multi driver with boost::thread_specific_ptr<foo> as
// the thread-local wrapper type.
115 BENCHMARK(BM_mt_boost_tsp_multi, iters) {
116 run_multi<boost::thread_specific_ptr<foo>>(iters);
// Presumably emits the dashed separator row that follows the *_multi group in
// the recorded results.
118 BENCHMARK_DRAW_LINE();
// Entry point: parses the command-line flags, sets the bm_max_iters flag to
// 100000000 using SET_FLAG_IF_DEFAULT mode (i.e. presumably only when the user
// has not already set it), and then runs the registered benchmarks.
// NOTE(review): the return statement and closing brace are not visible in this
// excerpt.
120 int main(int argc, char** argv) {
121 gflags::ParseCommandLineFlags(&argc, &argv, true);
122 gflags::SetCommandLineOptionWithMode(
123 "bm_max_iters", "100000000", gflags::SET_FLAG_IF_DEFAULT);
124 folly::runBenchmarks();
129 ./buck-out/gen/folly/test/thread_local_benchmark --bm_min_iters=10000000
132 ============================================================================
133 folly/test/ThreadLocalBenchmark.cpp relative time/iter iters/s
134 ============================================================================
135 BM_mt_tlp 1.92ns 520.02M
136 BM_mt_pthread_get_specific 2.69ns 372.15M
137 BM_mt_boost_tsp 11.81ns 84.67M
138 ----------------------------------------------------------------------------
139 BM_mt_tlp_multi 7.53ns 132.79M
140 BM_mt_pthread_get_specific_multi 15.80ns 63.29M
141 BM_mt_boost_tsp_multi 71.70ns 13.95M
142 ----------------------------------------------------------------------------
143 ============================================================================