From: Dave Watson
Date: Tue, 21 Nov 2017 15:34:15 +0000 (-0800)
Subject: improve ThreadLocalBenchmark
X-Git-Tag: v2017.11.27.00~9
X-Git-Url: http://demsky.eecs.uci.edu/git/?a=commitdiff_plain;h=65d63573f08eef7b847ebf38ec74d52a61d3b936;p=folly.git

improve ThreadLocalBenchmark

Summary: Benchmark appears to be memory bound, and is affected by different cpus. Add a new benchmark that does more work.

Reviewed By: yfeldblum

Differential Revision: D6380904

fbshipit-source-id: 5cfbaab70379aa4a2923d957e1147d8486deeff7
---

diff --git a/folly/test/ThreadLocalBenchmark.cpp b/folly/test/ThreadLocalBenchmark.cpp
index 9eb14ec9..81f00779 100644
--- a/folly/test/ThreadLocalBenchmark.cpp
+++ b/folly/test/ThreadLocalBenchmark.cpp
@@ -80,6 +80,43 @@ boost::thread_specific_ptr<int> boost_tsp;
 REG(boost_tsp);
 BENCHMARK_DRAW_LINE();
 
+struct foo {
+  int a{0};
+  int b{0};
+};
+
+template <typename TL>
+void run_multi(uint32_t iters) {
+  const int itersPerThread = iters / FLAGS_numThreads;
+  std::vector<std::thread> threads;
+  TL var;
+  for (int i = 0; i < FLAGS_numThreads; ++i) {
+    threads.push_back(std::thread([&]() {
+      var.reset(new foo);
+      for (int j = 0; j < itersPerThread; ++j) {
+        ++var.get()->a;
+        var.get()->b += var.get()->a;
+        --var.get()->a;
+        var.get()->b += var.get()->a;
+      }
+    }));
+  }
+  for (auto& t : threads) {
+    t.join();
+  }
+}
+
+BENCHMARK(BM_mt_tlp_multi, iters) {
+  run_multi<ThreadLocalPtr<foo>>(iters);
+}
+BENCHMARK(BM_mt_pthread_get_specific_multi, iters) {
+  run_multi<PThreadGetSpecific<foo>>(iters);
+}
+BENCHMARK(BM_mt_boost_tsp_multi, iters) {
+  run_multi<boost::thread_specific_ptr<foo>>(iters);
+}
+BENCHMARK_DRAW_LINE();
+
 int main(int argc, char** argv) {
   gflags::ParseCommandLineFlags(&argc, &argv, true);
   gflags::SetCommandLineOptionWithMode(
@@ -89,12 +126,19 @@ int main(int argc, char** argv) {
 }
 
 /*
-Ran with 24 threads on dual 12-core Xeon(R) X5650 @ 2.67GHz with 12-MB caches
-
-Benchmark                               Iters   Total t    t/iter iter/sec
-------------------------------------------------------------------------------
-*       BM_mt_tlp                   100000000  39.88 ms  398.8 ps  2.335 G
- +5.91% BM_mt_pthread_get_specific  100000000  42.23 ms  422.3 ps  2.205 G
- + 295% BM_mt_boost_tsp             100000000  157.8 ms  1.578 ns  604.5 M
-------------------------------------------------------------------------------
+./buck-out/gen/folly/test/thread_local_benchmark --bm_min_iters=10000000
+--numThreads=1
+
+============================================================================
+folly/test/ThreadLocalBenchmark.cpp             relative  time/iter  iters/s
+============================================================================
+BM_mt_tlp                                                    2.30ns  434.53M
+BM_mt_pthread_get_specific                                   2.69ns  371.75M
+BM_mt_boost_tsp                                             11.66ns   85.78M
+----------------------------------------------------------------------------
+BM_mt_tlp_multi                                             12.46ns   80.25M
+BM_mt_pthread_get_specific_multi                            16.58ns   60.32M
+BM_mt_boost_tsp_multi                                       70.85ns   14.12M
+----------------------------------------------------------------------------
+============================================================================
 */