From 13de7740c5b72cf7adafad79a22757ab0848120a Mon Sep 17 00:00:00 2001 From: Lovro Puzar Date: Wed, 17 Feb 2016 05:22:40 -0800 Subject: [PATCH] Add call_once, wrapper around std::call_once with a fast path Summary: std::call_once is a nice API but the current GCC implementation is slower than it needs to be. Adding a header-only wrapper with an additional atomic bool. Reviewed By: luciang Differential Revision: D2938884 fb-gh-sync-id: 5939c94fe62a1523053dcff26c880ecaec9e1150 shipit-source-id: 5939c94fe62a1523053dcff26c880ecaec9e1150 --- folly/CallOnce.h | 79 ++++++++++++++++++++++++++++++++++ folly/Makefile.am | 1 + folly/Random.cpp | 5 ++- folly/detail/Futex.cpp | 7 +-- folly/test/CallOnceTest.cpp | 85 +++++++++++++++++++++++++++++++++++++ 5 files changed, 172 insertions(+), 5 deletions(-) create mode 100644 folly/CallOnce.h create mode 100644 folly/test/CallOnceTest.cpp diff --git a/folly/CallOnce.h b/folly/CallOnce.h new file mode 100644 index 00000000..01118c04 --- /dev/null +++ b/folly/CallOnce.h @@ -0,0 +1,79 @@ +/* + * Copyright 2016 Facebook, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Drop-in replacement for std::call_once() with a fast path, which the GCC + * implementation lacks. The tradeoff is a slightly larger `once_flag' struct + * (8 bytes vs 4 bytes with GCC on Linux/x64). 
+ *
+ * $ call_once_test --benchmark --bm_min_iters=100000000 --threads=16
+ * ============================================================================
+ * folly/test/CallOnceTest.cpp                     relative  time/iter  iters/s
+ * ============================================================================
+ * StdCallOnceBench                                             3.54ns  282.82M
+ * FollyCallOnceBench                                         698.48ps    1.43G
+ * ============================================================================
+ */
+
+#pragma once
+
+#include <atomic>
+#include <mutex>
+#include <utility>
+
+#include <folly/Likely.h>
+#include <folly/Portability.h>
+
+namespace folly {
+
+class once_flag {  // Flag object passed to folly::call_once(); not copyable.
+ public:
+  constexpr once_flag() noexcept = default;
+  once_flag(const once_flag&) = delete;
+  once_flag& operator=(const once_flag&) = delete;
+
+  template <typename Callable, class... Args>
+  friend void call_once(once_flag& flag, Callable&& f, Args&&... args);
+  template <typename Callable, class... Args>
+  friend void call_once_impl_no_inline(once_flag& flag,
+                                       Callable&& f,
+                                       Args&&... args);
+
+ private:
+  std::atomic<bool> called_{false};  // Set to true once std::call_once has returned.
+  std::once_flag std_once_flag_;  // Flag handed to std::call_once on the slow path.
+};
+
+template <typename Callable, class... Args>
+void FOLLY_ALWAYS_INLINE
+call_once(once_flag& flag, Callable&& f, Args&&... args) {
+  if (LIKELY(flag.called_.load(std::memory_order_acquire))) {  // Fast path: already marked called.
+    return;
+  }
+  call_once_impl_no_inline(
+      flag, std::forward<Callable>(f), std::forward<Args>(args)...);
+}
+
+// Implementation detail: out-of-line slow path. Forwards to std::call_once, then marks the flag with a release store (skipped if std::call_once throws).
+template <typename Callable, class... Args>
+void FOLLY_NOINLINE
+call_once_impl_no_inline(once_flag& flag, Callable&& f, Args&&... args) {
+  std::call_once(flag.std_once_flag_,
+                 std::forward<Callable>(f),
+                 std::forward<Args>(args)...);
+  flag.called_.store(true, std::memory_order_release);  // Pairs with the acquire load in call_once().
+}
+}
diff --git a/folly/Makefile.am b/folly/Makefile.am
index 87c984e2..65408068 100644
--- a/folly/Makefile.am
+++ b/folly/Makefile.am
@@ -38,6 +38,7 @@ nobase_follyinclude_HEADERS = \
 	Baton.h \
 	Benchmark.h \
 	Bits.h \
+	CallOnce.h \
 	Checksum.h \
 	ConcurrentSkipList.h \
 	ConcurrentSkipList-inl.h \
diff --git a/folly/Random.cpp b/folly/Random.cpp
index dfe9270b..ef2e8b9c 100644
--- a/folly/Random.cpp
+++ b/folly/Random.cpp
@@ -24,6 +24,7 @@
 #include
 #include
+#include <folly/CallOnce.h>
 #include
 #include
 #include
@@ -38,9 +39,9 @@ namespace {
 void readRandomDevice(void* data, size_t size) {
 #ifdef _MSC_VER
-  static std::once_flag flag;
+  static folly::once_flag flag;
   static HCRYPTPROV cryptoProv;
-  std::call_once(flag, [&] {
+  folly::call_once(flag, [&] {
     PCHECK(CryptAcquireContext(&cryptoProv, nullptr, nullptr,
                                PROV_RSA_FULL, 0));
   });
diff --git a/folly/detail/Futex.cpp b/folly/detail/Futex.cpp
index 0e429237..0ba72ebf 100644
--- a/folly/detail/Futex.cpp
+++ b/folly/detail/Futex.cpp
@@ -20,6 +20,7 @@
 #include
 #include
 #include
+#include <folly/CallOnce.h>
 #include
 #include
@@ -187,10 +188,10 @@ struct EmulatedFutexBucket {
   static const size_t kNumBuckets = 4096;
   static EmulatedFutexBucket* gBuckets;
-  static std::once_flag gBucketInit;
+  static folly::once_flag gBucketInit;
   static EmulatedFutexBucket& bucketFor(void* addr) {
-    std::call_once(gBucketInit, [](){
+    folly::call_once(gBucketInit, [](){
       gBuckets = new EmulatedFutexBucket[kNumBuckets];
     });
     uint64_t mixedBits = folly::hash::twang_mix64(
@@ -200,7 +201,7 @@ struct EmulatedFutexBucket {
 };
 EmulatedFutexBucket* EmulatedFutexBucket::gBuckets;
-std::once_flag EmulatedFutexBucket::gBucketInit;
+folly::once_flag EmulatedFutexBucket::gBucketInit;
 int emulatedFutexWake(void* addr, int count, uint32_t waitMask) {
   auto& bucket = EmulatedFutexBucket::bucketFor(addr);
diff --git a/folly/test/CallOnceTest.cpp
b/folly/test/CallOnceTest.cpp
new file mode 100644
index 00000000..f1b1d719
--- /dev/null
+++ b/folly/test/CallOnceTest.cpp
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <deque>
+#include <mutex>
+#include <thread>
+
+#include <folly/Benchmark.h>
+#include <folly/CallOnce.h>
+#include <gflags/gflags.h>
+#include <glog/logging.h>  // NOTE(review): include target inferred; nothing visible here uses it - confirm.
+#include <gtest/gtest.h>
+
+DEFINE_int32(threads, 16, "benchmark concurrency");
+
+template <typename CallOnceFunc>
+void bm_impl(CallOnceFunc&& fn, int64_t iters) {  // Calls fn() `iters` times on each of FLAGS_threads threads, then joins them all.
+  std::deque<std::thread> threads;
+  for (int i = 0; i < FLAGS_threads; ++i) {
+    threads.emplace_back([&fn, iters] {  // fn is captured by reference; every thread is joined before bm_impl returns.
+      for (int64_t j = 0; j < iters; ++j) {
+        fn();
+      }
+    });
+  }
+  for (std::thread& t : threads) {
+    t.join();
+  }
+}
+
+BENCHMARK(StdCallOnceBench, iters) {  // Baseline: std::call_once hammered from all threads.
+  std::once_flag flag;
+  int out = 0;
+  bm_impl([&] { std::call_once(flag, [&] { ++out; }); }, iters);
+  ASSERT_EQ(1, out);
+}
+
+BENCHMARK(FollyCallOnceBench, iters) {  // Same workload through folly::call_once.
+  folly::once_flag flag;
+  int out = 0;
+  bm_impl([&] { folly::call_once(flag, [&] { ++out; }); }, iters);
+  ASSERT_EQ(1, out);
+}
+
+TEST(FollyCallOnce, Simple) {  // Two sequential calls on one flag must run the callable once.
+  folly::once_flag flag;
+  auto fn = [&](int* outp) { ++*outp; };
+  int out = 0;
+  folly::call_once(flag, fn, &out);
+  folly::call_once(flag, fn, &out);
+  ASSERT_EQ(1, out);
+}
+
+TEST(FollyCallOnce, Stress) {  // 100 rounds, each with a fresh flag contended by FLAGS_threads threads.
+  for (int i = 0; i < 100; ++i) {
+    folly::once_flag flag;
+    int out = 0;
+    bm_impl([&] { folly::call_once(flag, [&] { ++out; }); }, 100);
+    ASSERT_EQ(1, out);
+  }
+}
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  if (FLAGS_benchmark) {  // Benchmark mode: run the benchmarks and skip the unit tests.
+    folly::runBenchmarksOnFlag();
+    return 0;
+  } else {
+    return RUN_ALL_TESTS();
+  }
+}
-- 
2.34.1