Add call_once, wrapper around std::call_once with a fast path
author Lovro Puzar <lovro@fb.com>
Wed, 17 Feb 2016 13:22:40 +0000 (05:22 -0800)
committer facebook-github-bot-0 <folly-bot@fb.com>
Wed, 17 Feb 2016 14:20:26 +0000 (06:20 -0800)
Summary: std::call_once is a nice API but the current GCC implementation is slower than it needs to be.  Adding a header-only wrapper with an additional atomic bool.

Reviewed By: luciang

Differential Revision: D2938884

fb-gh-sync-id: 5939c94fe62a1523053dcff26c880ecaec9e1150
shipit-source-id: 5939c94fe62a1523053dcff26c880ecaec9e1150

folly/CallOnce.h [new file with mode: 0644]
folly/Makefile.am
folly/Random.cpp
folly/detail/Futex.cpp
folly/test/CallOnceTest.cpp [new file with mode: 0644]

diff --git a/folly/CallOnce.h b/folly/CallOnce.h
new file mode 100644 (file)
index 0000000..01118c0
--- /dev/null
@@ -0,0 +1,79 @@
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * Drop-in replacement for std::call_once() with a fast path, which the GCC
+ * implementation lacks.  The tradeoff is a slightly larger `once_flag' struct
+ * (8 bytes vs 4 bytes with GCC on Linux/x64).
+ *
+ * $ call_once_test --benchmark --bm_min_iters=100000000 --threads=16
+ * ============================================================================
+ * folly/test/CallOnceTest.cpp                     relative  time/iter  iters/s
+ * ============================================================================
+ * StdCallOnceBench                                             3.54ns  282.82M
+ * FollyCallOnceBench                                         698.48ps    1.43G
+ * ============================================================================
+ */
+
+#pragma once
+
+#include <atomic>
+#include <mutex>
+#include <utility>
+
+#include <folly/Likely.h>
+#include <folly/Portability.h>
+
+namespace folly {
+/**
+ * Flag type consumed by folly::call_once().
+ *
+ * Holds a std::once_flag plus an atomic bool.  call_once() tests the bool
+ * before doing anything else and returns right away when it is set; only
+ * when it is clear does the call go through std::call_once on the wrapped
+ * std::once_flag.  Copy construction and copy assignment are deleted.
+ */
+class once_flag {
+ public:
+  constexpr once_flag() noexcept = default;
+  once_flag(const once_flag&) = delete;
+  once_flag& operator=(const once_flag&) = delete;
+
+  template <typename Callable, class... Args>  // friend: reads called_
+  friend void call_once(once_flag& flag, Callable&& f, Args&&... args);
+  template <typename Callable, class... Args>  // friend: uses both members
+  friend void call_once_impl_no_inline(once_flag& flag,
+                                       Callable&& f,
+                                       Args&&... args);
+
+ private:
+  std::atomic<bool> called_{false};  // fast-path marker; starts out false
+  std::once_flag std_once_flag_;  // handed to std::call_once on the slow path
+};
+/**
+ * Entry point: std::call_once-style invocation with an inlined fast path.
+ *
+ * Loads flag.called_ with acquire ordering.  If it is already true the
+ * function returns without touching f or args.  Otherwise f and args are
+ * perfectly forwarded to call_once_impl_no_inline(), which performs the
+ * actual std::call_once.
+ *
+ * @param flag  the folly::once_flag that guards the invocation.
+ * @param f     callable forwarded to the slow path.
+ * @param args  arguments forwarded along with f.
+ */
+template <class Callable, class... Args>
+void FOLLY_ALWAYS_INLINE
+call_once(once_flag& flag, Callable&& f, Args&&... args) {
+  if (LIKELY(flag.called_.load(std::memory_order_acquire))) {
+    return;  // fast path: the release store in the slow path was observed
+  }
+  call_once_impl_no_inline(
+      flag, std::forward<Callable>(f), std::forward<Args>(args)...);
+}
+
+/*
+ * Implementation detail: out-of-line slow path.
+ *
+ * Forwards f and args to std::call_once on flag.std_once_flag_, then sets
+ * flag.called_ to true with release ordering so that later call_once()
+ * calls can return from their acquire load.  The store comes after
+ * std::call_once, so if that call exits via an exception the store is not
+ * reached and called_ stays false.
+ */
+template <class Callable, class... Args>
+void FOLLY_NOINLINE
+call_once_impl_no_inline(once_flag& flag, Callable&& f, Args&&... args) {
+  std::call_once(flag.std_once_flag_,
+                 std::forward<Callable>(f),
+                 std::forward<Args>(args)...);
+  flag.called_.store(true, std::memory_order_release);  // enable fast path
+}
+}
index 87c984e2f54ae9843c848b9acacf52d5ceaafd8d..65408068567b3e18f240171390de8694c5748a5e 100644 (file)
@@ -38,6 +38,7 @@ nobase_follyinclude_HEADERS = \
        Baton.h \
        Benchmark.h \
        Bits.h \
+       CallOnce.h \
        Checksum.h \
        ConcurrentSkipList.h \
        ConcurrentSkipList-inl.h \
index dfe9270bf15517109f34a4436d5a09256c9ce2ec..ef2e8b9cc5d6bc570870614398d1e31111246cee 100644 (file)
@@ -24,6 +24,7 @@
 #include <array>
 
 #include <glog/logging.h>
+#include <folly/CallOnce.h>
 #include <folly/File.h>
 #include <folly/FileUtil.h>
 #include <folly/ThreadLocal.h>
@@ -38,9 +39,9 @@ namespace {
 
 void readRandomDevice(void* data, size_t size) {
 #ifdef _MSC_VER
-  static std::once_flag flag;
+  static folly::once_flag flag;
   static HCRYPTPROV cryptoProv;
-  std::call_once(flag, [&] {
+  folly::call_once(flag, [&] {
     PCHECK(CryptAcquireContext(&cryptoProv, nullptr, nullptr,
                                PROV_RSA_FULL, 0));
   });
index 0e4292370ad58833f8454f7336c14eb4e2fd45bc..0ba72ebf1fbfc77446ba73c5daf58b7c86c17633 100644 (file)
@@ -20,6 +20,7 @@
 #include <condition_variable>
 #include <mutex>
 #include <boost/intrusive/list.hpp>
+#include <folly/CallOnce.h>
 #include <folly/Hash.h>
 #include <folly/ScopeGuard.h>
 
@@ -187,10 +188,10 @@ struct EmulatedFutexBucket {
 
   static const size_t kNumBuckets = 4096;
   static EmulatedFutexBucket* gBuckets;
-  static std::once_flag gBucketInit;
+  static folly::once_flag gBucketInit;
 
   static EmulatedFutexBucket& bucketFor(void* addr) {
-    std::call_once(gBucketInit, [](){
+    folly::call_once(gBucketInit, [](){
       gBuckets = new EmulatedFutexBucket[kNumBuckets];
     });
     uint64_t mixedBits = folly::hash::twang_mix64(
@@ -200,7 +201,7 @@ struct EmulatedFutexBucket {
 };
 
 EmulatedFutexBucket* EmulatedFutexBucket::gBuckets;
-std::once_flag EmulatedFutexBucket::gBucketInit;
+folly::once_flag EmulatedFutexBucket::gBucketInit;
 
 int emulatedFutexWake(void* addr, int count, uint32_t waitMask) {
   auto& bucket = EmulatedFutexBucket::bucketFor(addr);
diff --git a/folly/test/CallOnceTest.cpp b/folly/test/CallOnceTest.cpp
new file mode 100644 (file)
index 0000000..f1b1d71
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * Copyright 2016 Facebook, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <deque>
+#include <mutex>
+#include <thread>
+
+#include <folly/Benchmark.h>
+#include <folly/CallOnce.h>
+#include <gflags/gflags.h>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+DEFINE_int32(threads, 16, "benchmark concurrency");  // thread count used by bm_impl
+/*
+ * Shared driver for the benchmarks and the stress test.
+ *
+ * Starts FLAGS_threads threads; each one calls fn() `iters` times.  fn is
+ * captured by reference, so every thread invokes the same callable object.
+ * Returns after all threads have been joined.
+ */
+template <typename CallOnceFunc>
+void bm_impl(CallOnceFunc&& fn, int64_t iters) {
+  std::deque<std::thread> threads;
+  for (int i = 0; i < FLAGS_threads; ++i) {
+    threads.emplace_back([&fn, iters] {
+      for (int64_t j = 0; j < iters; ++j) {
+        fn();
+      }
+    });
+  }
+  for (std::thread& t : threads) {
+    t.join();
+  }
+}
+/*
+ * Baseline benchmark: every bm_impl thread calls std::call_once on one
+ * shared std::once_flag; the callable increments `out`, which must end at 1.
+ */
+BENCHMARK(StdCallOnceBench, iters) {
+  std::once_flag flag;
+  int out = 0;
+  bm_impl([&] { std::call_once(flag, [&] { ++out; }); }, iters);
+  ASSERT_EQ(1, out);
+}
+/*
+ * Same workload as StdCallOnceBench, but using folly::once_flag and
+ * folly::call_once; `out` must likewise end at 1.
+ */
+BENCHMARK(FollyCallOnceBench, iters) {
+  folly::once_flag flag;
+  int out = 0;
+  bm_impl([&] { folly::call_once(flag, [&] { ++out; }); }, iters);
+  ASSERT_EQ(1, out);
+}
+/*
+ * Single-threaded check: two folly::call_once calls on the same flag, each
+ * passing an extra argument (&out) through to the callable, must increment
+ * `out` only once.
+ */
+TEST(FollyCallOnce, Simple) {
+  folly::once_flag flag;
+  auto fn = [&](int* outp) { ++*outp; };
+  int out = 0;
+  folly::call_once(flag, fn, &out);
+  folly::call_once(flag, fn, &out);  // second call must not run fn again
+  ASSERT_EQ(1, out);
+}
+/*
+ * Multi-threaded check, repeated 100 times with a fresh flag and counter:
+ * bm_impl runs folly::call_once 100 times per thread, and the counter must
+ * end at exactly 1 each round.
+ */
+TEST(FollyCallOnce, Stress) {
+  for (int i = 0; i < 100; ++i) {
+    folly::once_flag flag;
+    int out = 0;
+    bm_impl([&] { folly::call_once(flag, [&] { ++out; }); }, 100);
+    ASSERT_EQ(1, out);
+  }
+}
+/*
+ * Test/benchmark entry point.  Initializes gtest, then parses gflags.  When
+ * FLAGS_benchmark is set, calls folly::runBenchmarksOnFlag() and returns 0
+ * without running the unit tests; otherwise returns RUN_ALL_TESTS().
+ */
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  if (FLAGS_benchmark) {
+    folly::runBenchmarksOnFlag();
+    return 0;
+  } else {
+    return RUN_ALL_TESTS();
+  }
+}