From eec32a760c6597c7053fac69d9c844b9ff422677 Mon Sep 17 00:00:00 2001 From: Hans Fugal Date: Mon, 27 Oct 2014 08:53:23 -0700 Subject: [PATCH] (wangle) fix race in Core::detachOne() Summary: In D1618240 I introduced a race condition in `detachOne()`, where `detached_` is incremented and then tested. If the promise and future are racing, they can both see `detached_ == 2` in the conditional, and then they'll both try to free the Core object. This fixes that. It also fixes a related problem (which actually showed up more often with the test I wrote), where we transition into the Done state before setting the value, and then `maybeCallback()` observes the state is Done (because we're just reading an atomic, not grabbing the lock, which is intentional), tries to execute the callback, but `folly::Optional` throws an exception because the value hasn't been set yet (at least in debug it does). I should have listened to my gut and kept the state assignment after the transition action in the first place. Test Plan: New unit test Reviewed By: jsedgwick@fb.com Subscribers: trunkagent, net-systems@, fugalh, exa, njormrod, folly-diffs@, mnd FB internal diff: D1636490 Tasks: 5438209 Blame Revision: D1618240 --- folly/wangle/detail/Core.h | 7 ++++--- folly/wangle/detail/FSM.h | 6 +++++- folly/wangle/test/FSM.cpp | 4 ++-- folly/wangle/test/FutureTest.cpp | 25 ++++++++++++++++++++++++- 4 files changed, 35 insertions(+), 7 deletions(-) diff --git a/folly/wangle/detail/Core.h b/folly/wangle/detail/Core.h index 9b9295d8..f3aa6d24 100644 --- a/folly/wangle/detail/Core.h +++ b/folly/wangle/detail/Core.h @@ -205,9 +205,10 @@ class Core : protected FSM { } void detachOne() { - ++detached_; - assert(detached_ == 1 || detached_ == 2); - if (detached_ == 2) { + auto d = ++detached_; + assert(d >= 1); + assert(d <= 2); + if (d == 2) { // we should have already executed the callback with the value assert(calledBack_); delete this; diff --git a/folly/wangle/detail/FSM.h b/folly/wangle/detail/FSM.h index 95a0bcde..be4eb8ae 100644 --- a/folly/wangle/detail/FSM.h +++ b/folly/wangle/detail/FSM.h @@ -48,13 +48,14 @@ public: } /// Atomically do a state transition with accompanying action. + /// The action will see the old state. /// @returns true on success, false and action unexecuted otherwise template bool updateState(Enum A, Enum B, F const& action) { std::lock_guard lock(mutex_); if (state_ != A) return false; - state_ = B; action(); + state_ = B; return true; } @@ -82,6 +83,9 @@ public: /// } /// /* do unprotected stuff */ /// return; // or otherwise break out of the loop + /// + /// The protected action will see the old state, and the unprotected action + /// will see the new state. template bool updateState(Enum A, Enum B, F1 const& protectedAction, F2 const& unprotectedAction) { diff --git a/folly/wangle/test/FSM.cpp b/folly/wangle/test/FSM.cpp index 1ce78c2e..80ee5364 100644 --- a/folly/wangle/test/FSM.cpp +++ b/folly/wangle/test/FSM.cpp @@ -108,8 +108,8 @@ TEST(FSM, noActionOnBadUpdate) { EXPECT_EQ(0, count); } -TEST(FSM, stateTransitionBeforeAction) { +TEST(FSM, stateTransitionAfterAction) { FSM fsm(State::A); fsm.updateState(State::A, State::B, - [&]{ EXPECT_EQ(State::B, fsm.getState()); }); + [&]{ EXPECT_EQ(State::A, fsm.getState()); }); } diff --git a/folly/wangle/test/FutureTest.cpp b/folly/wangle/test/FutureTest.cpp index 877efcfb..0feaa50f 100644 --- a/folly/wangle/test/FutureTest.cpp +++ b/folly/wangle/test/FutureTest.cpp @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -670,7 +671,6 @@ TEST(Future, waitWithSemaphore) { }); flag = true; result.store(waitWithSemaphore(std::move(n)).value()); - LOG(INFO) << result; }, std::move(f) ); @@ -847,3 +847,26 @@ TEST(Future, getFuture_after_setException) { p.fulfil([]() -> void { throw std::logic_error("foo"); }); EXPECT_THROW(p.getFuture().value(), std::logic_error); } + +TEST(Future, detachRace) { + // Task #5438209 + // This test is designed to detect a race that was in Core::detachOne() + // where detached_ was incremented and then tested, and that + // allowed a race where both Promise and Future would think they were the + // second and both try to delete. This showed up at scale but was very + // difficult to reliably repro in a test. As it is, this only fails about + // once in every 1,000 executions. Doing this 1,000 times is going to make a + // slow test so I won't do that but if it ever fails, take it seriously, and + // run the test binary with "--gtest_repeat=10000 --gtest_filter=*detachRace" + // (Don't forget to enable ASAN) + auto p = folly::make_unique>(); + auto f = folly::make_unique>(p->getFuture()); + folly::Baton<> baton; + std::thread t1([&]{ + baton.post(); + p.reset(); + }); + baton.wait(); + f.reset(); + t1.join(); +} -- 2.34.1