From c593b651d1d019db087b7cc81c26b0f508083691 Mon Sep 17 00:00:00 2001 From: Andrei Bajenov Date: Wed, 19 Nov 2014 17:47:07 -0800 Subject: [PATCH] HHWheelTimer - fix scheduling timeouts within callbacks Summary: This one is a little tricky, but it explains what happened in t5388156. Basically what we observed in the stack traces is that the HHWheelTimer count_ is 0, but there is still a timeout scheduled on the AsyncTimeout. How I can see this happening is the following: - SASL times out - We fall back to insecure mode in the callback and schedule a send of a real message to the server. Here we schedule another timeout on the same HHWheelTimer object. Since count_ is 0 at this point, we schedule a timeout in AsyncTimeout. - Now the initial SASL timeout was late, so the HHWheelTimer is in a catchup state. This means that it's possible for the callback on the timeout that was just scheduled to fire on the same "tick". - This callback invokes SR's callbacks which try to detach the event base from the channel. But it can't detach because we have something scheduled on the AsyncTimeout. - We crash. Test Plan: - will wait for contbuild - Something like this repros the problem: https://phabricator.fb.com/P17220063. Note I hacked the catchup part to get it to work. Now running loadgen with security = permitted causes the crash. // hphp tests are stalled, have to do this... Reviewed By: mshneer@fb.com Subscribers: bmatheny, mshneer, sandeepkk, trunkagent, njormrod, folly-diffs@, alandau, mhorowitz, srenfro, hitesh, wstefancik FB internal diff: D1680735 Tasks: 5388156 Signature: t1:1680735:1416252123:e76668860ccda9380a87996b4fa3de957e129404 --- folly/io/async/HHWheelTimer.cpp | 16 ++++++++--- folly/io/async/HHWheelTimer.h | 11 ++++++++ folly/io/async/test/HHWheelTimerTest.cpp | 36 ++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 4 deletions(-) diff --git a/folly/io/async/HHWheelTimer.cpp b/folly/io/async/HHWheelTimer.cpp index 761d4fd2..5eec0ba4 100644 --- a/folly/io/async/HHWheelTimer.cpp +++ b/folly/io/async/HHWheelTimer.cpp @@ -56,9 +56,9 @@ void HHWheelTimer::Callback::setScheduled(HHWheelTimer* wheel, wheel_ = wheel; - if (wheel_->count_ == 0) { - wheel_->now_ = std::chrono::duration_cast( - std::chrono::steady_clock::now().time_since_epoch()); + // Only update the now_ time if we're not in a timeout expired callback + if (wheel_->count_ == 0 && !wheel_->processingCallbacksGuard_) { + wheel_->now_ = getCurTime(); } expiration_ = wheel_->now_ + timeout; @@ -83,6 +83,7 @@ HHWheelTimer::HHWheelTimer(folly::EventBase* eventBase, , count_(0) , catchupEveryN_(DEFAULT_CATCHUP_EVERY_N) , expirationsSinceCatchup_(0) + , processingCallbacksGuard_(false) { } @@ -126,7 +127,7 @@ void HHWheelTimer::scheduleTimeout(Callback* callback, callback->context_ = RequestContext::saveContext(); - if (count_ == 0) { + if (count_ == 0 && !processingCallbacksGuard_) { this->AsyncTimeout::scheduleTimeout(interval_.count()); } @@ -152,6 +153,13 @@ void HHWheelTimer::timeoutExpired() noexcept { // If destroy() is called inside timeoutExpired(), delay actual destruction // until timeoutExpired() returns DestructorGuard dg(this); + // If scheduleTimeout is called from a callback in this function, it may + // cause inconsistencies in the state of this object. As such, we need + // to treat these calls slightly differently. + processingCallbacksGuard_ = true; + auto reEntryGuard = folly::makeGuard([&] { + processingCallbacksGuard_ = false; + }); // timeoutExpired() can only be invoked directly from the event base loop. // It should never be invoked recursively. diff --git a/folly/io/async/HHWheelTimer.h b/folly/io/async/HHWheelTimer.h index b1f160c5..c0c1f8bf 100644 --- a/folly/io/async/HHWheelTimer.h +++ b/folly/io/async/HHWheelTimer.h @@ -95,6 +95,16 @@ class HHWheelTimer : private folly::AsyncTimeout, return wheel_ != nullptr; } + protected: + /** + * Don't override this unless you're doing a test. This is mainly here so + * that we can override it to simulate lag in steady_clock. + */ + virtual std::chrono::milliseconds getCurTime() { + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()); + } + private: // Get the time remaining until this timeout expires std::chrono::milliseconds getTimeRemaining( @@ -236,6 +246,7 @@ class HHWheelTimer : private folly::AsyncTimeout, uint32_t catchupEveryN_; uint32_t expirationsSinceCatchup_; + bool processingCallbacksGuard_; }; } // folly diff --git a/folly/io/async/test/HHWheelTimerTest.cpp b/folly/io/async/test/HHWheelTimerTest.cpp index fb88f20a..e0452559 100644 --- a/folly/io/async/test/HHWheelTimerTest.cpp +++ b/folly/io/async/test/HHWheelTimerTest.cpp @@ -46,6 +46,16 @@ class TestTimeout : public HHWheelTimer::Callback { std::function fn; }; + +class TestTimeoutDelayed : public TestTimeout { + protected: + std::chrono::milliseconds getCurTime() override { + return std::chrono::duration_cast( + std::chrono::steady_clock::now().time_since_epoch()) - + milliseconds(5); + } +}; + /* * Test firing some simple timeouts that are fired once and never rescheduled */ @@ -86,6 +96,32 @@ TEST(HHWheelTimerTest, FireOnce) { T_CHECK_TIMEOUT(start, end, milliseconds(10)); } +/* + * Test scheduling a timeout from another timeout callback. + */ +BOOST_AUTO_TEST_CASE(CallbackSchedulingTimeout) { + TEventBase eventBase; + StackWheelTimer t(&eventBase, milliseconds(10)); + const HHWheelTimer::Callback* nullCallback = nullptr; + + TestTimeout t1; + // Delayed to simulate the steady_clock counter lagging + TestTimeoutDelayed t2; + + t.scheduleTimeout(&t1, milliseconds(500)); + t1.fn = [&] { t.scheduleTimeout(&t2, milliseconds(1)); }; + // If t is in an inconsistent state, detachEventBase should fail. + t2.fn = [&] { t.detachEventBase(); }; + + BOOST_REQUIRE_EQUAL(t.count(), 1); + + eventBase.loop(); + + BOOST_REQUIRE_EQUAL(t.count(), 0); + BOOST_REQUIRE_EQUAL(t1.timestamps.size(), 1); + BOOST_REQUIRE_EQUAL(t2.timestamps.size(), 1); +} + /* * Test cancelling a timeout when it is scheduled to be fired right away. */ -- 2.34.1