Summary:
This one is a little tricky, but it explains what happened in t5388156. Basically what we observed in the stack traces is that the HHWheelTimer count_ is 0, but there is still a timeout scheduled on the AsyncTimeout.
How I can see this happening is the following:
- SASL times out
- We fall back to insecure mode in the callback and schedule a send of a real message to the server. Here we schedule another timeout on the same HHWheelTimer object. Since count_ is 0 at this point, we schedule a timeout in AsyncTimeout.
- Now the initial SASL timeout was late, so the HHWheelTimer is in a catchup state. This means that it's possible for the callback on the timeout that was just scheduled to fire on the same "tick".
- This callback invokes SR's callbacks which try to detach the event base from the channel. But it can't detach because we have something scheduled on the AsyncTimeout.
- We crash.
Test Plan:
- will wait for contbuild
- Something like this repros the problem: https://phabricator.fb.com/P17220063. Note I hacked the catchup part to get it to work. Now running loadgen with security = permitted causes the crash.
// hphp tests are stalled, have to do this...
Reviewed By: mshneer@fb.com
Subscribers: bmatheny, mshneer, sandeepkk, trunkagent, njormrod, folly-diffs@, alandau, mhorowitz, srenfro, hitesh, wstefancik
FB internal diff:
D1680735
Tasks:
5388156
Signature: t1:
1680735:
1416252123:
e76668860ccda9380a87996b4fa3de957e129404
wheel_ = wheel;
- if (wheel_->count_ == 0) {
- wheel_->now_ = std::chrono::duration_cast<milliseconds>(
- std::chrono::steady_clock::now().time_since_epoch());
+ // Only update the now_ time if we're not in a timeout expired callback
+ if (wheel_->count_ == 0 && !wheel_->processingCallbacksGuard_) {
+ wheel_->now_ = getCurTime();
}
expiration_ = wheel_->now_ + timeout;
, count_(0)
, catchupEveryN_(DEFAULT_CATCHUP_EVERY_N)
, expirationsSinceCatchup_(0)
+ , processingCallbacksGuard_(false)
{
}
callback->context_ = RequestContext::saveContext();
- if (count_ == 0) {
+ if (count_ == 0 && !processingCallbacksGuard_) {
this->AsyncTimeout::scheduleTimeout(interval_.count());
}
// If destroy() is called inside timeoutExpired(), delay actual destruction
// until timeoutExpired() returns
DestructorGuard dg(this);
+ // If scheduleTimeout is called from a callback in this function, it may
+ // cause inconsistencies in the state of this object. As such, we need
+ // to treat these calls slightly differently.
+ processingCallbacksGuard_ = true;
+ auto reEntryGuard = folly::makeGuard([&] {
+ processingCallbacksGuard_ = false;
+ });
// timeoutExpired() can only be invoked directly from the event base loop.
// It should never be invoked recursively.
return wheel_ != nullptr;
}
+ protected:
+ /**
+ * Don't override this unless you're doing a test. This is mainly here so
+ * that we can override it to simulate lag in steady_clock.
+ */
+ virtual std::chrono::milliseconds getCurTime() {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now().time_since_epoch());
+ }
+
private:
// Get the time remaining until this timeout expires
std::chrono::milliseconds getTimeRemaining(
uint32_t catchupEveryN_;
uint32_t expirationsSinceCatchup_;
+ bool processingCallbacksGuard_;
};
} // folly
std::function<void()> fn;
};
+
+class TestTimeoutDelayed : public TestTimeout {
+ protected:
+ std::chrono::milliseconds getCurTime() override {
+ return std::chrono::duration_cast<std::chrono::milliseconds>(
+ std::chrono::steady_clock::now().time_since_epoch()) -
+ milliseconds(5);
+ }
+};
+
/*
* Test firing some simple timeouts that are fired once and never rescheduled
*/
T_CHECK_TIMEOUT(start, end, milliseconds(10));
}
+/*
+ * Test scheduling a timeout from another timeout callback.
+ */
+BOOST_AUTO_TEST_CASE(CallbackSchedulingTimeout) {
+ TEventBase eventBase;
+ StackWheelTimer t(&eventBase, milliseconds(10));
+ const HHWheelTimer::Callback* nullCallback = nullptr;
+
+ TestTimeout t1;
+ // Delayed to simulate the steady_clock counter lagging
+ TestTimeoutDelayed t2;
+
+ t.scheduleTimeout(&t1, milliseconds(500));
+ t1.fn = [&] { t.scheduleTimeout(&t2, milliseconds(1)); };
+ // If t is in an inconsistent state, detachEventBase should fail.
+ t2.fn = [&] { t.detachEventBase(); };
+
+ BOOST_REQUIRE_EQUAL(t.count(), 1);
+
+ eventBase.loop();
+
+ BOOST_REQUIRE_EQUAL(t.count(), 0);
+ BOOST_REQUIRE_EQUAL(t1.timestamps.size(), 1);
+ BOOST_REQUIRE_EQUAL(t2.timestamps.size(), 1);
+}
+
/*
* Test cancelling a timeout when it is scheduled to be fired right away.
*/