Summary:
ThreadSanitizer (TSAN) reported a data race while I was running a sanitizer build of my service:
```
WARNING: ThreadSanitizer: data race (pid=266)
Read of size 8 at 0x7b58000c0c08 by thread T370:
@ folly::Codel::overloaded(std::chrono::duration<long, std::ratio<1l, 1000000000l> >) at ./folly/executors/Codel.cpp:44
@ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::Worker<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119
@ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
@ __tsan_thread_start_func at crtstuff.c:?
Previous write of size 8 at 0x7b58000c0c08 by thread T371:
@ folly::Codel::overloaded(std::chrono::duration<long, std::ratio<1l, 1000000000l> >) at ./folly/executors/Codel.cpp:62
@ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::Worker<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119
@ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
@ __tsan_thread_start_func at crtstuff.c:?
Location is heap block of size 744 at 0x7b58000c0c00 allocated by thread T314:
@ operator new(unsigned long) at ??:?
@ PriorityImplT at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:826
@ void __gnu_cxx::new_allocator<apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > > >::construct<apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >, std::array<std::pair<std::shared_ptr<apache::thrift::concurrency::ThreadFactory>, unsigned long>, 5ul>&, bool&, unsigned long&>(apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >*, std::array<std::pair<std::shared_ptr<apache::thrift::concurrency::ThreadFactory>, unsigned long>, 5ul>&, bool&, unsigned long&)
@ std::shared_ptr<apache::thrift::concurrency::PriorityThreadManager> apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >(std::array<unsigned long, 5ul> const&, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1090
@ std::shared_ptr<apache::thrift::concurrency::PriorityThreadManager> apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >(unsigned long, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1100
@ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
@ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
@ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
@ __tsan_thread_start_func at crtstuff.c:?
Thread T370 (tid=638, running) created by thread T314 at:
@ pthread_create at ??:?
@ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
@ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185
@ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840
@ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347
@ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
@ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
@ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
@ __tsan_thread_start_func at crtstuff.c:?
Thread T371 (tid=639, running) created by thread T314 at:
@ pthread_create at ??:?
@ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
@ apache::thrift::concurrency::ThreadManager::ImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185
@ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT<folly::LifoSemImpl<std::atomic, folly::Baton<std::atomic, true, true> > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840
@ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347
@ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475
@ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186
@ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200
@ __tsan_thread_start_func at crtstuff.c:?
Thread T314 (tid=582, running) created by main thread at:
@ pthread_create at ??:?
@ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108
...
```
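The trace points to a data race in how `codelMinDelay_` is read and written by concurrent calls to `Codel::overloaded()`. I couldn't get `std::atomic<std::chrono::nanoseconds>` to compile,
so `codelMinDelay_` and `codelIntervalTime_` are now stored as `std::atomic<uint64_t>` nanosecond counts (`codelMinDelayNs_`, `codelIntervalTimeNs_`) and converted to and from the `std::chrono` types wherever they are used.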
Reviewed By: yfeldblum
Differential Revision:
D5759588
fbshipit-source-id:
8213f3789808265ddfe5ab122f0f86490d0ea6ea
DEFINE_int32(codel_interval, 100, "Codel default interval time in ms");
DEFINE_int32(codel_target_delay, 5, "Target codel queueing delay in ms");
-using std::chrono::nanoseconds;
-using std::chrono::milliseconds;
+using namespace std::chrono;
namespace folly {
Codel::Codel()
- : codelMinDelay_(0),
- codelIntervalTime_(std::chrono::steady_clock::now()),
+ : codelMinDelayNs_(0),
+ codelIntervalTimeNs_(
+ duration_cast<nanoseconds>(steady_clock::now().time_since_epoch())
+ .count()),
codelResetDelay_(true),
overloaded_(false) {}
-bool Codel::overloaded(std::chrono::nanoseconds delay) {
+bool Codel::overloaded(nanoseconds delay) {
bool ret = false;
- auto now = std::chrono::steady_clock::now();
+ auto now = steady_clock::now();
// Avoid another thread updating the value at the same time we are using it
// to calculate the overloaded state
- auto minDelay = codelMinDelay_;
+ auto minDelay = nanoseconds(codelMinDelayNs_);
- if (now > codelIntervalTime_ &&
+ if (now > steady_clock::time_point(nanoseconds(codelIntervalTimeNs_)) &&
// testing before exchanging is more cacheline-friendly
(!codelResetDelay_.load(std::memory_order_acquire) &&
!codelResetDelay_.exchange(true))) {
- codelIntervalTime_ = now + getInterval();
+ codelIntervalTimeNs_ =
+ duration_cast<nanoseconds>((now + getInterval()).time_since_epoch())
+ .count();
if (minDelay > getTargetDelay()) {
overloaded_ = true;
// and that it happens after the interval reset above
if (codelResetDelay_.load(std::memory_order_acquire) &&
codelResetDelay_.exchange(false)) {
- codelMinDelay_ = delay;
+ codelMinDelayNs_ = delay.count();
// More than one request must come in during an interval before codel
// starts dropping requests
return false;
- } else if (delay < codelMinDelay_) {
- codelMinDelay_ = delay;
+ } else if (delay < nanoseconds(codelMinDelayNs_)) {
+ codelMinDelayNs_ = delay.count();
}
// Here is where we apply different logic than codel proper. Instead of
}
nanoseconds Codel::getMinDelay() {
- return codelMinDelay_;
+ return nanoseconds(codelMinDelayNs_);
}
milliseconds Codel::getInterval() {
std::chrono::milliseconds getSloughTimeout();
private:
- std::chrono::nanoseconds codelMinDelay_;
- std::chrono::time_point<std::chrono::steady_clock> codelIntervalTime_;
+ std::atomic<uint64_t> codelMinDelayNs_;
+ std::atomic<uint64_t> codelIntervalTimeNs_;
// flag to make overloaded() thread-safe, since we only want
// to reset the delay once per time period