From 889037965b441074a25426837e73acfefadebe51 Mon Sep 17 00:00:00 2001 From: Kenny Yu Date: Thu, 7 Sep 2017 10:46:48 -0700 Subject: [PATCH] Fix data race in folly/executors/Codel.cpp exposed by TSAN Summary: I found a data race with TSAN while attempting to run a sanitizer version of my service: ``` WARNING: ThreadSanitizer: data race (pid=266) Read of size 8 at 0x7b58000c0c08 by thread T370: @ folly::Codel::overloaded(std::chrono::duration >) at ./folly/executors/Codel.cpp:44 @ apache::thrift::concurrency::ThreadManager::ImplT > >::Worker > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119 @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200 @ __tsan_thread_start_func at crtstuff.c:? Previous write of size 8 at 0x7b58000c0c08 by thread T371: @ folly::Codel::overloaded(std::chrono::duration >) at ./folly/executors/Codel.cpp:62 @ apache::thrift::concurrency::ThreadManager::ImplT > >::Worker > >::run() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:119 @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200 @ __tsan_thread_start_func at crtstuff.c:? Location is heap block of size 744 at 0x7b58000c0c00 allocated by thread T314: @ operator new(unsigned long) at ??:? @ PriorityImplT at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:826 @ void __gnu_cxx::new_allocator > > >::construct > >, std::array, unsigned long>, 5ul>&, bool&, unsigned long&>(apache::thrift::concurrency::PriorityThreadManager::PriorityImplT > >*, std::array, unsigned long>, 5ul>&, bool&, unsigned long&) @ std::shared_ptr apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager > >(std::array const&, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1090 @ std::shared_ptr apache::thrift::concurrency::PriorityThreadManager::newPriorityThreadManager > >(unsigned long, bool, unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:1100 @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475 @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186 @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200 @ __tsan_thread_start_func at crtstuff.c:? Thread T370 (tid=638, running) created by thread T314 at: @ pthread_create at ??:? @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108 @ apache::thrift::concurrency::ThreadManager::ImplT > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185 @ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840 @ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347 @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475 @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186 @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200 @ __tsan_thread_start_func at crtstuff.c:? Thread T371 (tid=639, running) created by thread T314 at: @ pthread_create at ??:? @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108 @ apache::thrift::concurrency::ThreadManager::ImplT > >::addWorker(unsigned long) at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:185 @ apache::thrift::concurrency::PriorityThreadManager::PriorityImplT > >::start() at ./thrift/lib/cpp/concurrency/ThreadManager.tcc:840 @ apache::thrift::ThriftServer::setup() at ./thrift/lib/cpp2/server/ThriftServer.cpp:347 @ apache::thrift::ThriftServer::serve() at ./thrift/lib/cpp2/server/ThriftServer.cpp:475 @ apache::thrift::server::TServer::run() at ./thrift/lib/cpp/server/TServer.h:186 @ apache::thrift::concurrency::PthreadThread::threadMain(void*) at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:200 @ __tsan_thread_start_func at crtstuff.c:? Thread T314 (tid=582, running) created by main thread at: @ pthread_create at ??:? @ apache::thrift::concurrency::PthreadThread::start() at ./thrift/lib/cpp/concurrency/PosixThreadFactory.cpp:108 ... ``` Looks like there is a data race in how `codelMinDelay_` is used. I couldn't get `std::atomic` to compile with `std::chrono::nanoseconds`, so I used `std::atomic` and converted between `uint64_t` and time types appropriately. Reviewed By: yfeldblum Differential Revision: D5759588 fbshipit-source-id: 8213f3789808265ddfe5ab122f0f86490d0ea6ea --- folly/executors/Codel.cpp | 29 ++++++++++++++++------------- folly/executors/Codel.h | 4 ++-- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/folly/executors/Codel.cpp b/folly/executors/Codel.cpp index c84b9be3..d5da27e3 100644 --- a/folly/executors/Codel.cpp +++ b/folly/executors/Codel.cpp @@ -22,30 +22,33 @@ DEFINE_int32(codel_interval, 100, "Codel default interval time in ms"); DEFINE_int32(codel_target_delay, 5, "Target codel queueing delay in ms"); -using std::chrono::nanoseconds; -using std::chrono::milliseconds; +using namespace std::chrono; namespace folly { Codel::Codel() - : codelMinDelay_(0), - codelIntervalTime_(std::chrono::steady_clock::now()), + : codelMinDelayNs_(0), + codelIntervalTimeNs_( + duration_cast(steady_clock::now().time_since_epoch()) + .count()), codelResetDelay_(true), overloaded_(false) {} -bool Codel::overloaded(std::chrono::nanoseconds delay) { +bool Codel::overloaded(nanoseconds delay) { bool ret = false; - auto now = std::chrono::steady_clock::now(); + auto now = steady_clock::now(); // Avoid another thread updating the value at the same time we are using it // to calculate the overloaded state - auto minDelay = codelMinDelay_; + auto minDelay = nanoseconds(codelMinDelayNs_); - if (now > codelIntervalTime_ && + if (now > steady_clock::time_point(nanoseconds(codelIntervalTimeNs_)) && // testing before exchanging is more cacheline-friendly (!codelResetDelay_.load(std::memory_order_acquire) && !codelResetDelay_.exchange(true))) { - codelIntervalTime_ = now + getInterval(); + codelIntervalTimeNs_ = + duration_cast((now + getInterval()).time_since_epoch()) + .count(); if (minDelay > getTargetDelay()) { overloaded_ = true; @@ -57,12 +60,12 @@ bool Codel::overloaded(std::chrono::nanoseconds delay) { // and that it happens after the interval reset above if (codelResetDelay_.load(std::memory_order_acquire) && codelResetDelay_.exchange(false)) { - codelMinDelay_ = delay; + codelMinDelayNs_ = delay.count(); // More than one request must come in during an interval before codel // starts dropping requests return false; - } else if (delay < codelMinDelay_) { - codelMinDelay_ = delay; + } else if (delay < nanoseconds(codelMinDelayNs_)) { + codelMinDelayNs_ = delay.count(); } // Here is where we apply different logic than codel proper. Instead of @@ -84,7 +87,7 @@ int Codel::getLoad() { } nanoseconds Codel::getMinDelay() { - return codelMinDelay_; + return nanoseconds(codelMinDelayNs_); } milliseconds Codel::getInterval() { diff --git a/folly/executors/Codel.h b/folly/executors/Codel.h index 98718bb8..71370574 100644 --- a/folly/executors/Codel.h +++ b/folly/executors/Codel.h @@ -80,8 +80,8 @@ class Codel { std::chrono::milliseconds getSloughTimeout(); private: - std::chrono::nanoseconds codelMinDelay_; - std::chrono::time_point codelIntervalTime_; + std::atomic codelMinDelayNs_; + std::atomic codelIntervalTimeNs_; // flag to make overloaded() thread-safe, since we only want // to reset the delay once per time period -- 2.34.1