folly/ThreadCachedInt.h

   1 /*
   2  * Copyright 2016 Facebook, Inc.
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *   http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 /**
  18  * Higher performance (up to 10x) atomic increment using thread caching.
  19  *
  20  * @author Spencer Ahrens (sahrens)
  21  */
  22
  23 #pragma once
  24
  25 #include <atomic>
  26
  27 #include <boost/noncopyable.hpp>
  28
  29 #include <folly/Likely.h>
  30 #include <folly/ThreadLocal.h>
  31
  32 namespace folly {
  33
  34
  35 // Note that readFull requires holding a lock and iterating through all of the
  36 // thread local objects with the same Tag, so if you have a lot of
  37 // ThreadCachedInt's you should consider breaking up the Tag space even
  38 // further.
  39 template <class IntT, class Tag=IntT>
  40 class ThreadCachedInt : boost::noncopyable {
  41   struct IntCache;
  42
  43  public:
  44   explicit ThreadCachedInt(IntT initialVal = 0, uint32_t cacheSize = 1000)
  45     : target_(initialVal), cacheSize_(cacheSize) {
  46   }
  47
  48   void increment(IntT inc) {
  49     auto cache = cache_.get();
  50     if (UNLIKELY(cache == nullptr || cache->parent_ == nullptr)) {
  51       cache = new IntCache(*this);
  52       cache_.reset(cache);
  53     }
  54     cache->increment(inc);
  55   }
  56
  57   // Quickly grabs the current value which may not include some cached
  58   // increments.
  59   IntT readFast() const {
  60     return target_.load(std::memory_order_relaxed);
  61   }
  62
  63   // Reads the current value plus all the cached increments.  Requires grabbing
  64   // a lock, so this is significantly slower than readFast().
  65   IntT readFull() const {
  66     IntT ret = readFast();
  67     for (const auto& cache : cache_.accessAllThreads()) {
  68       if (!cache.reset_.load(std::memory_order_acquire)) {
  69         ret += cache.val_.load(std::memory_order_relaxed);
  70       }
  71     }
  72     return ret;
  73   }
  74
  75   // Quickly reads and resets current value (doesn't reset cached increments).
  76   IntT readFastAndReset() {
  77     return target_.exchange(0, std::memory_order_release);
  78   }
  79
  80   // This function is designed for accumulating into another counter, where you
  81   // only want to count each increment once.  It can still get the count a
  82   // little off, however, but it should be much better than calling readFull()
  83   // and set(0) sequentially.
  84   IntT readFullAndReset() {
  85     IntT ret = readFastAndReset();
  86     for (auto& cache : cache_.accessAllThreads()) {
  87       if (!cache.reset_.load(std::memory_order_acquire)) {
  88         ret += cache.val_.load(std::memory_order_relaxed);
  89         cache.reset_.store(true, std::memory_order_release);
  90       }
  91     }
  92     return ret;
  93   }
  94
  95   void setCacheSize(uint32_t newSize) {
  96     cacheSize_.store(newSize, std::memory_order_release);
  97   }
  98
  99   uint32_t getCacheSize() const {
 100     return cacheSize_.load();
 101   }
 102
 103   ThreadCachedInt& operator+=(IntT inc) { increment(inc); return *this; }
 104   ThreadCachedInt& operator-=(IntT inc) { increment(-inc); return *this; }
 105   // pre-increment (we don't support post-increment)
 106   ThreadCachedInt& operator++() { increment(1); return *this; }
 107   ThreadCachedInt& operator--() { increment(-1); return *this; }
 108
 109   // Thread-safe set function.
 110   // This is a best effort implementation. In some edge cases, there could be
 111   // data loss (missing counts)
 112   void set(IntT newVal) {
 113     for (auto& cache : cache_.accessAllThreads()) {
 114       cache.reset_.store(true, std::memory_order_release);
 115     }
 116     target_.store(newVal, std::memory_order_release);
 117   }
 118
 119   // This is a little tricky - it's possible that our IntCaches are still alive
 120   // in another thread and will get destroyed after this destructor runs, so we
 121   // need to make sure we signal that this parent is dead.
 122   ~ThreadCachedInt() {
 123     for (auto& cache : cache_.accessAllThreads()) {
 124       cache.parent_ = nullptr;
 125     }
 126   }
 127
 128  private:
 129   std::atomic<IntT> target_;
 130   std::atomic<uint32_t> cacheSize_;
 131   ThreadLocalPtr<IntCache,Tag> cache_; // Must be last for dtor ordering
 132
 133   // This should only ever be modified by one thread
 134   struct IntCache {
 135     ThreadCachedInt* parent_;
 136     mutable std::atomic<IntT> val_;
 137     mutable uint32_t numUpdates_;
 138     std::atomic<bool> reset_;
 139
 140     explicit IntCache(ThreadCachedInt& parent)
 141         : parent_(&parent), val_(0), numUpdates_(0), reset_(false) {}
 142
 143     void increment(IntT inc) {
 144       if (LIKELY(!reset_.load(std::memory_order_acquire))) {
 145         // This thread is the only writer to val_, so it's fine do do
 146         // a relaxed load and do the addition non-atomically.
 147         val_.store(
 148           val_.load(std::memory_order_relaxed) + inc,
 149           std::memory_order_release
 150         );
 151       } else {
 152         val_.store(inc, std::memory_order_relaxed);
 153         reset_.store(false, std::memory_order_release);
 154       }
 155       ++numUpdates_;
 156       if (UNLIKELY(numUpdates_ >
 157                    parent_->cacheSize_.load(std::memory_order_acquire))) {
 158         flush();
 159       }
 160     }
 161
 162     void flush() const {
 163       parent_->target_.fetch_add(val_, std::memory_order_release);
 164       val_.store(0, std::memory_order_release);
 165       numUpdates_ = 0;
 166     }
 167
 168     ~IntCache() {
 169       if (parent_) {
 170         flush();
 171       }
 172     }
 173   };
 174 };
 175
 176 }