/*
 * Copyright 2015 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #ifndef FOLLY_DETAIL_MEMORYIDLER_H
18 #define FOLLY_DETAIL_MEMORYIDLER_H
#include <pthread.h>

#include <atomic>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <limits>
#include <utility>

#include <folly/AtomicStruct.h>
#include <folly/Hash.h>
#include <folly/Traits.h>
#include <folly/detail/Futex.h>
29 // gcc 4.7 doesn't do std::is_trivial correctly, override so we can use
30 // AtomicStruct<duration>
32 struct IsTriviallyCopyable<std::chrono::steady_clock::duration>
37 namespace folly { namespace detail {
39 /// MemoryIdler provides helper routines that allow routines to return
40 /// some assigned memory resources back to the system. The intended
41 /// use is that when a thread is waiting for a long time (perhaps it
42 /// is in a LIFO thread pool and hasn't been needed for a long time)
43 /// it should release its thread-local malloc caches (both jemalloc and
44 /// tcmalloc use these for better performance) and unmap the stack pages
45 /// that contain no useful data.
48 /// Returns memory from thread-local allocation pools to the global
49 /// pool, if we know how to for the current malloc implementation.
50 /// jemalloc is supported.
51 static void flushLocalMallocCaches();
55 /// This value is a tradeoff between reclaiming memory and triggering
56 /// a page fault immediately on wakeup. Note that the actual unit
57 /// of idling for the stack is pages, so the actual stack that
58 /// will be available on wakeup without a page fault is between
59 /// kDefaultStackToRetain and kDefaultStackToRetain + PageSize -
61 kDefaultStackToRetain = 1024,
64 /// Uses madvise to discard the portion of the thread's stack that
65 /// currently doesn't hold any data, trying to ensure that no page
66 /// faults will occur during the next retain bytes of stack allocation
67 static void unmapUnusedStack(size_t retain = kDefaultStackToRetain);
70 /// The system-wide default for the amount of time a blocking
71 /// thread should wait before reclaiming idle memory. Set this to
72 /// Duration::max() to never wait. The default value is 5 seconds.
73 /// Endpoints using this idle timeout might randomly wait longer to
74 /// avoid synchronizing their flushes.
75 static AtomicStruct<std::chrono::steady_clock::duration> defaultIdleTimeout;
77 /// Selects a timeout pseudo-randomly chosen to be between
78 /// idleTimeout and idleTimeout * (1 + timeoutVariationFraction), to
79 /// smooth out the behavior in a bursty system
80 template <typename Clock = std::chrono::steady_clock>
81 static typename Clock::duration getVariationTimeout(
82 typename Clock::duration idleTimeout
83 = defaultIdleTimeout.load(std::memory_order_acquire),
84 float timeoutVariationFrac = 0.5) {
85 if (idleTimeout.count() > 0 && timeoutVariationFrac > 0) {
86 // hash the pthread_t and the time to get the adjustment.
87 // Standard hash func isn't very good, so bit mix the result
88 auto pr = std::make_pair(pthread_self(),
89 Clock::now().time_since_epoch().count());
90 std::hash<decltype(pr)> hash_fn;
91 uint64_t h = folly::hash::twang_mix64(hash_fn(pr));
93 // multiplying the duration by a floating point doesn't work, grr..
95 timeoutVariationFrac / std::numeric_limits<uint64_t>::max() * h;
96 uint64_t tics = idleTimeout.count() * (1 + extraFrac);
97 idleTimeout = typename Clock::duration(tics);
103 /// Equivalent to fut.futexWait(expected, waitMask), but calls
104 /// flushLocalMallocCaches() and unmapUnusedStack(stackToRetain)
105 /// after idleTimeout has passed (if it has passed). Internally uses
106 /// fut.futexWait and fut.futexWaitUntil. Like futexWait, returns
107 /// false if interrupted with a signal. The actual timeout will be
108 /// pseudo-randomly chosen to be between idleTimeout and idleTimeout *
109 /// (1 + timeoutVariationFraction), to smooth out the behavior in a
110 /// system with bursty requests. The default is to wait up to 50%
111 /// extra, so on average 25% extra
112 template <template <typename> class Atom,
113 typename Clock = std::chrono::steady_clock>
114 static bool futexWait(
117 uint32_t waitMask = -1,
118 typename Clock::duration idleTimeout
119 = defaultIdleTimeout.load(std::memory_order_acquire),
120 size_t stackToRetain = kDefaultStackToRetain,
121 float timeoutVariationFrac = 0.5) {
123 if (idleTimeout == Clock::duration::max()) {
124 // no need to use futexWaitUntil if no timeout is possible
125 return fut.futexWait(expected, waitMask);
128 idleTimeout = getVariationTimeout(idleTimeout, timeoutVariationFrac);
129 if (idleTimeout.count() > 0) {
131 auto rv = fut.futexWaitUntil(
132 expected, Clock::now() + idleTimeout, waitMask);
133 if (rv == FutexResult::TIMEDOUT) {
137 // finished before timeout hit, no flush
138 assert(rv == FutexResult::VALUE_CHANGED || rv == FutexResult::AWOKEN ||
139 rv == FutexResult::INTERRUPTED);
140 return rv == FutexResult::AWOKEN;
144 // flush, then wait with no timeout
145 flushLocalMallocCaches();
146 unmapUnusedStack(stackToRetain);
147 return fut.futexWait(expected, waitMask);
151 }} // namespace folly::detail