/*
 * Copyright 2015 Facebook, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 #ifndef FOLLY_DETAIL_MEMORYIDLER_H
18 #define FOLLY_DETAIL_MEMORYIDLER_H
#include <pthread.h>

#include <atomic>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <limits>
#include <utility>

#include <folly/AtomicStruct.h>
#include <folly/Hash.h>
#include <folly/Traits.h>
#include <folly/detail/Futex.h>
29 // gcc 4.7 doesn't do std::is_trivial correctly, override so we can use
30 // AtomicStruct<duration>
32 struct IsTriviallyCopyable<std::chrono::steady_clock::duration>
37 namespace folly { namespace detail {
39 /// MemoryIdler provides helper routines that allow routines to return
40 /// some assigned memory resources back to the system. The intended
41 /// use is that when a thread is waiting for a long time (perhaps it
42 /// is in a LIFO thread pool and hasn't been needed for a long time)
43 /// it should release its thread-local malloc caches (both jemalloc and
44 /// tcmalloc use these for better performance) and unmap the stack pages
45 /// that contain no useful data.
48 /// Returns memory from thread-local allocation pools to the global
49 /// pool, if we know how to for the current malloc implementation.
50 /// jemalloc is supported.
51 static void flushLocalMallocCaches();
55 /// This value is a tradeoff between reclaiming memory and triggering
56 /// a page fault immediately on wakeup. Note that the actual unit
57 /// of idling for the stack is pages, so the actual stack that
58 /// will be available on wakeup without a page fault is between
59 /// kDefaultStackToRetain and kDefaultStackToRetain + PageSize -
61 kDefaultStackToRetain = 1024,
64 /// Uses madvise to discard the portion of the thread's stack that
65 /// currently doesn't hold any data, trying to ensure that no page
66 /// faults will occur during the next retain bytes of stack allocation
67 static void unmapUnusedStack(size_t retain = kDefaultStackToRetain);
70 /// The system-wide default for the amount of time a blocking
71 /// thread should wait before reclaiming idle memory. Set this to
72 /// Duration::max() to never wait. The default value is 5 seconds.
73 /// Endpoints using this idle timeout might randomly wait longer to
74 /// avoid synchronizing their flushes.
75 static AtomicStruct<std::chrono::steady_clock::duration> defaultIdleTimeout;
77 /// Selects a timeout pseudo-randomly chosen to be between
78 /// idleTimeout and idleTimeout * (1 + timeoutVariationFraction), to
79 /// smooth out the behavior in a bursty system
80 template <typename Clock = std::chrono::steady_clock>
81 static typename Clock::duration getVariationTimeout(
82 typename Clock::duration idleTimeout
83 = defaultIdleTimeout.load(std::memory_order_acquire),
84 float timeoutVariationFrac = 0.5) {
85 if (idleTimeout.count() > 0 && timeoutVariationFrac > 0) {
86 // hash the pthread_t and the time to get the adjustment.
87 // Standard hash func isn't very good, so bit mix the result
88 auto pr = std::make_pair(pthread_self(),
89 Clock::now().time_since_epoch().count());
90 std::hash<decltype(pr)> hash_fn;
91 uint64_t h = folly::hash::twang_mix64(hash_fn(pr));
93 // multiplying the duration by a floating point doesn't work, grr..
95 timeoutVariationFrac / std::numeric_limits<uint64_t>::max() * h;
96 uint64_t tics = idleTimeout.count() * (1 + extraFrac);
97 idleTimeout = typename Clock::duration(tics);
103 /// Equivalent to fut.futexWait(expected, waitMask), but calls
104 /// flushLocalMallocCaches() and unmapUnusedStack(stackToRetain)
105 /// after idleTimeout has passed (if it has passed). Internally uses
106 /// fut.futexWait and fut.futexWaitUntil. Like futexWait, returns
107 /// false if interrupted with a signal. The actual timeout will be
108 /// pseudo-randomly chosen to be between idleTimeout and idleTimeout *
109 /// (1 + timeoutVariationFraction), to smooth out the behavior in a
110 /// system with bursty requests. The default is to wait up to 50%
111 /// extra, so on average 25% extra
112 template <template <typename> class Atom,
113 typename Clock = std::chrono::steady_clock>
114 static bool futexWait(
117 uint32_t waitMask = -1,
118 typename Clock::duration idleTimeout
119 = defaultIdleTimeout.load(std::memory_order_acquire),
120 size_t stackToRetain = kDefaultStackToRetain,
121 float timeoutVariationFrac = 0.5) {
123 if (idleTimeout == Clock::duration::max()) {
124 // no need to use futexWaitUntil if no timeout is possible
125 return fut.futexWait(expected, waitMask);
128 idleTimeout = getVariationTimeout(idleTimeout, timeoutVariationFrac);
129 if (idleTimeout.count() > 0) {
131 auto rv = fut.futexWaitUntil(
132 expected, Clock::now() + idleTimeout, waitMask);
133 if (rv == FutexResult::TIMEDOUT) {
137 // finished before timeout hit, no flush
138 assert(rv == FutexResult::VALUE_CHANGED || rv == FutexResult::AWOKEN ||
139 rv == FutexResult::INTERRUPTED);
140 return rv == FutexResult::AWOKEN;
144 // flush, then wait with no timeout
145 flushLocalMallocCaches();
146 unmapUnusedStack(stackToRetain);
147 return fut.futexWait(expected, waitMask);
151 }} // namespace folly::detail