2 * Copyright 2014-present Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
22 #include <folly/detail/Futex.h>
23 #include <folly/hash/Hash.h>
24 #include <folly/synchronization/AtomicStruct.h>
25 #include <folly/system/ThreadId.h>
27 namespace folly { namespace detail {
29 /// MemoryIdler provides helper routines that allow a thread to return
30 /// some of its assigned memory resources back to the system. The intended
31 /// use is that when a thread is waiting for a long time (perhaps it
32 /// is in a LIFO thread pool and hasn't been needed for a long time)
33 /// it should release its thread-local malloc caches (both jemalloc and
34 /// tcmalloc use these for better performance) and unmap the stack pages
35 /// that contain no useful data.
38 /// Returns memory from thread-local allocation pools to the global
39 /// pool, if we know how to for the current malloc implementation.
40 /// jemalloc is supported.
///
/// Takes no arguments and returns nothing. futexWait() calls this,
/// followed by unmapUnusedStack(), before it falls back to an untimed
/// wait.
41 static void flushLocalMallocCaches();
45 /// This value is a tradeoff between reclaiming memory and triggering
46 /// a page fault immediately on wakeup. Note that the actual unit
47 /// of idling for the stack is pages, so the actual stack that
48 /// will be available on wakeup without a page fault is between
49 /// kDefaultStackToRetain and kDefaultStackToRetain + PageSize - 1
/// bytes.
///
/// Used as the default for the `retain` argument of unmapUnusedStack()
/// and the `stackToRetain` argument of futexWait().
51 kDefaultStackToRetain = 1024,
54 /// Uses madvise to discard the portion of the thread's stack that
55 /// currently doesn't hold any data, trying to ensure that no page
56 /// faults will occur during the next `retain` bytes of stack allocation.
///
/// @param retain  number of bytes of stack that should stay usable
///                without a page fault; defaults to kDefaultStackToRetain.
57 static void unmapUnusedStack(size_t retain = kDefaultStackToRetain);
60 /// The system-wide default for the amount of time a blocking
61 /// thread should wait before reclaiming idle memory. Set this to
62 /// Duration::max() to never reclaim: futexWait() then blocks without
/// a timeout and skips the flush. The default value is 5 seconds.
63 /// Endpoints using this idle timeout might randomly wait longer to
64 /// avoid synchronizing their flushes.
///
/// Read with load(std::memory_order_acquire) by the default arguments
/// of getVariationTimeout() and futexWait().
65 static AtomicStruct<std::chrono::steady_clock::duration> defaultIdleTimeout;
67 /// Selects a timeout pseudo-randomly chosen to be between
68 /// idleTimeout and idleTimeout * (1 + timeoutVariationFrac), to
69 /// smooth out the behavior in a bursty system.
///
/// @tparam Clock  clock whose duration type is used and whose now()
///                feeds the hash; defaults to std::chrono::steady_clock.
/// @param idleTimeout  base timeout; defaults to the current value of
///                defaultIdleTimeout.
/// @param timeoutVariationFrac  upper bound on the extra fraction added
///                to idleTimeout (0.5 means up to 50% longer).
///
/// The jitter computation is skipped unless both idleTimeout.count() and
/// timeoutVariationFrac are positive.
///
/// NOTE(review): timeoutVariationFrac is a float, so the scaling below
/// appears to run in single precision; for large tick counts the result
/// is only approximate -- presumably fine for jitter, but confirm.
/// NOTE(review): a very large idleTimeout (e.g. Clock::duration::max())
/// looks like it would overflow when scaled; futexWait() filters that
/// value out before calling, other callers should verify.
70 template <typename Clock = std::chrono::steady_clock>
71 static typename Clock::duration getVariationTimeout(
72 typename Clock::duration idleTimeout
73 = defaultIdleTimeout.load(std::memory_order_acquire),
74 float timeoutVariationFrac = 0.5) {
75 if (idleTimeout.count() > 0 && timeoutVariationFrac > 0) {
76 // hash the current thread id and the time to get the adjustment.
77 // Standard hash func isn't very good, so bit mix the result
// (std::hash of a std::pair is not standard; presumably the
// specialization comes from folly/hash/Hash.h -- verify)
78 auto pr = std::make_pair(getCurrentThreadID(),
79 Clock::now().time_since_epoch().count());
80 std::hash<decltype(pr)> hash_fn;
81 uint64_t h = folly::hash::twang_mix64(hash_fn(pr));
83 // multiplying the duration by a floating point doesn't work, grr..
// so map h from [0, UINT64_MAX] onto [0, timeoutVariationFrac] and
// apply that fraction to the raw tick count instead
85 timeoutVariationFrac / std::numeric_limits<uint64_t>::max() * h;
86 auto tics = uint64_t(idleTimeout.count() * (1 + extraFrac));
// rebuild a duration of the caller's Clock from the scaled tick count
87 idleTimeout = typename Clock::duration(tics);
93 /// Equivalent to fut.futexWait(expected, waitMask), but calls
94 /// flushLocalMallocCaches() and unmapUnusedStack(stackToRetain)
95 /// after idleTimeout has passed (if it has passed). Internally uses
96 /// fut.futexWait and fut.futexWaitUntil. Returns a FutexResult (not a
/// bool); a wait cut short by a signal presumably surfaces as
/// FutexResult::INTERRUPTED -- TODO confirm against Futex.h.
97 /// The actual timeout will be
98 /// pseudo-randomly chosen to be between idleTimeout and idleTimeout *
99 /// (1 + timeoutVariationFrac), to smooth out the behavior in a
100 /// system with bursty requests. The default is to wait up to 50%
101 /// extra, so on average 25% extra.
///
/// @param fut       futex object providing futexWait()/futexWaitUntil().
/// @param expected  value forwarded to fut.futexWait()/futexWaitUntil().
/// @param waitMask  mask forwarded to the futex calls; the default -1
///                  converts to a uint32_t with all bits set.
/// @param idleTimeout  time to wait before reclaiming memory; defaults to
///                  defaultIdleTimeout. Clock::duration::max() disables
///                  reclamation and waits without a timeout.
/// @param stackToRetain  passed to unmapUnusedStack(); defaults to
///                  kDefaultStackToRetain.
/// @param timeoutVariationFrac  maximum extra fraction of jitter added to
///                  idleTimeout via getVariationTimeout().
103 template <typename> class Atom,
104 typename Clock = std::chrono::steady_clock>
105 static FutexResult futexWait(
108 uint32_t waitMask = -1,
109 typename Clock::duration idleTimeout =
110 defaultIdleTimeout.load(std::memory_order_acquire),
111 size_t stackToRetain = kDefaultStackToRetain,
112 float timeoutVariationFrac = 0.5) {
113 if (idleTimeout == Clock::duration::max()) {
114 // no need to use futexWaitUntil if no timeout is possible
115 return fut.futexWait(expected, waitMask);
// Add random jitter so that idle threads do not all flush at once.
// NOTE(review): getVariationTimeout is called without an explicit
// template argument, and its Clock appears only in a non-deduced
// context, so it uses its default (std::chrono::steady_clock) rather
// than this function's Clock -- confirm this is intended for
// non-default clocks.
118 idleTimeout = getVariationTimeout(idleTimeout, timeoutVariationFrac);
// only do a timed wait when there is a positive idle period to wait out
119 if (idleTimeout.count() > 0) {
121 auto rv = fut.futexWaitUntil(
122 expected, Clock::now() + idleTimeout, waitMask);
// TIMEDOUT means the idle period elapsed with no wakeup
123 if (rv == FutexResult::TIMEDOUT) {
127 // finished before timeout hit, no flush
128 assert(rv == FutexResult::VALUE_CHANGED || rv == FutexResult::AWOKEN ||
129 rv == FutexResult::INTERRUPTED);
134 // flush, then wait with no timeout
135 flushLocalMallocCaches();
136 unmapUnusedStack(stackToRetain);
137 return fut.futexWait(expected, waitMask);
141 } // namespace detail