#include "allocator.h"

class rcu {
  template <bool> friend class scoped_rcu_base;
public:
  typedef uint64_t epoch_t;

  typedef void (*deleter_t)(void *);
  struct delete_entry {
    void *ptr;
    intptr_t action; // action < 0 encodes a negated size to dealloc();
                     // action > 0 encodes a deleter_t to invoke

    inline delete_entry(void *ptr, size_t sz)
      : ptr(ptr), action(-sz) {
      INVARIANT(action < 0);
    }
    inline delete_entry(void *ptr, deleter_t fn)
      : ptr(ptr), action(reinterpret_cast<intptr_t>(fn)) {
      INVARIANT(action > 0);
    }
    void run(rcu::sync &s) {
      if (action < 0)
        s.dealloc(ptr, -action);
      else
        (*reinterpret_cast<deleter_t>(action))(ptr);
    }
    bool operator==(const delete_entry &x) const {
      return ptr == x.ptr && action == x.action;
    }
    bool operator!=(const delete_entry &x) const {
      return !operator==(x);
    }
    bool operator<(const delete_entry &x) const {
      return ptr < x.ptr || (ptr == x.ptr && action < x.action);
    }
  };
  typedef basic_px_queue<delete_entry, 4096> px_queue;
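  // Illustrative sketch (buf/n/node are hypothetical, not part of this
  // header): the two constructors give two encodings of a deferred action.
  // A sized entry releases raw memory; a deleter entry runs a typed
  // destructor:
  //
  //   rcu::delete_entry raw(buf, 64);             // run(): s.dealloc(buf, 64)
  //   rcu::delete_entry typed(n, deleter<node>);  // run(): delete (node *) n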
  template <typename T>
  static inline void deleter(void *p) { delete (T *) p; }

  template <typename T>
  static inline void deleter_array(void *p) { delete [] (T *) p; }
#ifdef CHECK_INVARIANTS
  static const uint64_t EpochTimeMultiplier = 10; /* 10 * 1 ms */
#else
  static const uint64_t EpochTimeMultiplier = 25; /* 25 * 40 ms */
#endif

  static_assert(EpochTimeMultiplier >= 1, "EpochTimeMultiplier must be >= 1");

  static const uint64_t EpochTimeUsec = ticker::tick_us * EpochTimeMultiplier;
  static const uint64_t EpochTimeNsec = EpochTimeUsec * 1000;
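  // Worked example (assuming the 40 ms production tick implied by the
  // comment above): EpochTimeUsec = 40,000 us * 25 = 1,000,000 us, so one
  // RCU epoch spans roughly one second of wall-clock time.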
  static const size_t NQueueGroups = 32;
  // all RCU threads interact w/ the RCU subsystem via a sync struct;
  // this is also serving as a memory allocator for the time being
  class sync {
    friend class rcu;
    template <bool> friend class scoped_rcu_base;
  public:
    px_queue queue_;
    px_queue scratch_;
    unsigned depth_; // 0 indicates no rcu region
    unsigned last_reaped_epoch_;
#ifdef ENABLE_EVENT_COUNTERS
    uint64_t last_reaped_timestamp_us_;
    uint64_t last_release_timestamp_us_;
#endif
  private:
    rcu *impl_;

    // local memory allocator
    ssize_t pin_cpu_; // -1 if not pinned
    void *arenas_[allocator::MAX_ARENAS];
    size_t deallocs_[allocator::MAX_ARENAS]; // keeps track of the number of
                                             // un-released deallocations
  public:
    sync(rcu *impl)
      : depth_(0)
      , last_reaped_epoch_(0)
#ifdef ENABLE_EVENT_COUNTERS
      , last_reaped_timestamp_us_(0)
      , last_release_timestamp_us_(0)
#endif
      , impl_(impl)
      , pin_cpu_(-1)
    {
      ALWAYS_ASSERT(((uintptr_t)this % CACHELINE_SIZE) == 0);
      queue_.alloc_freelist(NQueueGroups);
      scratch_.alloc_freelist(NQueueGroups);
      NDB_MEMSET(&arenas_[0], 0, sizeof(arenas_));
      NDB_MEMSET(&deallocs_[0], 0, sizeof(deallocs_));
    }
    inline void set_pin_cpu(size_t cpu) { pin_cpu_ = cpu; }
    // allocate a block of memory of size sz. the caller needs to remember
    // the size of the allocation when calling free
    void *alloc(size_t sz);

    // allocates a block of memory of size sz, with the intention of never
    // freeing it; meant for reasonably large allocations (on the order of pages)
    void *alloc_static(size_t sz);

    void dealloc(void *p, size_t sz);
    void dealloc_rcu(void *p, size_t sz);
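    // Illustrative sketch (buf/sz are hypothetical): dealloc() returns raw
    // memory immediately and must not be used on data a concurrent reader
    // may still hold; dealloc_rcu() defers the release until every thread
    // has passed the current RCU epoch:
    //
    //   void *buf = s.alloc(sz);
    //   // ... publish buf, later unlink it from the shared structure ...
    //   s.dealloc_rcu(buf, sz);   // safe even if readers still hold buf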
    // try to release local arenas back to the allocator based on some simple
    // thresholding heuristics -- this is a relatively expensive operation.
    // returns true if a release was actually performed, false otherwise
    bool try_release();

    void do_cleanup();
    inline unsigned depth() const { return depth_; }
  private:
    inline void
    ensure_arena(size_t arena)
    {
      if (likely(arenas_[arena]))
        return;
      INVARIANT(pin_cpu_ >= 0);
      arenas_[arena] = allocator::AllocateArenas(pin_cpu_, arena);
    }
  };
  // thin forwarders to the calling thread's sync object
  inline void *alloc(size_t sz) { return mysync().alloc(sz); }

  inline void *alloc_static(size_t sz) { return mysync().alloc_static(sz); }

  // this releases memory back to the allocator subsystem;
  // it should NOT be used to free objects!
  inline void dealloc(void *p, size_t sz) { mysync().dealloc(p, sz); }

  void dealloc_rcu(void *p, size_t sz);

  inline bool try_release() { return mysync().try_release(); }

  inline void do_cleanup() { mysync().do_cleanup(); }
  // defer running fn(p) until all threads have moved past the current epoch
  void free_with_fn(void *p, deleter_t fn);

  template <typename T>
  inline void free(T *p) { free_with_fn(p, deleter<T>); }

  template <typename T>
  inline void free_array(T *p) { free_with_fn(p, deleter_array<T>); }
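  // Illustrative sketch (node/unlink are hypothetical): unlink an object
  // while inside an RCU region, then hand it to the RCU subsystem instead
  // of deleting it directly:
  //
  //   node *n = unlink(key);
  //   rcu::s_instance.free(n);   // schedules deleter<node>(n) for a
  //                              // later, quiescent epoch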
  // the tick is in units of rcu ticks
  inline bool
  in_rcu_region(uint64_t &rcu_tick) const
  {
    const sync *s = syncs_.myview();
    if (unlikely(!s))
      return false;
    const bool is_guarded = ticker::s_instance.is_locally_guarded(rcu_tick);
    const bool has_depth = s->depth();
    if (has_depth && !is_guarded)
      INVARIANT(false); // in a region, so the local tick must be guarded
    rcu_tick = to_rcu_ticks(rcu_tick);
    return has_depth;
  }

  inline bool in_rcu_region() const
  {
    uint64_t rcu_tick;
    return in_rcu_region(rcu_tick);
  }
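  // e.g. (illustrative) as a precondition check before touching shared data:
  //
  //   INVARIANT(rcu::s_instance.in_rcu_region());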
  // all threads have moved at least to the cleaning tick, so any pointers <
  // the cleaning tick can be safely purged
  inline uint64_t
  cleaning_rcu_tick_exclusive() const
  {
    return to_rcu_ticks(ticker::s_instance.global_last_tick_exclusive());
  }
  // pin the current thread to the given CPU.
  //
  // this CPU number corresponds to the ones exposed by
  // sched.h. note that we currently pin to the numa node
  // associated with the cpu. memory allocation, however, is
  // done at the granularity of the cpu
  void pin_current_thread(size_t cpu);
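  // e.g. (illustrative; worker_id/ncpus are hypothetical): long-lived worker
  // threads pin themselves once at startup so their arenas stay NUMA-local:
  //
  //   rcu::s_instance.pin_current_thread(worker_id % ncpus);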
  static rcu s_instance CACHE_ALIGNED; // system-wide instance

private:

  rcu(); // private ctor to enforce singleton
  static inline uint64_t constexpr
  to_rcu_ticks(uint64_t ticks)
  {
    return ticks / EpochTimeMultiplier;
  }
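  // e.g. with EpochTimeMultiplier = 25, ticker ticks 0..24 map to rcu
  // tick 0, ticks 25..49 to rcu tick 1, and so on: the rcu tick advances
  // once per epoch.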
  inline sync &mysync() { return syncs_.my(this); }

  percore_lazy<sync> syncs_;
};
template <bool DoCleanup>
class scoped_rcu_base {
public:

  // movable, but not copy-constructable
  scoped_rcu_base(scoped_rcu_base &&) = default;
  scoped_rcu_base(const scoped_rcu_base &) = delete;
  scoped_rcu_base &operator=(const scoped_rcu_base &) = delete;

  scoped_rcu_base()
    : sync_(&rcu::s_instance.mysync()),
      guard_(ticker::s_instance)
  {
    sync_->depth_++;
  }

  ~scoped_rcu_base()
  {
    INVARIANT(sync_->depth_);
    const unsigned new_depth = --sync_->depth_;
    guard_.destroy();
    if (new_depth || !DoCleanup)
      return;
    // out of RCU region now, check if we need to run cleaner
    sync_->do_cleanup();
  }

  inline ticker::guard *guard() { return guard_.obj(); }

private:
  rcu::sync *sync_;
  unmanaged<ticker::guard> guard_;
};
typedef scoped_rcu_base<true> scoped_rcu_region;

class disabled_rcu_region {};
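// Illustrative usage sketch (lookup/node/use are hypothetical): a reader
// brackets its accesses with a scoped_rcu_region. The constructor bumps the
// per-core region depth; the destructor drops it and, for
// scoped_rcu_base<true>, runs the cleaner once the outermost region exits.
//
//   void reader()
//   {
//     scoped_rcu_region rcu_region;   // enter RCU region
//     node *n = lookup("key");        // n cannot be reclaimed while inside
//     use(n);
//   }                                 // exit region; cleanup may run here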