2 * Copyright 2017 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
24 #include <folly/String.h>
25 #include <folly/stats/BucketedTimeSeries.h>
26 #include <glog/logging.h>
31 * This class represents a timeseries which keeps several levels of data
32 * granularity (similar in principle to the loads reported by the UNIX
33 * 'uptime' command). It uses several instances (one per level) of
34 * BucketedTimeSeries as the underlying storage.
36 * This can easily be used to track sums (and thus rates or averages) over
37 * several predetermined time periods, as well as all-time sums. For example,
38 * you would use to it to track query rate or response speed over the last
39 * 5, 15, 30, and 60 minutes.
41 * The MultiLevelTimeSeries takes a list of level durations as an input; the
42 * durations must be strictly increasing. Furthermore a special level can be
43 * provided with a duration of '0' -- this will be an "all-time" level. If
44 * an all-time level is provided, it MUST be the last level present.
46 * The class assumes that time advances forward -- you can't retroactively add
47 * values for events in the past -- the 'now' argument is provided for better
48 * efficiency and ease of unittesting.
50 * The class is not thread-safe -- use your own synchronization!
52 template <typename VT, typename CT = LegacyStatsClock<std::chrono::seconds>>
53 class MultiLevelTimeSeries {
57 using Duration = typename Clock::duration;
58 using TimePoint = typename Clock::time_point;
59 using Level = folly::BucketedTimeSeries<ValueType, Clock>;
62 * Create a new MultiLevelTimeSeries.
64 * This creates a new MultiLevelTimeSeries that tracks time series data at the
65 * specified time durations (level). The time series data tracked at each
66 * level is then further divided by numBuckets for memory efficiency.
68 * The durations must be strictly increasing. Furthermore a special level can
69 * be provided with a duration of '0' -- this will be an "all-time" level. If
70 * an all-time level is provided, it MUST be the last level present.
75 const Duration levelDurations[]);
79 std::initializer_list<Duration> durations);
82 * Return the number of buckets used to track time series at each level.
84 size_t numBuckets() const {
85 // The constructor ensures that levels_ has at least one item
86 return levels_[0].numBuckets();
90 * Return the number of levels tracked by MultiLevelTimeSeries.
92 size_t numLevels() const { return levels_.size(); }
95 * Get the BucketedTimeSeries backing the specified level.
97 * Note: you should generally call update() or flush() before accessing the
98 * data. Otherwise you may be reading stale data if update() or flush() has
99 * not been called recently.
101 const Level& getLevel(size_t level) const {
102 CHECK_LT(level, levels_.size());
103 return levels_[level];
107 * Get the highest granularity level that is still large enough to contain
108 * data going back to the specified start time.
110 * Note: you should generally call update() or flush() before accessing the
111 * data. Otherwise you may be reading stale data if update() or flush() has
112 * not been called recently.
114 const Level& getLevel(TimePoint start) const {
115 for (const auto& level : levels_) {
116 if (level.isAllTime()) {
119 // Note that we use duration() here rather than elapsed().
120 // If duration is large enough to contain the start time then this level
121 // is good enough, even if elapsed() indicates that no data was recorded
122 // before the specified start time.
123 if (level.getLatestTime() - level.duration() <= start) {
127 // We should always have an all-time level, so this is never reached.
128 LOG(FATAL) << "No level of timeseries covers internval"
129 << " from " << start.time_since_epoch().count() << " to now";
130 return levels_.back();
134 * Get the BucketedTimeSeries backing the specified level.
136 * Note: you should generally call update() or flush() before accessing the
137 * data. Otherwise you may be reading stale data if update() or flush() has
138 * not been called recently.
140 const Level& getLevelByDuration(Duration duration) const {
141 // since the number of levels is expected to be small (less than 5 in most
142 // cases), a simple linear scan would be efficient and is intentionally
143 // chosen here over other alternatives for lookup.
144 for (const auto& level : levels_) {
145 if (level.duration() == duration) {
149 throw std::out_of_range(folly::to<std::string>(
150 "No level of duration ", duration.count(), " found"));
154 * Return the sum of all the data points currently tracked at this level.
156 * Note: you should generally call update() or flush() before accessing the
157 * data. Otherwise you may be reading stale data if update() or flush() has
158 * not been called recently.
160 ValueType sum(size_t level) const {
161 return getLevel(level).sum();
165 * Return the average (sum / count) of all the data points currently tracked
168 * The return type may be specified to control whether floating-point or
169 * integer division should be performed.
171 * Note: you should generally call update() or flush() before accessing the
172 * data. Otherwise you may be reading stale data if update() or flush() has
173 * not been called recently.
175 template <typename ReturnType = double>
176 ReturnType avg(size_t level) const {
177 return getLevel(level).template avg<ReturnType>();
181 * Return the rate (sum divided by elaspsed time) of the all data points
182 * currently tracked at this level.
184 * Note: you should generally call update() or flush() before accessing the
185 * data. Otherwise you may be reading stale data if update() or flush() has
186 * not been called recently.
188 template <typename ReturnType = double, typename Interval = Duration>
189 ReturnType rate(size_t level) const {
190 return getLevel(level).template rate<ReturnType, Interval>();
194 * Return the number of data points currently tracked at this level.
196 * Note: you should generally call update() or flush() before accessing the
197 * data. Otherwise you may be reading stale data if update() or flush() has
198 * not been called recently.
200 uint64_t count(size_t level) const {
201 return getLevel(level).count();
205 * Return the count divided by the elapsed time tracked at this level.
207 * Note: you should generally call update() or flush() before accessing the
208 * data. Otherwise you may be reading stale data if update() or flush() has
209 * not been called recently.
211 template <typename ReturnType = double, typename Interval = Duration>
212 ReturnType countRate(size_t level) const {
213 return getLevel(level).template countRate<ReturnType, Interval>();
217 * Return the sum of all the data points currently tracked at this level.
219 * This method is identical to sum(size_t level) above but takes in the
220 * duration that the user is interested in querying as the parameter.
222 * Note: you should generally call update() or flush() before accessing the
223 * data. Otherwise you may be reading stale data if update() or flush() has
224 * not been called recently.
226 ValueType sum(Duration duration) const {
227 return getLevelByDuration(duration).sum();
231 * Return the average (sum / count) of all the data points currently tracked
234 * This method is identical to avg(size_t level) above but takes in the
235 * duration that the user is interested in querying as the parameter.
237 * Note: you should generally call update() or flush() before accessing the
238 * data. Otherwise you may be reading stale data if update() or flush() has
239 * not been called recently.
241 template <typename ReturnType = double>
242 ReturnType avg(Duration duration) const {
243 return getLevelByDuration(duration).template avg<ReturnType>();
247 * Return the rate (sum divided by elaspsed time) of the all data points
248 * currently tracked at this level.
250 * This method is identical to rate(size_t level) above but takes in the
251 * duration that the user is interested in querying as the parameter.
253 * Note: you should generally call update() or flush() before accessing the
254 * data. Otherwise you may be reading stale data if update() or flush() has
255 * not been called recently.
257 template <typename ReturnType = double, typename Interval = Duration>
258 ReturnType rate(Duration duration) const {
259 return getLevelByDuration(duration).template rate<ReturnType, Interval>();
263 * Return the number of data points currently tracked at this level.
265 * This method is identical to count(size_t level) above but takes in the
266 * duration that the user is interested in querying as the parameter.
268 * Note: you should generally call update() or flush() before accessing the
269 * data. Otherwise you may be reading stale data if update() or flush() has
270 * not been called recently.
272 uint64_t count(Duration duration) const {
273 return getLevelByDuration(duration).count();
277 * Return the count divided by the elapsed time tracked at this level.
279 * This method is identical to countRate(size_t level) above but takes in the
280 * duration that the user is interested in querying as the parameter.
282 * Note: you should generally call update() or flush() before accessing the
283 * data. Otherwise you may be reading stale data if update() or flush() has
284 * not been called recently.
286 template <typename ReturnType = double, typename Interval = Duration>
287 ReturnType countRate(Duration duration) const {
288 return getLevelByDuration(duration)
289 .template countRate<ReturnType, Interval>();
293 * Estimate the sum of the data points that occurred in the specified time
294 * period at this level.
296 * The range queried is [start, end).
297 * That is, start is inclusive, and end is exclusive.
299 * Note that data outside of the timeseries duration will no longer be
300 * available for use in the estimation. Specifying a start time earlier than
301 * getEarliestTime() will not have much effect, since only data points after
302 * that point in time will be counted.
304 * Note that the value returned is an estimate, and may not be precise.
306 * Note: you should generally call update() or flush() before accessing the
307 * data. Otherwise you may be reading stale data if update() or flush() has
308 * not been called recently.
310 ValueType sum(TimePoint start, TimePoint end) const {
311 return getLevel(start).sum(start, end);
315 * Estimate the average value during the specified time period.
317 * The same caveats documented in the sum(TimePoint start, TimePoint end)
318 * comments apply here as well.
320 * Note: you should generally call update() or flush() before accessing the
321 * data. Otherwise you may be reading stale data if update() or flush() has
322 * not been called recently.
324 template <typename ReturnType = double>
325 ReturnType avg(TimePoint start, TimePoint end) const {
326 return getLevel(start).template avg<ReturnType>(start, end);
330 * Estimate the rate during the specified time period.
332 * The same caveats documented in the sum(TimePoint start, TimePoint end)
333 * comments apply here as well.
335 * Note: you should generally call update() or flush() before accessing the
336 * data. Otherwise you may be reading stale data if update() or flush() has
337 * not been called recently.
339 template <typename ReturnType = double>
340 ReturnType rate(TimePoint start, TimePoint end) const {
341 return getLevel(start).template rate<ReturnType>(start, end);
345 * Estimate the count during the specified time period.
347 * The same caveats documented in the sum(TimePoint start, TimePoint end)
348 * comments apply here as well.
350 * Note: you should generally call update() or flush() before accessing the
351 * data. Otherwise you may be reading stale data if update() or flush() has
352 * not been called recently.
354 uint64_t count(TimePoint start, TimePoint end) const {
355 return getLevel(start).count(start, end);
359 * Adds the value 'val' at time 'now' to all levels.
361 * Data points added at the same time point is cached internally here and not
362 * propagated to the underlying levels until either flush() is called or when
363 * update from a different time comes.
365 * This function expects time to always move forwards: it cannot be used to
366 * add historical data points that have occurred in the past. If now is
367 * older than the another timestamp that has already been passed to
368 * addValue() or update(), now will be ignored and the latest timestamp will
371 void addValue(TimePoint now, const ValueType& val);
374 * Adds the value 'val' at time 'now' to all levels.
376 void addValue(TimePoint now, const ValueType& val, uint64_t times);
379 * Adds the value 'total' at time 'now' to all levels as the sum of
380 * 'nsamples' samples.
383 addValueAggregated(TimePoint now, const ValueType& total, uint64_t nsamples);
386 * Update all the levels to the specified time, doing all the necessary
387 * work to rotate the buckets and remove any stale data points.
389 * When reading data from the timeseries, you should make sure to manually
390 * call update() before accessing the data. Otherwise you may be reading
391 * stale data if update() has not been called recently.
393 void update(TimePoint now);
396 * Reset all the timeseries to an empty state as if no data points have ever
402 * Flush all cached updates.
407 * Legacy APIs that accept a Duration parameters rather than TimePoint.
409 * These treat the Duration as relative to the clock epoch.
410 * Prefer using the correct TimePoint-based APIs instead. These APIs will
411 * eventually be deprecated and removed.
413 void update(Duration now) {
414 update(TimePoint(now));
416 void addValue(Duration now, const ValueType& value) {
417 addValue(TimePoint(now), value);
419 void addValue(Duration now, const ValueType& value, uint64_t times) {
420 addValue(TimePoint(now), value, times);
423 addValueAggregated(Duration now, const ValueType& total, uint64_t nsamples) {
424 addValueAggregated(TimePoint(now), total, nsamples);
428 std::vector<Level> levels_;
430 // Updates within the same time interval are cached
431 // They are flushed out when updates from a different time comes,
432 // or flush() is called.
433 TimePoint cachedTime_;
434 ValueType cachedSum_;
435 uint64_t cachedCount_;