2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 #ifndef FOLLY_STATS_MULTILEVELTIMESERIES_H_
18 #define FOLLY_STATS_MULTILEVELTIMESERIES_H_
24 #include <glog/logging.h>
25 #include <folly/stats/BucketedTimeSeries.h>
30 * This class represents a timeseries which keeps several levels of data
31 * granularity (similar in principle to the loads reported by the UNIX
32 * 'uptime' command). It uses several instances (one per level) of
33 * BucketedTimeSeries as the underlying storage.
35 * This can easily be used to track sums (and thus rates or averages) over
36 * several predetermined time periods, as well as all-time sums. For example,
37 * you would use to it to track query rate or response speed over the last
38 * 5, 15, 30, and 60 minutes.
40 * The MultiLevelTimeSeries takes a list of level durations as an input; the
41 * durations must be strictly increasing. Furthermore a special level can be
42 * provided with a duration of '0' -- this will be an "all-time" level. If
43 * an all-time level is provided, it MUST be the last level present.
45 * The class assumes that time advances forward -- you can't retroactively add
46 * values for events in the past -- the 'now' argument is provided for better
47 * efficiency and ease of unittesting.
49 * The class is not thread-safe -- use your own synchronization!
51 template <typename VT, typename TT=std::chrono::seconds>
52 class MultiLevelTimeSeries {
56 typedef folly::BucketedTimeSeries<ValueType, TimeType> Level;
59 * Create a new MultiLevelTimeSeries.
61 * This creates a new MultiLevelTimeSeries that tracks time series data at the
62 * specified time durations (level). The time series data tracked at each
63 * level is then further divided by numBuckets for memory efficiency.
65 * The durations must be strictly increasing. Furthermore a special level can
66 * be provided with a duration of '0' -- this will be an "all-time" level. If
67 * an all-time level is provided, it MUST be the last level present.
69 MultiLevelTimeSeries(size_t numBuckets,
71 const TimeType levelDurations[]);
74 * Return the number of buckets used to track time series at each level.
76 size_t numBuckets() const {
77 // The constructor ensures that levels_ has at least one item
78 return levels_[0].numBuckets();
82 * Return the number of levels tracked by MultiLevelTimeSeries.
84 size_t numLevels() const { return levels_.size(); }
87 * Get the BucketedTimeSeries backing the specified level.
89 * Note: you should generally call update() or flush() before accessing the
90 * data. Otherwise you may be reading stale data if update() or flush() has
91 * not been called recently.
93 const Level& getLevel(int level) const {
95 CHECK_LT(level, levels_.size());
96 return levels_[level];
100 * Get the highest granularity level that is still large enough to contain
101 * data going back to the specified start time.
103 * Note: you should generally call update() or flush() before accessing the
104 * data. Otherwise you may be reading stale data if update() or flush() has
105 * not been called recently.
107 const Level& getLevel(TimeType start) const {
108 for (const auto& level : levels_) {
109 if (level.isAllTime()) {
112 // Note that we use duration() here rather than elapsed().
113 // If duration is large enough to contain the start time then this level
114 // is good enough, even if elapsed() indicates that no data was recorded
115 // before the specified start time.
116 if (level.getLatestTime() - level.duration() <= start) {
120 // We should always have an all-time level, so this is never reached.
121 LOG(FATAL) << "No level of timeseries covers internval"
122 << " from " << start.count() << " to now";
123 return levels_.back();
127 * Return the sum of all the data points currently tracked at this level.
129 * Note: you should generally call update() or flush() before accessing the
130 * data. Otherwise you may be reading stale data if update() or flush() has
131 * not been called recently.
133 ValueType sum(int level) const {
134 return getLevel(level).sum();
138 * Return the average (sum / count) of all the data points currently tracked
141 * The return type may be specified to control whether floating-point or
142 * integer division should be performed.
144 * Note: you should generally call update() or flush() before accessing the
145 * data. Otherwise you may be reading stale data if update() or flush() has
146 * not been called recently.
148 template <typename ReturnType=double>
149 ReturnType avg(int level) const {
150 return getLevel(level).template avg<ReturnType>();
154 * Return the rate (sum divided by elaspsed time) of the all data points
155 * currently tracked at this level.
157 * Note: you should generally call update() or flush() before accessing the
158 * data. Otherwise you may be reading stale data if update() or flush() has
159 * not been called recently.
161 template <typename ReturnType=double, typename Interval=TimeType>
162 ValueType rate(int level) const {
163 return getLevel(level).template rate<ReturnType, Interval>();
167 * Return the number of data points currently tracked at this level.
169 * Note: you should generally call update() or flush() before accessing the
170 * data. Otherwise you may be reading stale data if update() or flush() has
171 * not been called recently.
173 int64_t count(int level) const {
174 return getLevel(level).count();
178 * Return the count divided by the elapsed time tracked at this level.
180 * Note: you should generally call update() or flush() before accessing the
181 * data. Otherwise you may be reading stale data if update() or flush() has
182 * not been called recently.
184 template <typename ReturnType=double, typename Interval=TimeType>
185 ReturnType countRate(int level) const {
186 return getLevel(level).template countRate<ReturnType, Interval>();
190 * Estimate the sum of the data points that occurred in the specified time
191 * period at this level.
193 * The range queried is [start, end).
194 * That is, start is inclusive, and end is exclusive.
196 * Note that data outside of the timeseries duration will no longer be
197 * available for use in the estimation. Specifying a start time earlier than
198 * getEarliestTime() will not have much effect, since only data points after
199 * that point in time will be counted.
201 * Note that the value returned is an estimate, and may not be precise.
203 * Note: you should generally call update() or flush() before accessing the
204 * data. Otherwise you may be reading stale data if update() or flush() has
205 * not been called recently.
207 ValueType sum(TimeType start, TimeType end) const {
208 return getLevel(start).sum(start, end);
212 * Estimate the average value during the specified time period.
214 * The same caveats documented in the sum(TimeType start, TimeType end)
215 * comments apply here as well.
217 * Note: you should generally call update() or flush() before accessing the
218 * data. Otherwise you may be reading stale data if update() or flush() has
219 * not been called recently.
221 template <typename ReturnType=double>
222 ReturnType avg(TimeType start, TimeType end) const {
223 return getLevel(start).template avg<ReturnType>(start, end);
227 * Estimate the rate during the specified time period.
229 * The same caveats documented in the sum(TimeType start, TimeType end)
230 * comments apply here as well.
232 * Note: you should generally call update() or flush() before accessing the
233 * data. Otherwise you may be reading stale data if update() or flush() has
234 * not been called recently.
236 template <typename ReturnType=double>
237 ReturnType rate(TimeType start, TimeType end) const {
238 return getLevel(start).template rate<ReturnType>(start, end);
242 * Estimate the count during the specified time period.
244 * The same caveats documented in the sum(TimeType start, TimeType end)
245 * comments apply here as well.
247 * Note: you should generally call update() or flush() before accessing the
248 * data. Otherwise you may be reading stale data if update() or flush() has
249 * not been called recently.
251 int64_t count(TimeType start, TimeType end) const {
252 return getLevel(start).count(start, end);
256 * Adds the value 'val' at time 'now' to all levels.
258 * Data points added at the same time point is cached internally here and not
259 * propagated to the underlying levels until either flush() is called or when
260 * update from a different time comes.
262 * This function expects time to always move forwards: it cannot be used to
263 * add historical data points that have occurred in the past. If now is
264 * older than the another timestamp that has already been passed to
265 * addValue() or update(), now will be ignored and the latest timestamp will
268 void addValue(TimeType now, const ValueType& val);
271 * Adds the value 'val' at time 'now' to all levels.
273 void addValue(TimeType now, const ValueType& val, int64_t times);
276 * Adds the value 'val' at time 'now' to all levels as the sum of 'nsamples'
279 void addValueAggregated(TimeType now, const ValueType& sum, int64_t nsamples);
282 * Update all the levels to the specified time, doing all the necessary
283 * work to rotate the buckets and remove any stale data points.
285 * When reading data from the timeseries, you should make sure to manually
286 * call update() before accessing the data. Otherwise you may be reading
287 * stale data if update() has not been called recently.
289 void update(TimeType now);
292 * Reset all the timeseries to an empty state as if no data points have ever
298 * Flush all cached updates.
303 std::vector<Level> levels_;
305 // Updates within the same time interval are cached
306 // They are flushed out when updates from a different time comes,
307 // or flush() is called.
308 TimeType cachedTime_;
309 ValueType cachedSum_;
315 #endif // FOLLY_STATS_MULTILEVELTIMESERIES_H_