2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 #define __STDC_FORMAT_MACROS
18 #include <folly/io/async/test/TimeUtil.h>
20 #include <folly/Conv.h>
23 #include <sys/types.h>
25 #include <sys/syscall.h>
26 #include <sys/utsname.h>
28 #include <glog/logging.h>
35 using namespace std::chrono;
40 * glibc doesn't provide gettid(), so define it ourselves.
42 static pid_t gettid() {
43 return syscall(SYS_gettid);
47 * The /proc/<pid>/schedstat file reports time values in jiffies.
49 * Determine how many jiffies are in a second.
50 * Returns -1 if the number of jiffies/second cannot be determined.
52 static int64_t determineJiffiesHZ() {
53 // It seems like the only real way to figure out the CONFIG_HZ value used by
54 // this kernel is to look it up in the config file.
56 // Look in /boot/config-<kernel_release>
57 struct utsname unameInfo;
58 if (uname(&unameInfo) != 0) {
59 LOG(ERROR) << "unable to determine jiffies/second: uname failed: %s"
65 snprintf(configPath, sizeof(configPath), "/boot/config-%s",
68 FILE* f = fopen(configPath, "r");
70 LOG(ERROR) << "unable to determine jiffies/second: "
71 "cannot open kernel config file %s" << configPath;
77 while (fgets(buf, sizeof(buf), f) != nullptr) {
78 if (strcmp(buf, "CONFIG_NO_HZ=y\n") == 0) {
79 // schedstat info seems to be reported in nanoseconds on tickless
82 // The CONFIG_HZ value doesn't matter for our purposes,
83 // so return as soon as we see CONFIG_NO_HZ.
86 } else if (strcmp(buf, "CONFIG_HZ=1000\n") == 0) {
88 } else if (strcmp(buf, "CONFIG_HZ=300\n") == 0) {
90 } else if (strcmp(buf, "CONFIG_HZ=250\n") == 0) {
92 } else if (strcmp(buf, "CONFIG_HZ=100\n") == 0) {
99 LOG(ERROR) << "unable to determine jiffies/second: no CONFIG_HZ setting "
100 "found in %s" << configPath;
108 * Determine how long this process has spent waiting to get scheduled on the
111 * Returns the number of milliseconds spent waiting, or -1 if the amount of
112 * time cannot be determined.
114 static milliseconds getTimeWaitingMS(pid_t tid) {
115 static int64_t jiffiesHZ = 0;
116 if (jiffiesHZ == 0) {
117 jiffiesHZ = determineJiffiesHZ();
121 // We couldn't figure out how many jiffies there are in a second.
122 // Don't bother reading the schedstat info if we can't interpret it.
123 return milliseconds(0);
128 char schedstatFile[256];
129 snprintf(schedstatFile, sizeof(schedstatFile),
130 "/proc/%d/schedstat", tid);
131 fd = open(schedstatFile, O_RDONLY);
133 throw std::runtime_error(
134 folly::to<string>("failed to open process schedstat file", errno));
138 ssize_t bytesReadRet = read(fd, buf, sizeof(buf) - 1);
139 if (bytesReadRet <= 0) {
140 throw std::runtime_error(
141 folly::to<string>("failed to read process schedstat file", errno));
143 size_t bytesRead = size_t(bytesReadRet);
145 if (buf[bytesRead - 1] != '\n') {
146 throw std::runtime_error("expected newline at end of schedstat data");
148 assert(bytesRead < sizeof(buf));
149 buf[bytesRead] = '\0';
151 uint64_t activeJiffies = 0;
152 uint64_t waitingJiffies = 0;
153 uint64_t numTasks = 0;
154 int rc = sscanf(buf, "%" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
155 &activeJiffies, &waitingJiffies, &numTasks);
157 throw std::runtime_error("failed to parse schedstat data");
161 return milliseconds((waitingJiffies * 1000) / jiffiesHZ);
162 } catch (const std::runtime_error& e) {
166 LOG(ERROR) << "error determining process wait time: %s" << e.what();
167 return milliseconds(0);
171 void TimePoint::reset() {
172 // Remember the current time
173 timeStart_ = system_clock::now();
175 // Remember how long this process has spent waiting to be scheduled
177 timeWaiting_ = getTimeWaitingMS(tid_);
179 // In case it took a while to read the schedstat info,
180 // also record the time after the schedstat check
181 timeEnd_ = system_clock::now();
184 std::ostream& operator<<(std::ostream& os, const TimePoint& timePoint) {
185 os << "TimePoint(" << timePoint.getTimeStart().time_since_epoch().count()
186 << ", " << timePoint.getTimeEnd().time_since_epoch().count() << ", "
187 << timePoint.getTimeWaiting().count() << ")";
192 checkTimeout(const TimePoint& start, const TimePoint& end,
193 milliseconds expectedMS, bool allowSmaller,
194 milliseconds tolerance) {
195 auto elapsedMS = end.getTimeStart() - start.getTimeEnd();
198 // Timeouts should never fire before the time was up.
199 // Allow 1ms of wiggle room for rounding errors.
200 if (elapsedMS < expectedMS - milliseconds(1)) {
205 // Check that the event fired within a reasonable time of the timout.
207 // If the system is under heavy load, our process may have had to wait for a
208 // while to be run. The time spent waiting for the processor shouldn't
209 // count against us, so exclude this time from the check.
210 milliseconds excludedMS;
211 if (end.getTid() != start.getTid()) {
212 // We can only correctly compute the amount of time waiting to be scheduled
213 // if both TimePoints were set in the same thread.
214 excludedMS = milliseconds(0);
216 excludedMS = end.getTimeWaiting() - start.getTimeWaiting();
217 assert(end.getTimeWaiting() >= start.getTimeWaiting());
218 // Add a tolerance here due to precision issues on linux, see below note.
219 assert( (elapsedMS + tolerance) >= excludedMS);
222 milliseconds effectiveElapsedMS = milliseconds(0);
223 if (elapsedMS > excludedMS) {
224 effectiveElapsedMS = duration_cast<milliseconds>(elapsedMS) - excludedMS;
227 // On x86 Linux, sleep calls generally have precision only to the nearest
228 // millisecond. The tolerance parameter lets users allow a few ms of slop.
229 milliseconds overrun = effectiveElapsedMS - expectedMS;
230 if (overrun > tolerance) {