2 * Copyright 2016 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 #ifndef __STDC_FORMAT_MACROS
17 #define __STDC_FORMAT_MACROS
20 #include <folly/io/async/test/TimeUtil.h>
22 #include <folly/Conv.h>
23 #include <folly/portability/SysSyscall.h>
24 #include <folly/portability/Unistd.h>
25 #include <folly/portability/Windows.h>
28 #include <sys/types.h>
31 #include <glog/logging.h>
37 #include <sys/utsname.h>
41 using namespace std::chrono;
46 static pid_t gettid() {
47 return pid_t(GetCurrentThreadId());
51 * glibc doesn't provide gettid(), so define it ourselves.
53 static pid_t gettid() {
54 return syscall(FOLLY_SYS_gettid);
58 * The /proc/<pid>/schedstat file reports time values in jiffies.
60 * Determine how many jiffies are in a second.
61 * Returns -1 if the number of jiffies/second cannot be determined.
63 static int64_t determineJiffiesHZ() {
64 // It seems like the only real way to figure out the CONFIG_HZ value used by
65 // this kernel is to look it up in the config file.
67 // Look in /boot/config-<kernel_release>
68 struct utsname unameInfo;
69 if (uname(&unameInfo) != 0) {
70 LOG(ERROR) << "unable to determine jiffies/second: uname failed: %s"
76 snprintf(configPath, sizeof(configPath), "/boot/config-%s",
79 FILE* f = fopen(configPath, "r");
81 LOG(ERROR) << "unable to determine jiffies/second: "
82 "cannot open kernel config file %s" << configPath;
88 while (fgets(buf, sizeof(buf), f) != nullptr) {
89 if (strcmp(buf, "CONFIG_NO_HZ=y\n") == 0) {
90 // schedstat info seems to be reported in nanoseconds on tickless
93 // The CONFIG_HZ value doesn't matter for our purposes,
94 // so return as soon as we see CONFIG_NO_HZ.
97 } else if (strcmp(buf, "CONFIG_HZ=1000\n") == 0) {
99 } else if (strcmp(buf, "CONFIG_HZ=300\n") == 0) {
101 } else if (strcmp(buf, "CONFIG_HZ=250\n") == 0) {
103 } else if (strcmp(buf, "CONFIG_HZ=100\n") == 0) {
110 LOG(ERROR) << "unable to determine jiffies/second: no CONFIG_HZ setting "
111 "found in %s" << configPath;
120 * Determine how long this process has spent waiting to get scheduled on the
123 * Returns the number of milliseconds spent waiting, or -1 if the amount of
124 * time cannot be determined.
126 static milliseconds getTimeWaitingMS(pid_t tid) {
128 return milliseconds(0);
130 static int64_t jiffiesHZ = 0;
131 if (jiffiesHZ == 0) {
132 jiffiesHZ = determineJiffiesHZ();
136 // We couldn't figure out how many jiffies there are in a second.
137 // Don't bother reading the schedstat info if we can't interpret it.
138 return milliseconds(0);
143 char schedstatFile[256];
144 snprintf(schedstatFile, sizeof(schedstatFile),
145 "/proc/%d/schedstat", tid);
146 fd = open(schedstatFile, O_RDONLY);
148 throw std::runtime_error(
149 folly::to<string>("failed to open process schedstat file", errno));
153 ssize_t bytesReadRet = read(fd, buf, sizeof(buf) - 1);
154 if (bytesReadRet <= 0) {
155 throw std::runtime_error(
156 folly::to<string>("failed to read process schedstat file", errno));
158 size_t bytesRead = size_t(bytesReadRet);
160 if (buf[bytesRead - 1] != '\n') {
161 throw std::runtime_error("expected newline at end of schedstat data");
163 assert(bytesRead < sizeof(buf));
164 buf[bytesRead] = '\0';
166 uint64_t activeJiffies = 0;
167 uint64_t waitingJiffies = 0;
168 uint64_t numTasks = 0;
169 int rc = sscanf(buf, "%" PRIu64 " %" PRIu64 " %" PRIu64 "\n",
170 &activeJiffies, &waitingJiffies, &numTasks);
172 throw std::runtime_error("failed to parse schedstat data");
176 return milliseconds((waitingJiffies * 1000) / jiffiesHZ);
177 } catch (const std::runtime_error& e) {
181 LOG(ERROR) << "error determining process wait time: %s" << e.what();
182 return milliseconds(0);
187 void TimePoint::reset() {
188 // Remember the current time
189 timeStart_ = system_clock::now();
191 // Remember how long this process has spent waiting to be scheduled
193 timeWaiting_ = getTimeWaitingMS(tid_);
195 // In case it took a while to read the schedstat info,
196 // also record the time after the schedstat check
197 timeEnd_ = system_clock::now();
200 std::ostream& operator<<(std::ostream& os, const TimePoint& timePoint) {
201 os << "TimePoint(" << timePoint.getTimeStart().time_since_epoch().count()
202 << ", " << timePoint.getTimeEnd().time_since_epoch().count() << ", "
203 << timePoint.getTimeWaiting().count() << ")";
208 checkTimeout(const TimePoint& start, const TimePoint& end,
209 milliseconds expectedMS, bool allowSmaller,
210 milliseconds tolerance) {
211 auto elapsedMS = end.getTimeStart() - start.getTimeEnd();
214 // Timeouts should never fire before the time was up.
215 // Allow 1ms of wiggle room for rounding errors.
216 if (elapsedMS < expectedMS - milliseconds(1)) {
221 // Check that the event fired within a reasonable time of the timout.
223 // If the system is under heavy load, our process may have had to wait for a
224 // while to be run. The time spent waiting for the processor shouldn't
225 // count against us, so exclude this time from the check.
226 milliseconds excludedMS;
227 if (end.getTid() != start.getTid()) {
228 // We can only correctly compute the amount of time waiting to be scheduled
229 // if both TimePoints were set in the same thread.
230 excludedMS = milliseconds(0);
232 excludedMS = end.getTimeWaiting() - start.getTimeWaiting();
233 assert(end.getTimeWaiting() >= start.getTimeWaiting());
234 // Add a tolerance here due to precision issues on linux, see below note.
235 assert( (elapsedMS + tolerance) >= excludedMS);
238 milliseconds effectiveElapsedMS = milliseconds(0);
239 if (elapsedMS > excludedMS) {
240 effectiveElapsedMS = duration_cast<milliseconds>(elapsedMS) - excludedMS;
243 // On x86 Linux, sleep calls generally have precision only to the nearest
244 // millisecond. The tolerance parameter lets users allow a few ms of slop.
245 milliseconds overrun = effectiveElapsedMS - expectedMS;
246 if (overrun > tolerance) {