2 * Copyright 2015 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Subprocess library, modeled after Python's subprocess module
19 * (http://docs.python.org/2/library/subprocess.html)
21 * This library defines one class (Subprocess) which represents a child
22 * process. Subprocess has two constructors: one that takes a vector<string>
23 * and executes the given executable without using the shell, and one
24 * that takes a string and executes the given command using the shell.
25 * Subprocess allows you to redirect the child's standard input, standard
26 * output, and standard error to/from child descriptors in the parent,
27 * or to create communication pipes between the child and the parent.
29 * The simplest example is a thread-safe version of the system() library function:
31 * Subprocess(cmd).wait();
32 * which executes the command using the default shell and waits for it
33 * to complete, returning the exit status.
35 * A thread-safe version of popen() (type="r", to read from the child):
36 * Subprocess proc(cmd, Subprocess::pipeStdout());
37 * // read from proc.stdout()
40 * A thread-safe version of popen() (type="w", to write to the child):
41 * Subprocess proc(cmd, Subprocess::pipeStdin());
42 * // write to proc.stdin()
45 * If you want to redirect both stdin and stdout to pipes, you can, but
46 * note that you're subject to a variety of deadlocks. You'll want to use
47 * nonblocking I/O; look at the implementation of communicate() for an example.
49 * communicate() is a way to communicate to a child via its standard input,
50 * standard output, and standard error. It buffers everything in memory,
51 * so it's not great for large amounts of data (or long-running processes),
52 * but it insulates you from the deadlocks mentioned above.
54 #ifndef FOLLY_SUBPROCESS_H_
55 #define FOLLY_SUBPROCESS_H_
57 #include <sys/types.h>
69 #include <boost/container/flat_map.hpp>
70 #include <boost/operators.hpp>
71 #include <boost/noncopyable.hpp>
73 #include <folly/FileUtil.h>
74 #include <folly/gen/String.h>
75 #include <folly/io/IOBufQueue.h>
76 #include <folly/MapUtil.h>
77 #include <folly/Portability.h>
78 #include <folly/Range.h>
83 * Class to wrap a process return code.
86 class ProcessReturnCode {
87 friend class Subprocess;
97 * Process state. One of:
98 * NOT_STARTED: process hasn't been started successfully
99 * RUNNING: process is currently running
100 * EXITED: process exited (successfully or not)
101 * KILLED: process was killed by a signal.
106 * Helper wrappers around state().
108 bool notStarted() const { return state() == NOT_STARTED; }
109 bool running() const { return state() == RUNNING; }
110 bool exited() const { return state() == EXITED; }
111 bool killed() const { return state() == KILLED; }
114 * Exit status. Only valid if state() == EXITED; throws otherwise.
116 int exitStatus() const;
119 * Signal that caused the process's termination. Only valid if
120 * state() == KILLED; throws otherwise.
122 int killSignal() const;
125 * Was a core file generated? Only valid if state() == KILLED; throws otherwise.
128 bool coreDumped() const;
131 * String representation; one of
134 * "exited with status <status>"
135 * "killed by signal <signal>"
136 * "killed by signal <signal> (core dumped)"
138 std::string str() const;
141 * Helper function to enforce a precondition based on this.
142 * Throws std::logic_error if in an unexpected state.
144 void enforce(State state) const;
146 explicit ProcessReturnCode(int rv) : rawStatus_(rv) { }
147 static constexpr int RV_NOT_STARTED = -2;
148 static constexpr int RV_RUNNING = -1;
154 * Base exception thrown by the Subprocess methods.
// Root of the exception hierarchy used by Subprocess; adds nothing to
// std::exception beyond a distinct type that callers can catch.
class SubprocessError : public std::exception {
};
159 * Exception thrown by *Checked methods of Subprocess.
161 class CalledProcessError : public SubprocessError {
163 explicit CalledProcessError(ProcessReturnCode rc);
// Destructor; carries an empty exception specification like the
// std::exception destructor it overrides.
~CalledProcessError() throw() {
}

// Returns the message held in what_ as a C string. The pointer is only
// valid while this exception object (and therefore what_) is alive.
const char* what() const throw() FOLLY_OVERRIDE {
  return what_.c_str();
}

// Returns a copy of the stored returnCode_.
ProcessReturnCode returnCode() const {
  return returnCode_;
}
168 ProcessReturnCode returnCode_;
173 * Exception thrown if the subprocess cannot be started.
175 class SubprocessSpawnError : public SubprocessError {
177 SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
178 ~SubprocessSpawnError() throw() {}
179 const char* what() const throw() FOLLY_OVERRIDE { return what_.c_str(); }
180 int errnoValue() const { return errnoValue_; }
190 class Subprocess : private boost::noncopyable {
192 static const int CLOSE = -1;
193 static const int PIPE = -2;
194 static const int PIPE_IN = -3;
195 static const int PIPE_OUT = -4;
198 * Class representing various options: file descriptor behavior, and
199 * whether to use $PATH for searching for the executable.
201 * By default, we don't use $PATH, file descriptors are closed if
202 * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited otherwise.
205 class Options : private boost::orable<Options> {
206 friend class Subprocess;
208 Options() {} // E.g. https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58328
211 * Change action for file descriptor fd.
213 * "action" may be another file descriptor number (dup2()ed before the
214 * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
216 * CLOSE: close the file descriptor in the child
217 * PIPE_IN: open a pipe *from* the child
218 * PIPE_OUT: open a pipe *to* the child
220 * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
221 * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
222 * other file descriptors.
224 Options& fd(int fd, int action);
227 * Shortcut to change the action for standard input.
229 Options& stdin(int action) { return fd(STDIN_FILENO, action); }
232 * Shortcut to change the action for standard output.
234 Options& stdout(int action) { return fd(STDOUT_FILENO, action); }
237 * Shortcut to change the action for standard error.
238 * Note that stderr(1) will redirect the standard error to the same
239 * file descriptor as standard output; the equivalent of bash's "2>&1"
241 Options& stderr(int action) { return fd(STDERR_FILENO, action); }
243 Options& pipeStdin() { return fd(STDIN_FILENO, PIPE_IN); }
244 Options& pipeStdout() { return fd(STDOUT_FILENO, PIPE_OUT); }
245 Options& pipeStderr() { return fd(STDERR_FILENO, PIPE_OUT); }
248 * Close all other fds (other than standard input, output, error,
249 * and file descriptors explicitly specified with fd()).
251 * This is potentially slow; it's generally a better idea to
252 * set the close-on-exec flag on all file descriptors that shouldn't
253 * be inherited by the child.
255 * Even with this option set, standard input, output, and error are
256 * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you want them closed.
259 Options& closeOtherFds() { closeOtherFds_ = true; return *this; }
262 * Use the search path ($PATH) when searching for the executable.
264 Options& usePath() { usePath_ = true; return *this; }
267 * Change the child's working directory, after the vfork.
269 Options& chdir(const std::string& dir) { childDir_ = dir; return *this; }
273 * Child will receive a signal when the parent exits.
275 Options& parentDeathSignal(int sig) {
276 parentDeathSignal_ = sig;
282 * Child will be made a process group leader when it starts. Upside: one
283 * can reliably kill all its non-daemonizing descendants. Downside: the
284 * child will not receive Ctrl-C etc during interactive use.
286 Options& processGroupLeader() {
287 processGroupLeader_ = true;
292 * Helpful way to combine Options.
294 Options& operator|=(const Options& other);
297 typedef boost::container::flat_map<int, int> FdMap;
299 bool closeOtherFds_{false};
300 bool usePath_{false};
301 std::string childDir_; // "" keeps the parent's working directory
303 int parentDeathSignal_{0};
305 bool processGroupLeader_{false};
308 static Options pipeStdin() { return Options().stdin(PIPE); }
309 static Options pipeStdout() { return Options().stdout(PIPE); }
310 static Options pipeStderr() { return Options().stderr(PIPE); }
313 * Create a subprocess from the given arguments. argv[0] must be listed.
314 * If not-null, executable must be the actual executable
315 * being used (otherwise it's the same as argv[0]).
317 * If env is not-null, it must contain name=value strings to be used
318 * as the child's environment; otherwise, we inherit the environment
319 * from the parent. env must be null if options.usePath is set.
322 const std::vector<std::string>& argv,
323 const Options& options = Options(),
324 const char* executable = nullptr,
325 const std::vector<std::string>* env = nullptr);
329 * Create a subprocess run as a shell command (as shell -c 'command')
331 * The shell to use is taken from the environment variable $SHELL,
332 * or /bin/sh if $SHELL is unset.
335 const std::string& cmd,
336 const Options& options = Options(),
337 const std::vector<std::string>* env = nullptr);
340 * Communicate with the child until all pipes to/from the child are closed.
342 * The input buffer is written to the process' stdin pipe, and data is read
343 * from the stdout and stderr pipes. Non-blocking I/O is performed on all
344 * pipes simultaneously to avoid deadlocks.
346 * The stdin pipe will be closed after the full input buffer has been written.
347 * An error will be thrown if a non-empty input buffer is supplied but stdin
348 * was not configured as a pipe.
350 * Returns a pair of buffers containing the data read from stdout and stderr.
351 * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
352 * for the respective buffer.
354 * Note that communicate() and communicateIOBuf() both return when all
355 * pipes to/from the child are closed; the child might stay alive after
356 * that, so you must still wait().
358 * communicateIOBuf() uses IOBufQueue for buffering (which has the
359 * advantage that it won't try to allocate all data at once), but it does
360 * store the subprocess's entire output in memory before returning.
362 * communicate() uses strings for simplicity.
364 std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
365 IOBufQueue input = IOBufQueue());
367 std::pair<std::string, std::string> communicate(
368 StringPiece input = StringPiece());
371 * Communicate with the child until all pipes to/from the child are closed.
373 * readCallback(pfd, cfd) will be called whenever there's data available
374 * on any pipe *from* the child (PIPE_OUT). pfd is the file descriptor
375 * in the parent (that you use to read from); cfd is the file descriptor
376 * in the child (used for identifying the stream; 1 = child's standard
377 * output, 2 = child's standard error, etc)
379 * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
380 * writable (PIPE_IN). pfd is the file descriptor in the parent (that you
381 * use to write to); cfd is the file descriptor in the child (used for
382 * identifying the stream; 0 = child's standard input, etc)
384 * The read and write callbacks must read from / write to pfd and return
385 * false during normal operation. Return true to tell communicate() to
386 * close the pipe. For readCallback, this might send SIGPIPE to the
387 * child, or make its writes fail with EPIPE, so you should generally
388 * avoid returning true unless you've reached end-of-file.
390 * NOTE that you MUST consume all data passed to readCallback (or return
391 * true to close the pipe). Similarly, you MUST write to a writable pipe
392 * (or return true to close the pipe). To do otherwise is an error that
393 * can result in a deadlock. You must do this even for pipes you are not interested in.
396 * Note that pfd is nonblocking, so be prepared for read() / write() to
397 * return -1 and set errno to EAGAIN (in which case you should return
398 * false). Use readNoInt() from FileUtil.h to handle interrupted reads
401 * Note that communicate() returns when all pipes to/from the child are
402 * closed; the child might stay alive after that, so you must still wait().
404 * Most users won't need to use this; the simpler version of communicate
405 * (which buffers data in memory) will probably work fine.
407 * See ReadLinesCallback for an easy way to consume the child's output
408 * streams line-by-line (or tokenized by another delimiter).
410 typedef std::function<bool(int, int)> FdCallback;
411 void communicate(FdCallback readCallback, FdCallback writeCallback);
414 * A readCallback for Subprocess::communicate() that helps you consume
415 * lines (or other delimited pieces) from your subprocess's file
416 * descriptors. Use the readLinesCallback() helper to get template
417 * deduction. For example:
419 * auto read_cb = Subprocess::readLinesCallback(
420 * [](int fd, folly::StringPiece s) {
421 * std::cout << fd << " said: " << s;
422 * return false; // Keep reading from the child
425 * subprocess.communicate(
426 * // ReadLinesCallback contains StreamSplitter contains IOBuf, making
427 * // it noncopyable, whereas std::function must be copyable. So, we
428 * // keep the callback in a local, and instead pass a reference.
430 * [](int pfd, int cfd){ return true; } // Don't write to the child
433 * If a file line exceeds maxLineLength, your callback will get some
434 * initial chunks of maxLineLength with no trailing delimiters. The final
435 * chunk of a line is delimiter-terminated iff the delimiter was present
436 * in the input. In particular, the last line in a file always lacks a
437 * delimiter -- so if a file ends on a delimiter, the final line is empty.
439 * Like a regular communicate() callback, your fdLineCb() normally returns
440 * false. It may return true to tell Subprocess to close the underlying
441 * file descriptor. The child process may then receive SIGPIPE or get
442 * EPIPE errors on writes.
444 template <class Callback>
445 class ReadLinesCallback {
447 // Binds an FD to the client-provided FD+line callback
448 struct StreamSplitterCallback {
449 StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) { }
450 // The return value semantics are inverted vs StreamSplitter
451 bool operator()(StringPiece s) { return !cb_(fd_, s); }
455 typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
457 explicit ReadLinesCallback(
459 uint64_t maxLineLength = 0, // No line length limit by default
460 char delimiter = '\n',
461 uint64_t bufSize = 1024
462 ) : fdLineCb_(std::move(fdLineCb)),
463 maxLineLength_(maxLineLength),
464 delimiter_(delimiter),
467 bool operator()(int pfd, int cfd) {
468 // Make a splitter for this cfd if it doesn't already exist
469 auto it = fdToSplitter_.find(cfd);
470 auto& splitter = (it != fdToSplitter_.end()) ? it->second
471 : fdToSplitter_.emplace(cfd, LineSplitter(
472 delimiter_, StreamSplitterCallback(fdLineCb_, cfd), maxLineLength_
474 // Read as much as we can from this FD
477 ssize_t ret = readNoInt(pfd, buf, bufSize_);
478 if (ret == -1 && errno == EAGAIN) { // No more data for now
481 if (ret == 0) { // Reached end-of-file
482 splitter.flush(); // Ignore return since the file is over anyway
485 if (!splitter(StringPiece(buf, ret))) {
486 return true; // The callback told us to stop
493 const uint64_t maxLineLength_;
494 const char delimiter_;
495 const uint64_t bufSize_;
496 // We lazily make splitters for all cfds that get used.
497 std::unordered_map<int, LineSplitter> fdToSplitter_;
500 // Helper to enable template deduction
501 template <class Callback>
502 static ReadLinesCallback<Callback> readLinesCallback(
504 uint64_t maxLineLength = 0, // No line length limit by default
505 char delimiter = '\n',
506 uint64_t bufSize = 1024) {
507 return ReadLinesCallback<Callback>(
508 std::move(fdLineCb), maxLineLength, delimiter, bufSize
513 * Enable notifications (callbacks) for one pipe to/from child. By default,
514 * all are enabled. Useful for "chatty" communication -- you want to disable
515 * write callbacks until you receive the expected message.
517 void enableNotifications(int childFd, bool enabled);
520 * Are notifications for one pipe to/from child enabled?
522 bool notificationsEnabled(int childFd) const;
525 * Return the child's pid, or -1 if the child wasn't successfully spawned
526 * or has already been wait()ed upon.
531 * Return the child's status (as per wait()) if the process has already
532 * been waited on, -1 if the process is still running, or -2 if the process
533 * hasn't been successfully started. NOTE that this does not poll, but
534 * returns the status stored in the Subprocess object.
536 ProcessReturnCode returnCode() const { return returnCode_; }
539 * Poll the child's status and return it, return -1 if the process
540 * is still running. NOTE that it is illegal to call poll again after
541 * poll indicated that the process has terminated, or to call poll on a
542 * process that hasn't been successfully started (the constructor threw an exception).
545 ProcessReturnCode poll();
548 * Poll the child's status. If the process is still running, return false.
549 * Otherwise, return true if the process exited with status 0 (success),
550 * or throw CalledProcessError if the process exited with a non-zero status.
555 * Wait for the process to terminate and return its status.
556 * Similarly to poll, it is illegal to call wait after the process
557 * has already been reaped or if the process has not successfully started.
559 ProcessReturnCode wait();
562 * Wait for the process to terminate, throw if unsuccessful.
567 * Set all pipes from / to child non-blocking. communicate() does this for you.
570 void setAllNonBlocking();
573 * Get parent file descriptor corresponding to the given file descriptor
574 * in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
575 * Do not close() the return file descriptor; use closeParentFd, below.
577 int parentFd(int childFd) const {
578 return pipes_[findByChildFd(childFd)].parentFd;
580 int stdin() const { return parentFd(0); }
581 int stdout() const { return parentFd(1); }
582 int stderr() const { return parentFd(2); }
585 * Close the parent file descriptor given a file descriptor in the child.
587 void closeParentFd(int childFd);
590 * Send a signal to the child. Shortcuts for the commonly used Unix signals are below.
593 void sendSignal(int signal);
594 void terminate() { sendSignal(SIGTERM); }
595 void kill() { sendSignal(SIGKILL); }
598 static const int RV_RUNNING = ProcessReturnCode::RV_RUNNING;
599 static const int RV_NOT_STARTED = ProcessReturnCode::RV_NOT_STARTED;
601 // spawn() sets up a pipe to read errors from the child,
602 // then calls spawnInternal() to do the bulk of the work. Once
603 // spawnInternal() returns it reads the error pipe to see if the child
604 // encountered any errors.
606 std::unique_ptr<const char*[]> argv,
607 const char* executable,
608 const Options& options,
609 const std::vector<std::string>* env);
611 std::unique_ptr<const char*[]> argv,
612 const char* executable,
614 const std::vector<std::string>* env,
617 // Actions to run in child.
618 // Note that this runs after vfork(), so tread lightly.
619 // Returns 0 on success, or an errno value on failure.
620 int prepareChild(const Options& options,
621 const sigset_t* sigmask,
622 const char* childDir) const;
623 int runChild(const char* executable, char** argv, char** env,
624 const Options& options) const;
627 * Read from the error pipe, and throw SubprocessSpawnError if the child
628 * failed before calling exec().
630 void readChildErrorPipe(int pfd, const char* executable);
633 * Close all file descriptors.
637 // return index in pipes_
638 int findByChildFd(int childFd) const;
641 ProcessReturnCode returnCode_;
643 // The number of pipes between parent and child is assumed to be small,
644 // so we're happy with a vector here, even if it means linear erase.
646 struct PipeInfo : private boost::totally_ordered<PipeInfo> {
649 int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
652 bool operator<(const PipeInfo& other) const {
653 return childFd < other.childFd;
655 bool operator==(const PipeInfo& other) const {
656 return childFd == other.childFd;
659 std::vector<PipeInfo> pipes_;
662 inline Subprocess::Options& Subprocess::Options::operator|=(
663 const Subprocess::Options& other) {
664 if (this == &other) return *this;
666 for (auto& p : other.fdActions_) {
667 fdActions_[p.first] = p.second;
669 closeOtherFds_ |= other.closeOtherFds_;
670 usePath_ |= other.usePath_;
671 processGroupLeader_ |= other.processGroupLeader_;
677 #endif /* FOLLY_SUBPROCESS_H_ */