2 * Copyright 2014 Facebook, Inc.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * Subprocess library, modeled after Python's subprocess module
19 * (http://docs.python.org/2/library/subprocess.html)
21 * This library defines one class (Subprocess) which represents a child
22 * process. Subprocess has two constructors: one that takes a vector<string>
23 * and executes the given executable without using the shell, and one
24 * that takes a string and executes the given command using the shell.
25 * Subprocess allows you to redirect the child's standard input, standard
26 * output, and standard error to/from child descriptors in the parent,
27 * or to create communication pipes between the child and the parent.
29 * The simplest example is a thread-safe version of the system() function:
31 * Subprocess(cmd).wait();
32 * which executes the command using the default shell and waits for it
33 * to complete, returning the exit status.
35 * A thread-safe version of popen() (type="r", to read from the child):
36 * Subprocess proc(cmd, Subprocess::pipeStdout());
37 * // read from proc.stdout()
40 * A thread-safe version of popen() (type="w", to write to the child):
41 * Subprocess proc(cmd, Subprocess::pipeStdin());
42 * // write to proc.stdin()
45 * If you want to redirect both stdin and stdout to pipes, you can, but
46 * note that you're subject to a variety of deadlocks. You'll want to use
47 * nonblocking I/O; look at the implementation of communicate() for an example.
49 * communicate() is a way to communicate to a child via its standard input,
50 * standard output, and standard error. It buffers everything in memory,
51 * so it's not great for large amounts of data (or long-running processes),
52 * but it insulates you from the deadlocks mentioned above.
54 #ifndef FOLLY_SUBPROCESS_H_
55 #define FOLLY_SUBPROCESS_H_
57 #include <sys/types.h>
69 #include <boost/container/flat_map.hpp>
70 #include <boost/operators.hpp>
71 #include <boost/noncopyable.hpp>
73 #include <folly/FileUtil.h>
74 #include <folly/gen/String.h>
75 #include <folly/io/IOBufQueue.h>
76 #include <folly/MapUtil.h>
77 #include <folly/Portability.h>
78 #include <folly/Range.h>
83 * Class to wrap a process return code.
86 class ProcessReturnCode {
87 friend class Subprocess;
97 * Process state. One of:
98 * NOT_STARTED: process hasn't been started successfully
99 * RUNNING: process is currently running
100 * EXITED: process exited (successfully or not)
101 * KILLED: process was killed by a signal.
106 * Helper wrappers around state().
// Convenience predicates: each one tests state() against a single State value.
108 bool notStarted() const { return state() == NOT_STARTED; }
109 bool running() const { return state() == RUNNING; }
110 bool exited() const { return state() == EXITED; }
111 bool killed() const { return state() == KILLED; }
114 * Exit status. Only valid if state() == EXITED; throws otherwise.
116 int exitStatus() const;
119 * Signal that caused the process's termination. Only valid if
120 * state() == KILLED; throws otherwise.
122 int killSignal() const;
125 * Was a core file generated? Only valid if state() == KILLED; throws otherwise.
128 bool coreDumped() const;
131 * String representation; one of
134 * "exited with status <status>"
135 * "killed by signal <signal>"
136 * "killed by signal <signal> (core dumped)"
138 std::string str() const;
141 * Helper function to enforce a precondition based on this.
142 * Throws std::logic_error if in an unexpected state.
144 void enforce(State state) const;
// Stores rv unchanged in rawStatus_. The constructor is explicit, so a plain
// int never converts to a ProcessReturnCode implicitly.
// NOTE(review): rv is presumably either a wait()-style status or one of the
// RV_* sentinels below -- confirm against the implementation file.
146 explicit ProcessReturnCode(int rv) : rawStatus_(rv) { }
// Sentinel raw values. They match the -2 ("hasn't been successfully started")
// and -1 ("still running") values described for Subprocess::returnCode().
147 static constexpr int RV_NOT_STARTED = -2;
148 static constexpr int RV_RUNNING = -1;
154 * Base exception thrown by the Subprocess methods.
// Empty type that adds nothing to std::exception. CalledProcessError and
// SubprocessSpawnError both derive from it, so a handler for SubprocessError
// catches either of them.
156 class SubprocessError : public std::exception {};
159 * Exception thrown by *Checked methods of Subprocess.
161 class CalledProcessError : public SubprocessError {
163 explicit CalledProcessError(ProcessReturnCode rc);
// Empty destructor, declared non-throwing with throw().
164 ~CalledProcessError() throw() { }
// Returns the message held in what_. The pointer comes from what_.c_str(),
// so it is only usable while this exception object is alive.
// NOTE(review): FOLLY_OVERRIDE presumably expands to `override` -- confirm in
// folly/Portability.h.
165 const char* what() const throw() FOLLY_OVERRIDE { return what_.c_str(); }
// Returns a copy of the stored return code (by value).
166 ProcessReturnCode returnCode() const { return returnCode_; }
168 ProcessReturnCode returnCode_;
173 * Exception thrown if the subprocess cannot be started.
175 class SubprocessSpawnError : public SubprocessError {
177 SubprocessSpawnError(const char* executable, int errCode, int errnoValue);
// Empty destructor, declared non-throwing with throw().
178 ~SubprocessSpawnError() throw() {}
// Returns the message held in what_. The pointer comes from what_.c_str(),
// so it is only usable while this exception object is alive.
179 const char* what() const throw() FOLLY_OVERRIDE { return what_.c_str(); }
// Returns the stored errnoValue_.
// NOTE(review): presumably the errnoValue passed to the constructor -- the
// constructor body is not visible here, confirm.
180 int errnoValue() const { return errnoValue_; }
190 class Subprocess : private boost::noncopyable {
// Special "action" values accepted by Options::fd(). They are all negative,
// whereas an action that names a real file descriptor is non-negative.
// Options::pipeStdin() uses PIPE_IN; Options::pipeStdout() and
// Options::pipeStderr() use PIPE_OUT.
192 static const int CLOSE = -1;
193 static const int PIPE = -2;
194 static const int PIPE_IN = -3;
195 static const int PIPE_OUT = -4;
198 * Class representing various options: file descriptor behavior, and
199 * whether to use $PATH for searching for the executable.
201 * By default, we don't use $PATH, and file descriptors are closed if
202 * the close-on-exec flag is set (fcntl FD_CLOEXEC) and inherited otherwise.
205 class Options : private boost::orable<Options> {
206 friend class Subprocess;
209 : closeOtherFds_(false),
214 * Change action for file descriptor fd.
216 * "action" may be another file descriptor number (dup2()ed before the
217 * child execs), or one of CLOSE, PIPE_IN, and PIPE_OUT.
219 * CLOSE: close the file descriptor in the child
220 * PIPE_IN: open a pipe *to* the child (the child reads, the parent writes)
221 * PIPE_OUT: open a pipe *from* the child (the child writes, the parent reads)
223 * PIPE is a shortcut; same as PIPE_IN for stdin (fd 0), same as
224 * PIPE_OUT for stdout (fd 1) or stderr (fd 2), and an error for
225 * other file descriptors.
227 Options& fd(int fd, int action);
230 * Shortcut to change the action for standard input.
// Same as fd(STDIN_FILENO, action); returns the Options& that fd() returns,
// so calls can be chained.
232 Options& stdin(int action) { return fd(STDIN_FILENO, action); }
235 * Shortcut to change the action for standard output.
// Same as fd(STDOUT_FILENO, action); returns the Options& that fd() returns,
// so calls can be chained.
237 Options& stdout(int action) { return fd(STDOUT_FILENO, action); }
240 * Shortcut to change the action for standard error.
241 * Note that stderr(1) will redirect the standard error to the same
242 * file descriptor as standard output; the equivalent of bash's "2>&1"
// Same as fd(STDERR_FILENO, action); returns the Options& that fd() returns,
// so calls can be chained.
244 Options& stderr(int action) { return fd(STDERR_FILENO, action); }
// Shortcuts that request a pipe for one standard stream: PIPE_IN for the
// child's stdin, PIPE_OUT for its stdout and stderr. Each forwards to fd()
// and returns its Options& so calls can be chained.
246 Options& pipeStdin() { return fd(STDIN_FILENO, PIPE_IN); }
247 Options& pipeStdout() { return fd(STDOUT_FILENO, PIPE_OUT); }
248 Options& pipeStderr() { return fd(STDERR_FILENO, PIPE_OUT); }
251 * Close all other fds (other than standard input, output, error,
252 * and file descriptors explicitly specified with fd()).
254 * This is potentially slow; it's generally a better idea to
255 * set the close-on-exec flag on all file descriptors that shouldn't
256 * be inherited by the child.
258 * Even with this option set, standard input, output, and error are
259 * not closed; use stdin(CLOSE), stdout(CLOSE), stderr(CLOSE) if you
259 * want them closed.
// Sets closeOtherFds_ to true and returns *this so calls can be chained.
262 Options& closeOtherFds() { closeOtherFds_ = true; return *this; }
265 * Use the search path ($PATH) when searching for the executable.
// Sets usePath_ to true and returns *this so calls can be chained. Per the
// Subprocess constructor comment, env must be null when this option is set.
267 Options& usePath() { usePath_ = true; return *this; }
270 * Change the child's working directory, after the vfork.
// Copies dir into childDir_ and returns *this so calls can be chained. An
// empty childDir_ keeps the parent's working directory (see the member's
// comment).
272 Options& chdir(const std::string& dir) { childDir_ = dir; return *this; }
276 * Child will receive a signal when the parent exits.
278 Options& parentDeathSignal(int sig) {
279 parentDeathSignal_ = sig;
285 * Helpful way to combine Options.
287 Options& operator|=(const Options& other);
290 typedef boost::container::flat_map<int, int> FdMap;
294 std::string childDir_; // "" keeps the parent's working directory
296 int parentDeathSignal_{0};
// Static factories on Subprocess (distinct from the Options member functions
// of the same names). Each returns a fresh Options by value with a single
// PIPE action set. Per the fd() comment, PIPE means PIPE_IN for fd 0 and
// PIPE_OUT for fds 1 and 2.
300 static Options pipeStdin() { return Options().stdin(PIPE); }
301 static Options pipeStdout() { return Options().stdout(PIPE); }
302 static Options pipeStderr() { return Options().stderr(PIPE); }
305 * Create a subprocess from the given arguments. argv[0] must be listed.
306 * If not-null, executable must be the actual executable
307 * being used (otherwise it's the same as argv[0]).
309 * If env is not-null, it must contain name=value strings to be used
310 * as the child's environment; otherwise, we inherit the environment
311 * from the parent. env must be null if options.usePath is set.
314 const std::vector<std::string>& argv,
315 const Options& options = Options(),
316 const char* executable = nullptr,
317 const std::vector<std::string>* env = nullptr);
321 * Create a subprocess run as a shell command (as shell -c 'command')
323 * The shell to use is taken from the environment variable $SHELL,
324 * or /bin/sh if $SHELL is unset.
327 const std::string& cmd,
328 const Options& options = Options(),
329 const std::vector<std::string>* env = nullptr);
332 * Communicate with the child until all pipes to/from the child are closed.
334 * The input buffer is written to the process' stdin pipe, and data is read
335 * from the stdout and stderr pipes. Non-blocking I/O is performed on all
336 * pipes simultaneously to avoid deadlocks.
338 * The stdin pipe will be closed after the full input buffer has been written.
339 * An error will be thrown if a non-empty input buffer is supplied but stdin
340 * was not configured as a pipe.
342 * Returns a pair of buffers containing the data read from stdout and stderr.
343 * If stdout or stderr is not a pipe, an empty IOBuf queue will be returned
344 * for the respective buffer.
346 * Note that communicate() and communicateIOBuf() both return when all
347 * pipes to/from the child are closed; the child might stay alive after
348 * that, so you must still wait().
350 * communicateIOBuf() uses IOBufQueue for buffering (which has the
351 * advantage that it won't try to allocate all data at once), but it does
352 * store the subprocess's entire output in memory before returning.
354 * communicate() uses strings for simplicity.
356 std::pair<IOBufQueue, IOBufQueue> communicateIOBuf(
357 IOBufQueue input = IOBufQueue());
359 std::pair<std::string, std::string> communicate(
360 StringPiece input = StringPiece());
363 * Communicate with the child until all pipes to/from the child are closed.
365 * readCallback(pfd, cfd) will be called whenever there's data available
366 * on any pipe *from* the child (PIPE_OUT). pfd is the file descriptor
367 * in the parent (that you use to read from); cfd is the file descriptor
368 * in the child (used for identifying the stream; 1 = child's standard
369 * output, 2 = child's standard error, etc)
371 * writeCallback(pfd, cfd) will be called whenever a pipe *to* the child is
372 * writable (PIPE_IN). pfd is the file descriptor in the parent (that you
373 * use to write to); cfd is the file descriptor in the child (used for
374 * identifying the stream; 0 = child's standard input, etc)
376 * The read and write callbacks must read from / write to pfd and return
377 * false during normal operation. Return true to tell communicate() to
378 * close the pipe. For readCallback, this might send SIGPIPE to the
379 * child, or make its writes fail with EPIPE, so you should generally
380 * avoid returning true unless you've reached end-of-file.
382 * NOTE that you MUST consume all data passed to readCallback (or return
383 * true to close the pipe). Similarly, you MUST write to a writable pipe
384 * (or return true to close the pipe). To do otherwise is an error that
385 * can result in a deadlock. This applies even to pipes you don't care about.
388 * Note that pfd is nonblocking, so be prepared for read() / write() to
389 * return -1 and set errno to EAGAIN (in which case you should return
390 * false). Use readNoInt() from FileUtil.h to handle interrupted reads.
393 * Note that communicate() returns when all pipes to/from the child are
394 * closed; the child might stay alive after that, so you must still wait().
396 * Most users won't need to use this; the simpler version of communicate
397 * (which buffers data in memory) will probably work fine.
399 * See ReadLinesCallback for an easy way to consume the child's output
400 * streams line-by-line (or tokenized by another delimiter).
402 typedef std::function<bool(int, int)> FdCallback;
403 void communicate(FdCallback readCallback, FdCallback writeCallback);
406 * A readCallback for Subprocess::communicate() that helps you consume
407 * lines (or other delimited pieces) from your subprocess's file
408 * descriptors. Use the readLinesCallback() helper to get template
409 * deduction. For example:
411 * auto read_cb = Subprocess::readLinesCallback(
412 * [](int fd, folly::StringPiece s) {
413 * std::cout << fd << " said: " << s;
414 * return false; // Keep reading from the child
417 * subprocess.communicate(
418 * // ReadLinesCallback contains StreamSplitter contains IOBuf, making
419 * // it noncopyable, whereas std::function must be copyable. So, we
420 * // keep the callback in a local, and instead pass a reference.
422 * [](int pfd, int cfd){ return true; } // Don't write to the child
425 * If a file line exceeds maxLineLength, your callback will get some
426 * initial chunks of maxLineLength with no trailing delimiters. The final
427 * chunk of a line is delimiter-terminated iff the delimiter was present
428 * in the input. In particular, the last line in a file always lacks a
429 * delimiter -- so if a file ends on a delimiter, the final line is empty.
431 * Like a regular communicate() callback, your fdLineCb() normally returns
432 * false. It may return true to tell Subprocess to close the underlying
433 * file descriptor. The child process may then receive SIGPIPE or get
434 * EPIPE errors on writes.
436 template <class Callback>
437 class ReadLinesCallback {
439 // Binds an FD to the client-provided FD+line callback
440 struct StreamSplitterCallback {
// Binds the client callback to one child fd.
// NOTE(review): cb is taken by reference; if cb_ is a reference member (its
// declaration is not visible here), cb must outlive this object -- confirm.
441 StreamSplitterCallback(Callback& cb, int fd) : cb_(cb), fd_(fd) { }
442 // The return value semantics are inverted vs StreamSplitter
// The client callback returns false to keep reading and true to stop. The
// negation yields true for "keep going", and the caller in
// ReadLinesCallback::operator() treats a false splitter result as "stop".
443 bool operator()(StringPiece s) { return !cb_(fd_, s); }
447 typedef gen::StreamSplitter<StreamSplitterCallback> LineSplitter;
449 explicit ReadLinesCallback(
451 uint64_t maxLineLength = 0, // No line length limit by default
452 char delimiter = '\n',
453 uint64_t bufSize = 1024
454 ) : fdLineCb_(std::move(fdLineCb)),
455 maxLineLength_(maxLineLength),
456 delimiter_(delimiter),
// communicate()-style read callback: pfd is the parent-side descriptor to
// read from, cfd identifies the child's stream. Data read from pfd is fed to
// the LineSplitter kept for cfd in fdToSplitter_.
// NOTE(review): several lines of this body are not visible in this listing;
// the comments below describe only what is shown.
459 bool operator()(int pfd, int cfd) {
460 // Make a splitter for this cfd if it doesn't already exist
461 auto it = fdToSplitter_.find(cfd);
462 auto& splitter = (it != fdToSplitter_.end()) ? it->second
463 : fdToSplitter_.emplace(cfd, LineSplitter(
464 delimiter_, StreamSplitterCallback(fdLineCb_, cfd), maxLineLength_
466 // Read as much as we can from this FD
469 ssize_t ret = readNoInt(pfd, buf, bufSize_);
470 if (ret == -1 && errno == EAGAIN) { // No more data for now
473 if (ret == 0) { // Reached end-of-file
474 splitter.flush(); // Ignore return since the file is over anyway
// NOTE(review): only ret == -1 with errno == EAGAIN is special-cased above.
// In the lines visible here no other read error is handled, so ret == -1
// would reach StringPiece(buf, ret) below with a negative length -- confirm,
// and add an explicit error check if so.
477 if (!splitter(StringPiece(buf, ret))) {
478 return true; // The callback told us to stop
485 const uint64_t maxLineLength_;
486 const char delimiter_;
487 const uint64_t bufSize_;
488 // We lazily make splitters for all cfds that get used.
489 std::unordered_map<int, LineSplitter> fdToSplitter_;
492 // Helper to enable template deduction
493 template <class Callback>
494 static ReadLinesCallback<Callback> readLinesCallback(
496 uint64_t maxLineLength = 0, // No line length limit by default
497 char delimiter = '\n',
498 uint64_t bufSize = 1024) {
499 return ReadLinesCallback<Callback>(
500 std::move(fdLineCb), maxLineLength, delimiter, bufSize
505 * Enable notifications (callbacks) for one pipe to/from child. By default,
506 * all are enabled. Useful for "chatty" communication -- you want to disable
507 * write callbacks until you receive the expected message.
509 void enableNotifications(int childFd, bool enabled);
512 * Are notifications for one pipe to/from child enabled?
514 bool notificationsEnabled(int childFd) const;
517 * Return the child's pid, or -1 if the child wasn't successfully spawned
518 * or has already been wait()ed upon.
523 * Return the child's status (as per wait()) if the process has already
524 * been waited on, -1 if the process is still running, or -2 if the process
525 * hasn't been successfully started. NOTE that this does not poll, but
526 * returns the status stored in the Subprocess object.
// Plain accessor: returns a copy of the stored returnCode_ (by value).
528 ProcessReturnCode returnCode() const { return returnCode_; }
531 * Poll the child's status and return it, return -1 if the process
532 * is still running. NOTE that it is illegal to call poll again after
533 * poll indicated that the process has terminated, or to call poll on a
534 * process that hasn't been successfully started (the constructor threw).
537 ProcessReturnCode poll();
540 * Poll the child's status. If the process is still running, return false.
541 * Otherwise, return true if the process exited with status 0 (success),
542 * or throw CalledProcessError if the process exited with a non-zero status.
547 * Wait for the process to terminate and return its status.
548 * Similarly to poll, it is illegal to call wait after the process
549 * has already been reaped or if the process has not successfully started.
551 ProcessReturnCode wait();
554 * Wait for the process to terminate, throw if unsuccessful.
559 * Set all pipes from / to child non-blocking. communicate() does this for you.
562 void setAllNonBlocking();
565 * Get parent file descriptor corresponding to the given file descriptor
566 * in the child. Throws if childFd isn't a pipe (PIPE_IN / PIPE_OUT).
567 * Do not close() the returned file descriptor; use closeParentFd, below.
569 int parentFd(int childFd) const {
570 return pipes_[findByChildFd(childFd)].parentFd;
// Parent-side descriptors for the child's fds 0, 1 and 2. Each forwards to
// parentFd(), which is documented to throw when that child fd is not a pipe.
572 int stdin() const { return parentFd(0); }
573 int stdout() const { return parentFd(1); }
574 int stderr() const { return parentFd(2); }
577 * Close the parent file descriptor given a file descriptor in the child.
579 void closeParentFd(int childFd);
582 * Send a signal to the child. Shortcuts for SIGTERM and SIGKILL follow.
585 void sendSignal(int signal);
// Convenience wrappers: each calls sendSignal() with a fixed signal number.
586 void terminate() { sendSignal(SIGTERM); }
587 void kill() { sendSignal(SIGKILL); }
// Aliases for the ProcessReturnCode sentinels of the same names.
590 static const int RV_RUNNING = ProcessReturnCode::RV_RUNNING;
591 static const int RV_NOT_STARTED = ProcessReturnCode::RV_NOT_STARTED;
593 // spawn() sets up a pipe to read errors from the child,
594 // then calls spawnInternal() to do the bulk of the work. Once
595 // spawnInternal() returns it reads the error pipe to see if the child
596 // encountered any errors.
598 std::unique_ptr<const char*[]> argv,
599 const char* executable,
600 const Options& options,
601 const std::vector<std::string>* env);
603 std::unique_ptr<const char*[]> argv,
604 const char* executable,
606 const std::vector<std::string>* env,
609 // Actions to run in child.
610 // Note that this runs after vfork(), so tread lightly.
611 // Returns 0 on success, or an errno value on failure.
612 int prepareChild(const Options& options,
613 const sigset_t* sigmask,
614 const char* childDir) const;
615 int runChild(const char* executable, char** argv, char** env,
616 const Options& options) const;
619 * Read from the error pipe, and throw SubprocessSpawnError if the child
620 * failed before calling exec().
622 void readChildErrorPipe(int pfd, const char* executable);
625 * Close all file descriptors.
629 // return index in pipes_
630 int findByChildFd(int childFd) const;
633 ProcessReturnCode returnCode_;
635 // The number of pipes between parent and child is assumed to be small,
636 // so we're happy with a vector here, even if it means linear erase.
638 struct PipeInfo : private boost::totally_ordered<PipeInfo> {
641 int direction = PIPE_IN; // one of PIPE_IN / PIPE_OUT
644 bool operator<(const PipeInfo& other) const {
645 return childFd < other.childFd;
647 bool operator==(const PipeInfo& other) const {
648 return childFd == other.childFd;
651 std::vector<PipeInfo> pipes_;
// Merges `other` into *this. Merging an object with itself is a no-op.
// For each fd present in other.fdActions_, the action from `other` replaces
// ours; closeOtherFds_ and usePath_ are OR-ed together.
// NOTE(review): the tail of this function is not visible in this listing. In
// the lines shown, childDir_ and parentDeathSignal_ are not touched, so
// chdir() / parentDeathSignal() settings on `other` appear to be dropped by a
// merge -- confirm.
654 inline Subprocess::Options& Subprocess::Options::operator|=(
655 const Subprocess::Options& other) {
656 if (this == &other) return *this;
658 for (auto& p : other.fdActions_) {
659 fdActions_[p.first] = p.second;
661 closeOtherFds_ |= other.closeOtherFds_;
662 usePath_ |= other.usePath_;
668 #endif /* FOLLY_SUBPROCESS_H_ */