1 /*
2  * Copyright (c) Facebook, Inc. and its affiliates.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #ifndef _GNU_SOURCE
18 #define _GNU_SOURCE
19 #endif
20 
21 #include <folly/Subprocess.h>
22 
23 #if defined(__linux__)
24 #include <sys/prctl.h>
25 #endif
26 #include <fcntl.h>
27 
28 #include <algorithm>
29 #include <array>
30 #include <system_error>
31 #include <thread>
32 
33 #include <boost/container/flat_set.hpp>
34 #include <boost/range/adaptors.hpp>
35 
36 #include <folly/Conv.h>
37 #include <folly/Exception.h>
38 #include <folly/ScopeGuard.h>
39 #include <folly/String.h>
40 #include <folly/detail/AtFork.h>
41 #include <folly/io/Cursor.h>
42 #include <folly/lang/Assume.h>
43 #include <folly/logging/xlog.h>
44 #include <folly/portability/Fcntl.h>
45 #include <folly/portability/Sockets.h>
46 #include <folly/portability/Stdlib.h>
47 #include <folly/portability/SysSyscall.h>
48 #include <folly/portability/Unistd.h>
49 #include <folly/system/Shell.h>
50 
// Error code reported over the error pipe, and used as the child's exit
// status, when the exec call itself fails.
constexpr int kExecFailure = 127;
// Error code reported over the error pipe, and used as the child's exit
// status, when the child fails while preparing to exec (for example a failed
// second fork or a failure inside prepareChild()).
constexpr int kChildFailure = 126;
53 
54 namespace folly {
55 
make(int status)56 ProcessReturnCode ProcessReturnCode::make(int status) {
57   if (!WIFEXITED(status) && !WIFSIGNALED(status)) {
58     throw std::runtime_error(
59         to<std::string>("Invalid ProcessReturnCode: ", status));
60   }
61   return ProcessReturnCode(status);
62 }
63 
ProcessReturnCode(ProcessReturnCode && p)64 ProcessReturnCode::ProcessReturnCode(ProcessReturnCode&& p) noexcept
65     : rawStatus_(p.rawStatus_) {
66   p.rawStatus_ = ProcessReturnCode::RV_NOT_STARTED;
67 }
68 
operator =(ProcessReturnCode && p)69 ProcessReturnCode& ProcessReturnCode::operator=(
70     ProcessReturnCode&& p) noexcept {
71   rawStatus_ = p.rawStatus_;
72   p.rawStatus_ = ProcessReturnCode::RV_NOT_STARTED;
73   return *this;
74 }
75 
state() const76 ProcessReturnCode::State ProcessReturnCode::state() const {
77   if (rawStatus_ == RV_NOT_STARTED) {
78     return NOT_STARTED;
79   }
80   if (rawStatus_ == RV_RUNNING) {
81     return RUNNING;
82   }
83   if (WIFEXITED(rawStatus_)) {
84     return EXITED;
85   }
86   if (WIFSIGNALED(rawStatus_)) {
87     return KILLED;
88   }
89   assume_unreachable();
90 }
91 
enforce(State expected) const92 void ProcessReturnCode::enforce(State expected) const {
93   State s = state();
94   if (s != expected) {
95     throw std::logic_error(to<std::string>(
96         "Bad use of ProcessReturnCode; state is ", s, " expected ", expected));
97   }
98 }
99 
exitStatus() const100 int ProcessReturnCode::exitStatus() const {
101   enforce(EXITED);
102   return WEXITSTATUS(rawStatus_);
103 }
104 
killSignal() const105 int ProcessReturnCode::killSignal() const {
106   enforce(KILLED);
107   return WTERMSIG(rawStatus_);
108 }
109 
coreDumped() const110 bool ProcessReturnCode::coreDumped() const {
111   enforce(KILLED);
112   return WCOREDUMP(rawStatus_);
113 }
114 
str() const115 std::string ProcessReturnCode::str() const {
116   switch (state()) {
117     case NOT_STARTED:
118       return "not started";
119     case RUNNING:
120       return "running";
121     case EXITED:
122       return to<std::string>("exited with status ", exitStatus());
123     case KILLED:
124       return to<std::string>(
125           "killed by signal ",
126           killSignal(),
127           (coreDumped() ? " (core dumped)" : ""));
128   }
129   assume_unreachable();
130 }
131 
// Error raised for a process that did not finish successfully. The exception
// message is the textual form of `rc`, and a copy of `rc` is kept in
// returnCode_.
CalledProcessError::CalledProcessError(ProcessReturnCode rc)
    : SubprocessError(rc.str()),
      returnCode_(rc) {
}
134 
toSubprocessSpawnErrorMessage(char const * executable,int errCode,int errnoValue)135 static inline std::string toSubprocessSpawnErrorMessage(
136     char const* executable, int errCode, int errnoValue) {
137   auto prefix = errCode == kExecFailure ? "failed to execute "
138                                         : "error preparing to execute ";
139   return to<std::string>(prefix, executable, ": ", errnoStr(errnoValue));
140 }
141 
SubprocessSpawnError(const char * executable,int errCode,int errnoValue)142 SubprocessSpawnError::SubprocessSpawnError(
143     const char* executable, int errCode, int errnoValue)
144     : SubprocessError(
145           toSubprocessSpawnErrorMessage(executable, errCode, errnoValue)),
146       errnoValue_(errnoValue) {}
147 
148 namespace {
149 
// Builds a null-terminated array of C-string pointers, one per element of
// `s`, in the layout expected by the exec family of functions.
//
// Only the pointers are copied: the returned array refers to the character
// data owned by `s`, so `s` must outlive (and not be modified during) any use
// of the result.
std::unique_ptr<const char*[]> cloneStrings(
    const std::vector<std::string>& s) {
  const size_t count = s.size();
  std::unique_ptr<const char*[]> table(new const char*[count + 1]);
  const char** slot = table.get();
  for (const auto& str : s) {
    *slot++ = str.c_str();
  }
  // Terminating null entry.
  *slot = nullptr;
  return table;
}
160 
161 // Check a wait() status, throw on non-successful
checkStatus(ProcessReturnCode returnCode)162 void checkStatus(ProcessReturnCode returnCode) {
163   if (returnCode.state() != ProcessReturnCode::EXITED ||
164       returnCode.exitStatus() != 0) {
165     throw CalledProcessError(returnCode);
166   }
167 }
168 
169 } // namespace
170 
fd(int fd,int action)171 Subprocess::Options& Subprocess::Options::fd(int fd, int action) {
172   if (action == Subprocess::PIPE) {
173     if (fd == 0) {
174       action = Subprocess::PIPE_IN;
175     } else if (fd == 1 || fd == 2) {
176       action = Subprocess::PIPE_OUT;
177     } else {
178       throw std::invalid_argument(
179           to<std::string>("Only fds 0, 1, 2 are valid for action=PIPE: ", fd));
180     }
181   }
182   fdActions_[fd] = action;
183   return *this;
184 }
185 
// Default constructor: members take their default values; no process is
// spawned.
Subprocess::Subprocess() = default;
187 
Subprocess(const std::vector<std::string> & argv,const Options & options,const char * executable,const std::vector<std::string> * env)188 Subprocess::Subprocess(
189     const std::vector<std::string>& argv,
190     const Options& options,
191     const char* executable,
192     const std::vector<std::string>* env)
193     : destroyOkWhileRunning_(options.allowDestructionWhileProcessRunning_) {
194   if (argv.empty()) {
195     throw std::invalid_argument("argv must not be empty");
196   }
197   if (!executable) {
198     executable = argv[0].c_str();
199   }
200   spawn(cloneStrings(argv), executable, options, env);
201 }
202 
Subprocess(const std::string & cmd,const Options & options,const std::vector<std::string> * env)203 Subprocess::Subprocess(
204     const std::string& cmd,
205     const Options& options,
206     const std::vector<std::string>* env)
207     : destroyOkWhileRunning_(options.allowDestructionWhileProcessRunning_) {
208   if (options.usePath_) {
209     throw std::invalid_argument("usePath() not allowed when running in shell");
210   }
211 
212   std::vector<std::string> argv = {"/bin/sh", "-c", cmd};
213   spawn(cloneStrings(argv), argv[0].c_str(), options, env);
214 }
215 
fromExistingProcess(pid_t pid)216 Subprocess Subprocess::fromExistingProcess(pid_t pid) {
217   Subprocess sp;
218   sp.pid_ = pid;
219   sp.destroyOkWhileRunning_ = false;
220   sp.returnCode_ = ProcessReturnCode::makeRunning();
221   return sp;
222 }
223 
~Subprocess()224 Subprocess::~Subprocess() {
225   if (!destroyOkWhileRunning_) {
226     CHECK_NE(returnCode_.state(), ProcessReturnCode::RUNNING)
227         << "Subprocess destroyed without reaping child";
228   } else if (returnCode_.state() == ProcessReturnCode::RUNNING) {
229     XLOG(DBG) << "Subprocess destroyed without reaping child process";
230   }
231 }
232 
233 namespace {
234 
// Fixed-size record the child writes to the error pipe (see childError())
// and the parent reads back (see readChildErrorPipe()) when the child fails
// before or during exec.
struct ChildErrorInfo {
  // Which stage failed: kExecFailure or kChildFailure.
  int errCode;
  // The errno value describing the failure.
  int errnoValue;
};
239 
childError(int errFd,int errCode,int errnoValue)240 [[noreturn]] void childError(int errFd, int errCode, int errnoValue) {
241   ChildErrorInfo info = {errCode, errnoValue};
242   // Write the error information over the pipe to our parent process.
243   // We can't really do anything else if this write call fails.
244   writeNoInt(errFd, &info, sizeof(info));
245   // exit
246   _exit(errCode);
247 }
248 
249 } // namespace
250 
setAllNonBlocking()251 void Subprocess::setAllNonBlocking() {
252   for (auto& p : pipes_) {
253     int fd = p.pipe.fd();
254     int flags = ::fcntl(fd, F_GETFL);
255     checkUnixError(flags, "fcntl");
256     int r = ::fcntl(fd, F_SETFL, flags | O_NONBLOCK);
257     checkUnixError(r, "fcntl");
258   }
259 }
260 
// Common spawn path used by the constructors.
//
// Creates the close-on-exec error pipe, delegates pipe setup and fork/exec
// to spawnInternal(), then reads the error pipe to learn whether the child
// failed before exec().
//
// argv:       null-terminated argument array for the child.
// executable: program to execute.
// optionsIn:  spawn options; copied because spawnInternal() mutates them.
// env:        replacement environment, or null to inherit the parent's.
//
// Throws std::invalid_argument if usePath_ is combined with a custom
// environment, and SubprocessSpawnError (from readChildErrorPipe()) if the
// child reported a failure. If anything throws, pipes_ is cleared by the
// guard below.
void Subprocess::spawn(
    std::unique_ptr<const char*[]> argv,
    const char* executable,
    const Options& optionsIn,
    const std::vector<std::string>* env) {
  if (optionsIn.usePath_ && env) {
    throw std::invalid_argument(
        "usePath() not allowed when overriding environment");
  }

  // Make a copy, we'll mutate options
  Options options(optionsIn);

  // On error, close all pipes_ (ignoring errors, but that seems fine here).
  auto pipesGuard = makeGuard([this] { pipes_.clear(); });

  // Create a pipe to use to receive error information from the child,
  // in case it fails before calling exec()
  int errFds[2];
#if FOLLY_HAVE_PIPE2
  checkUnixError(::pipe2(errFds, O_CLOEXEC), "pipe2");
#else
  checkUnixError(::pipe(errFds), "pipe");
#endif
  // Close both ends on every exit path; the write end is skipped once it has
  // been closed explicitly below (errFds[1] is then set to -1).
  SCOPE_EXIT {
    CHECK_ERR(::close(errFds[0]));
    if (errFds[1] >= 0) {
      CHECK_ERR(::close(errFds[1]));
    }
  };

#if !FOLLY_HAVE_PIPE2
  // Ask the child to close the read end of the error pipe.
  checkUnixError(fcntl(errFds[0], F_SETFD, FD_CLOEXEC), "set FD_CLOEXEC");
  // Set the close-on-exec flag on the write side of the pipe.
  // This way the pipe will be closed automatically in the child if execve()
  // succeeds.  If the exec fails the child can write error information to the
  // pipe.
  checkUnixError(fcntl(errFds[1], F_SETFD, FD_CLOEXEC), "set FD_CLOEXEC");
#endif

  // Perform the actual work of setting up pipes then forking and
  // executing the child.
  spawnInternal(std::move(argv), executable, options, env, errFds[1]);

  // After spawnInternal() returns the child is alive.  We have to be very
  // careful about throwing after this point.  We are inside the constructor,
  // so if we throw the Subprocess object will have never existed, and the
  // destructor will never be called.
  //
  // We should only throw if we got an error via the errFd, and we know the
  // child has exited and can be immediately waited for.  In all other cases,
  // we have no way of cleaning up the child.

  // Close writable side of the errFd pipe in the parent process
  CHECK_ERR(::close(errFds[1]));
  errFds[1] = -1;

  // Read from the errFd pipe, to tell if the child ran into any errors before
  // calling exec()
  readChildErrorPipe(errFds[0], executable);

  // If we spawned a detached child, wait on the intermediate child process.
  // It always exits immediately.
  if (options.detach_) {
    wait();
  }

  // We have fully succeeded now, so release the guard on pipes_
  pipesGuard.dismiss();
}
332 
// With -Wclobbered, gcc complains about vfork potentially clobbering the
// childDir variable, even though we only use it on the child side of the
// vfork.
336 
337 FOLLY_PUSH_WARNING
338 FOLLY_GCC_DISABLE_WARNING("-Wclobbered")
// Creates the requested pipes, forks (or vforks / clones) and runs the child.
//
// argv:       null-terminated argument array for the child.
// executable: program to execute.
// options:    spawn options; PIPE_IN / PIPE_OUT entries of fdActions_ are
//             rewritten in place to the child's end of the created pipe so
//             that prepareChild() dup2()s them.
// env:        replacement environment, or null to use `environ`.
// errFd:      write end of the error pipe; the child reports failures on it
//             through childError().
//
// In the parent, returns after the fork with pid_ set and returnCode_ marked
// as running; the child's ends of the pipes are closed on scope exit and the
// signal mask is restored. Throws (via checkUnixError / checkPosixError) if
// pipe creation, signal masking or the fork fails. In the child this function
// never returns.
void Subprocess::spawnInternal(
    std::unique_ptr<const char*[]> argv,
    const char* executable,
    Options& options,
    const std::vector<std::string>* env,
    int errFd) {
  // Parent work, pre-fork: create pipes
  std::vector<int> childFds;
  // Close all of the childFds as we leave this scope
  SCOPE_EXIT {
    // These are only pipes, closing them shouldn't fail
    for (int cfd : childFds) {
      CHECK_ERR(::close(cfd));
    }
  };

  int r;
  for (auto& p : options.fdActions_) {
    if (p.second == PIPE_IN || p.second == PIPE_OUT) {
      int fds[2];
      // We're setting both ends of the pipe as close-on-exec. The child
      // doesn't need to reset the flag on its end, as we always dup2() the fd,
      // and dup2() fds don't share the close-on-exec flag.
#if FOLLY_HAVE_PIPE2
      // If possible, set close-on-exec atomically. Otherwise, a concurrent
      // Subprocess invocation can fork() between "pipe" and "fcntl",
      // causing FDs to leak.
      r = ::pipe2(fds, O_CLOEXEC);
      checkUnixError(r, "pipe2");
#else
      r = ::pipe(fds);
      checkUnixError(r, "pipe");
      r = fcntl(fds[0], F_SETFD, FD_CLOEXEC);
      checkUnixError(r, "set FD_CLOEXEC");
      r = fcntl(fds[1], F_SETFD, FD_CLOEXEC);
      checkUnixError(r, "set FD_CLOEXEC");
#endif
      pipes_.emplace_back();
      Pipe& pipe = pipes_.back();
      pipe.direction = p.second;
      int cfd;
      if (p.second == PIPE_IN) {
        // Child gets reading end
        pipe.pipe = folly::File(fds[1], /*ownsFd=*/true);
        cfd = fds[0];
      } else {
        // Child gets writing end
        pipe.pipe = folly::File(fds[0], /*ownsFd=*/true);
        cfd = fds[1];
      }
      p.second = cfd; // ensure it gets dup2()ed
      pipe.childFd = p.first;
      childFds.push_back(cfd);
    }
  }

  // This should already be sorted, as options.fdActions_ is
  DCHECK(std::is_sorted(pipes_.begin(), pipes_.end()));

  // Note that the const casts below are legit, per
  // http://pubs.opengroup.org/onlinepubs/009695399/functions/exec.html

  auto argVec = const_cast<char**>(argv.get());

  // Set up environment
  std::unique_ptr<const char*[]> envHolder;
  char** envVec;
  if (env) {
    envHolder = cloneStrings(*env);
    envVec = const_cast<char**>(envHolder.get());
  } else {
    envVec = environ;
  }

  // Block all signals around vfork; see http://ewontfix.com/7/.
  //
  // As the child may run in the same address space as the parent until
  // the actual execve() system call, any (custom) signal handlers that
  // the parent has might alter parent's memory if invoked in the child,
  // with undefined results.  So we block all signals in the parent before
  // vfork(), which will cause them to be blocked in the child as well (we
  // rely on the fact that Linux, just like all sane implementations, only
  // clones the calling thread).  Then, in the child, we reset all signals
  // to their default dispositions (while still blocked), and unblock them
  // (so the exec()ed process inherits the parent's signal mask)
  //
  // The parent also unblocks all signals as soon as vfork() returns.
  sigset_t allBlocked;
  r = sigfillset(&allBlocked);
  checkUnixError(r, "sigfillset");
  sigset_t oldSignals;

  r = pthread_sigmask(SIG_SETMASK, &allBlocked, &oldSignals);
  checkPosixError(r, "pthread_sigmask");
  SCOPE_EXIT {
    // Restore signal mask
    r = pthread_sigmask(SIG_SETMASK, &oldSignals, nullptr);
    CHECK_EQ(r, 0) << "pthread_sigmask: " << errnoStr(r); // shouldn't fail
  };

  // Call c_str() here, as it's not necessarily safe after fork.
  const char* childDir =
      options.childDir_.empty() ? nullptr : options.childDir_.c_str();

  pid_t pid;
#ifdef __linux__
  if (options.cloneFlags_) {
    pid = syscall(SYS_clone, *options.cloneFlags_, 0, nullptr, nullptr);
  } else {
#endif
    if (options.detach_) {
      // If we are detaching we must use fork() instead of vfork() for the first
      // fork, since we aren't going to simply call exec() in the child.
      pid = detail::AtFork::forkInstrumented(fork);
    } else {
      if (kIsSanitizeThread) {
        // TSAN treats vfork as fork, so use the instrumented version
        // instead
        pid = detail::AtFork::forkInstrumented(fork);
      } else {
        pid = vfork();
      }
    }
#ifdef __linux__
  }
#endif
  checkUnixError(pid, errno, "failed to fork");
  if (pid == 0) {
    // Fork a second time if detach_ was requested.
    // This must be done before signals are restored in prepareChild()
    if (options.detach_) {
#ifdef __linux__
      if (options.cloneFlags_) {
        pid = syscall(SYS_clone, *options.cloneFlags_, 0, nullptr, nullptr);
      } else {
#endif
        if (kIsSanitizeThread) {
          // TSAN treats vfork as fork, so use the instrumented version
          // instead
          pid = detail::AtFork::forkInstrumented(fork);
        } else {
          pid = vfork();
        }
#ifdef __linux__
      }
#endif
      if (pid == -1) {
        // Inform our parent process of the error so it can throw in the parent.
        childError(errFd, kChildFailure, errno);
      } else if (pid != 0) {
        // We are the intermediate process.  Exit immediately.
        // Our child will still inform the original parent of success/failure
        // through errFd.  The pid of the grandchild process never gets
        // propagated back up to the original parent.  In the future we could
        // potentially send it back using errFd if we needed to.
        _exit(0);
      }
    }

    int errnoValue = prepareChild(options, &oldSignals, childDir);
    if (errnoValue != 0) {
      childError(errFd, kChildFailure, errnoValue);
    }

    errnoValue = runChild(executable, argVec, envVec, options);
    // If we get here, exec() failed.
    childError(errFd, kExecFailure, errnoValue);
  }

  // Child is alive.  We have to be very careful about throwing after this
  // point.  We are inside the constructor, so if we throw the Subprocess
  // object will have never existed, and the destructor will never be called.
  //
  // We should only throw if we got an error via the errFd, and we know the
  // child has exited and can be immediately waited for.  In all other cases,
  // we have no way of cleaning up the child.
  pid_ = pid;
  returnCode_ = ProcessReturnCode::makeRunning();
}
517 FOLLY_POP_WARNING
518 
prepareChild(const Options & options,const sigset_t * sigmask,const char * childDir) const519 int Subprocess::prepareChild(
520     const Options& options,
521     const sigset_t* sigmask,
522     const char* childDir) const {
523   // While all signals are blocked, we must reset their
524   // dispositions to default.
525   for (int sig = 1; sig < NSIG; ++sig) {
526     ::signal(sig, SIG_DFL);
527   }
528 
529   {
530     // Unblock signals; restore signal mask.
531     int r = pthread_sigmask(SIG_SETMASK, sigmask, nullptr);
532     if (r != 0) {
533       return r; // pthread_sigmask() returns an errno value
534     }
535   }
536 
537   // Change the working directory, if one is given
538   if (childDir) {
539     if (::chdir(childDir) == -1) {
540       return errno;
541     }
542   }
543 
544 #ifdef __linux__
545   // Best effort
546   if (options.cpuSet_.hasValue()) {
547     const auto& cpuSet = options.cpuSet_.value();
548     ::sched_setaffinity(0, sizeof(cpuSet), &cpuSet);
549   }
550 #endif
551 
552   // We don't have to explicitly close the parent's end of all pipes,
553   // as they all have the FD_CLOEXEC flag set and will be closed at
554   // exec time.
555 
556   // Redirect requested FDs to /dev/null or NUL
557   // dup2 any explicitly specified FDs
558   for (auto& p : options.fdActions_) {
559     if (p.second == DEV_NULL) {
560       // folly/portability/Fcntl provides an impl of open that will
561       // map this to NUL on Windows.
562       auto devNull = ::open("/dev/null", O_RDWR | O_CLOEXEC);
563       if (devNull == -1) {
564         return errno;
565       }
566       // note: dup2 will not set CLOEXEC on the destination
567       if (::dup2(devNull, p.first) == -1) {
568         // explicit close on error to avoid leaking fds
569         ::close(devNull);
570         return errno;
571       }
572       ::close(devNull);
573     } else if (p.second != p.first) {
574       if (::dup2(p.second, p.first) == -1) {
575         return errno;
576       }
577     }
578   }
579 
580   // If requested, close all other file descriptors.  Don't close
581   // any fds in options.fdActions_, and don't touch stdin, stdout, stderr.
582   // Ignore errors.
583   if (options.closeOtherFds_) {
584     for (int fd = sysconf(_SC_OPEN_MAX) - 1; fd >= 3; --fd) {
585       if (options.fdActions_.count(fd) == 0) {
586         ::close(fd);
587       }
588     }
589   }
590 
591 #if defined(__linux__)
592   // Opt to receive signal on parent death, if requested
593   if (options.parentDeathSignal_ != 0) {
594     const auto parentDeathSignal =
595         static_cast<unsigned long>(options.parentDeathSignal_);
596     if (prctl(PR_SET_PDEATHSIG, parentDeathSignal, 0, 0, 0) == -1) {
597       return errno;
598     }
599   }
600 #endif
601 
602   if (options.processGroupLeader_) {
603 #if !defined(__FreeBSD__) && !defined(__DragonFly__)
604     if (setpgrp() == -1) {
605 #else
606     if (setpgrp(getpid(), getpgrp()) == -1) {
607 #endif
608       return errno;
609     }
610   }
611 
612   // The user callback comes last, so that the child is otherwise all set up.
613   if (options.dangerousPostForkPreExecCallback_) {
614     if (int error = (*options.dangerousPostForkPreExecCallback_)()) {
615       return error;
616     }
617   }
618 
619   return 0;
620 }
621 
622 int Subprocess::runChild(
623     const char* executable,
624     char** argv,
625     char** env,
626     const Options& options) const {
627   // Now, finally, exec.
628   if (options.usePath_) {
629     ::execvp(executable, argv);
630   } else {
631     ::execve(executable, argv, env);
632   }
633   return errno;
634 }
635 
636 void Subprocess::readChildErrorPipe(int pfd, const char* executable) {
637   ChildErrorInfo info;
638   auto rc = readNoInt(pfd, &info, sizeof(info));
639   if (rc == 0) {
640     // No data means the child executed successfully, and the pipe
641     // was closed due to the close-on-exec flag being set.
642     return;
643   } else if (rc != sizeof(ChildErrorInfo)) {
644     // An error occurred trying to read from the pipe, or we got a partial read.
645     // Neither of these cases should really occur in practice.
646     //
647     // We can't get any error data from the child in this case, and we don't
648     // know if it is successfully running or not.  All we can do is to return
649     // normally, as if the child executed successfully.  If something bad
650     // happened the caller should at least get a non-normal exit status from
651     // the child.
652     XLOGF(
653         ERR,
654         "unexpected error trying to read from child error pipe rc={}, errno={}",
655         rc,
656         errno);
657     return;
658   }
659 
660   // We got error data from the child.  The child should exit immediately in
661   // this case, so wait on it to clean up.
662   wait();
663 
664   // Throw to signal the error
665   throw SubprocessSpawnError(executable, info.errCode, info.errnoValue);
666 }
667 
668 ProcessReturnCode Subprocess::poll(struct rusage* ru) {
669   returnCode_.enforce(ProcessReturnCode::RUNNING);
670   DCHECK_GT(pid_, 0);
671   int status;
672   pid_t found = ::wait4(pid_, &status, WNOHANG, ru);
673   // The spec guarantees that EINTR does not occur with WNOHANG, so the only
674   // two remaining errors are ECHILD (other code reaped the child?), or
675   // EINVAL (cosmic rays?), both of which merit an abort:
676   PCHECK(found != -1) << "waitpid(" << pid_ << ", &status, WNOHANG)";
677   if (found != 0) {
678     // Though the child process had quit, this call does not close the pipes
679     // since its descendants may still be using them.
680     returnCode_ = ProcessReturnCode::make(status);
681     pid_ = -1;
682   }
683   return returnCode_;
684 }
685 
686 bool Subprocess::pollChecked() {
687   if (poll().state() == ProcessReturnCode::RUNNING) {
688     return false;
689   }
690   checkStatus(returnCode_);
691   return true;
692 }
693 
694 ProcessReturnCode Subprocess::wait() {
695   returnCode_.enforce(ProcessReturnCode::RUNNING);
696   DCHECK_GT(pid_, 0);
697   int status;
698   pid_t found;
699   do {
700     found = ::waitpid(pid_, &status, 0);
701   } while (found == -1 && errno == EINTR);
702   // The only two remaining errors are ECHILD (other code reaped the
703   // child?), or EINVAL (cosmic rays?), and both merit an abort:
704   PCHECK(found != -1) << "waitpid(" << pid_ << ", &status, 0)";
705   // Though the child process had quit, this call does not close the pipes
706   // since its descendants may still be using them.
707   DCHECK_EQ(found, pid_);
708   returnCode_ = ProcessReturnCode::make(status);
709   pid_ = -1;
710   return returnCode_;
711 }
712 
713 void Subprocess::waitChecked() {
714   wait();
715   checkStatus(returnCode_);
716 }
717 
718 ProcessReturnCode Subprocess::waitTimeout(TimeoutDuration timeout) {
719   returnCode_.enforce(ProcessReturnCode::RUNNING);
720   DCHECK_GT(pid_, 0) << "The subprocess has been waited already";
721 
722   auto pollUntil = std::chrono::steady_clock::now() + timeout;
723   auto sleepDuration = std::chrono::milliseconds{2};
724   constexpr auto maximumSleepDuration = std::chrono::milliseconds{100};
725 
726   for (;;) {
727     // Always call waitpid once after the full timeout has elapsed.
728     auto now = std::chrono::steady_clock::now();
729 
730     int status;
731     pid_t found;
732     do {
733       found = ::waitpid(pid_, &status, WNOHANG);
734     } while (found == -1 && errno == EINTR);
735     PCHECK(found != -1) << "waitpid(" << pid_ << ", &status, WNOHANG)";
736     if (found) {
737       // Just on the safe side, make sure it's the actual pid we are waiting.
738       DCHECK_EQ(found, pid_);
739       returnCode_ = ProcessReturnCode::make(status);
740       // Change pid_ to -1 to detect programming error like calling
741       // this method multiple times.
742       pid_ = -1;
743       return returnCode_;
744     }
745     if (now > pollUntil) {
746       // Timed out: still running().
747       return returnCode_;
748     }
749     // The subprocess is still running, sleep for increasing periods of time.
750     std::this_thread::sleep_for(sleepDuration);
751     sleepDuration =
752         std::min(maximumSleepDuration, sleepDuration + sleepDuration);
753   }
754 }
755 
756 void Subprocess::sendSignal(int signal) {
757   returnCode_.enforce(ProcessReturnCode::RUNNING);
758   int r = ::kill(pid_, signal);
759   checkUnixError(r, "kill");
760 }
761 
762 ProcessReturnCode Subprocess::waitOrTerminateOrKill(
763     TimeoutDuration waitTimeout, TimeoutDuration sigtermTimeout) {
764   returnCode_.enforce(ProcessReturnCode::RUNNING);
765   DCHECK_GT(pid_, 0) << "The subprocess has been waited already";
766 
767   this->waitTimeout(waitTimeout);
768 
769   if (returnCode_.running()) {
770     return terminateOrKill(sigtermTimeout);
771   }
772   return returnCode_;
773 }
774 
775 ProcessReturnCode Subprocess::terminateOrKill(TimeoutDuration sigtermTimeout) {
776   returnCode_.enforce(ProcessReturnCode::RUNNING);
777   DCHECK_GT(pid_, 0) << "The subprocess has been waited already";
778   // 1. Send SIGTERM to kill the process
779   terminate();
780   // 2. check whether subprocess has terminated using non-blocking waitpid
781   waitTimeout(sigtermTimeout);
782   if (!returnCode_.running()) {
783     return returnCode_;
784   }
785   // 3. If we are at this point, we have waited enough time after
786   // sending SIGTERM, we have to use nuclear option SIGKILL to kill
787   // the subprocess.
788   XLOGF(INFO, "Send SIGKILL to {}", pid_);
789   kill();
790   // 4. SIGKILL should kill the process otherwise there must be
791   // something seriously wrong, just use blocking wait to wait for the
792   // subprocess to finish.
793   return wait();
794 }
795 
// Returns the stored child pid. Note that poll(), wait() and waitTimeout()
// reset pid_ to -1 once the child has been reaped.
pid_t Subprocess::pid() const {
  return pid_;
}
799 
800 namespace {
801 
802 ByteRange queueFront(const IOBufQueue& queue) {
803   auto* p = queue.front();
804   if (!p) {
805     return ByteRange{};
806   }
807   return io::Cursor(p).peekBytes();
808 }
809 
810 // fd write
811 bool handleWrite(int fd, IOBufQueue& queue) {
812   for (;;) {
813     auto b = queueFront(queue);
814     if (b.empty()) {
815       return true; // EOF
816     }
817 
818     ssize_t n = writeNoInt(fd, b.data(), b.size());
819     if (n == -1 && errno == EAGAIN) {
820       return false;
821     }
822     checkUnixError(n, "write");
823     queue.trimStart(n);
824   }
825 }
826 
827 // fd read
828 bool handleRead(int fd, IOBufQueue& queue) {
829   for (;;) {
830     auto p = queue.preallocate(100, 65000);
831     ssize_t n = readNoInt(fd, p.first, p.second);
832     if (n == -1 && errno == EAGAIN) {
833       return false;
834     }
835     checkUnixError(n, "read");
836     if (n == 0) {
837       return true;
838     }
839     queue.postallocate(n);
840   }
841 }
842 
843 bool discardRead(int fd) {
844   static const size_t bufSize = 65000;
845   // Thread unsafe, but it doesn't matter.
846   static std::unique_ptr<char[]> buf(new char[bufSize]);
847 
848   for (;;) {
849     ssize_t n = readNoInt(fd, buf.get(), bufSize);
850     if (n == -1 && errno == EAGAIN) {
851       return false;
852     }
853     checkUnixError(n, "read");
854     if (n == 0) {
855       return true;
856     }
857   }
858 }
859 
860 } // namespace
861 
862 std::pair<std::string, std::string> Subprocess::communicate(StringPiece input) {
863   IOBufQueue inputQueue;
864   inputQueue.wrapBuffer(input.data(), input.size());
865 
866   auto outQueues = communicateIOBuf(std::move(inputQueue));
867   auto outBufs =
868       std::make_pair(outQueues.first.move(), outQueues.second.move());
869   std::pair<std::string, std::string> out;
870   if (outBufs.first) {
871     outBufs.first->coalesce();
872     out.first.assign(
873         reinterpret_cast<const char*>(outBufs.first->data()),
874         outBufs.first->length());
875   }
876   if (outBufs.second) {
877     outBufs.second->coalesce();
878     out.second.assign(
879         reinterpret_cast<const char*>(outBufs.second->data()),
880         outBufs.second->length());
881   }
882   return out;
883 }
884 
885 std::pair<IOBufQueue, IOBufQueue> Subprocess::communicateIOBuf(
886     IOBufQueue input) {
887   // If the user supplied a non-empty input buffer, make sure
888   // that stdin is a pipe so we can write the data.
889   if (!input.empty()) {
890     // findByChildFd() will throw std::invalid_argument if no pipe for
891     // STDIN_FILENO exists
892     findByChildFd(STDIN_FILENO);
893   }
894 
895   std::pair<IOBufQueue, IOBufQueue> out;
896 
897   auto readCallback = [&](int pfd, int cfd) -> bool {
898     if (cfd == STDOUT_FILENO) {
899       return handleRead(pfd, out.first);
900     } else if (cfd == STDERR_FILENO) {
901       return handleRead(pfd, out.second);
902     } else {
903       // Don't close the file descriptor, the child might not like SIGPIPE,
904       // just read and throw the data away.
905       return discardRead(pfd);
906     }
907   };
908 
909   auto writeCallback = [&](int pfd, int cfd) -> bool {
910     if (cfd == STDIN_FILENO) {
911       return handleWrite(pfd, input);
912     } else {
913       // If we don't want to write to this fd, just close it.
914       return true;
915     }
916   };
917 
918   communicate(std::move(readCallback), std::move(writeCallback));
919 
920   return out;
921 }
922 
923 void Subprocess::communicate(
924     FdCallback readCallback, FdCallback writeCallback) {
925   // This serves to prevent wait() followed by communicate(), but if you
926   // legitimately need that, send a patch to delete this line.
927   returnCode_.enforce(ProcessReturnCode::RUNNING);
928   setAllNonBlocking();
929 
930   std::vector<pollfd> fds;
931   fds.reserve(pipes_.size());
932   std::vector<size_t> toClose; // indexes into pipes_
933   toClose.reserve(pipes_.size());
934 
935   while (!pipes_.empty()) {
936     fds.clear();
937     toClose.clear();
938 
939     for (auto& p : pipes_) {
940       pollfd pfd;
941       pfd.fd = p.pipe.fd();
942       // Yes, backwards, PIPE_IN / PIPE_OUT are defined from the
943       // child's point of view.
944       if (!p.enabled) {
945         // Still keeping fd in watched set so we get notified of POLLHUP /
946         // POLLERR
947         pfd.events = 0;
948       } else if (p.direction == PIPE_IN) {
949         pfd.events = POLLOUT;
950       } else {
951         pfd.events = POLLIN;
952       }
953       fds.push_back(pfd);
954     }
955 
956     int r;
957     do {
958       r = ::poll(fds.data(), fds.size(), -1);
959     } while (r == -1 && errno == EINTR);
960     checkUnixError(r, "poll");
961 
962     for (size_t i = 0; i < pipes_.size(); ++i) {
963       auto& p = pipes_[i];
964       auto parentFd = p.pipe.fd();
965       DCHECK_EQ(fds[i].fd, parentFd);
966       short events = fds[i].revents;
967 
968       bool closed = false;
969       if (events & POLLOUT) {
970         DCHECK(!(events & POLLIN));
971         if (writeCallback(parentFd, p.childFd)) {
972           toClose.push_back(i);
973           closed = true;
974         }
975       }
976 
977       // Call read callback on POLLHUP, to give it a chance to read (and act
978       // on) end of file
979       if (events & (POLLIN | POLLHUP)) {
980         DCHECK(!(events & POLLOUT));
981         if (readCallback(parentFd, p.childFd)) {
982           toClose.push_back(i);
983           closed = true;
984         }
985       }
986 
987       if ((events & (POLLHUP | POLLERR)) && !closed) {
988         toClose.push_back(i);
989         closed = true;
990       }
991     }
992 
993     // Close the fds in reverse order so the indexes hold after erase()
994     for (int idx : boost::adaptors::reverse(toClose)) {
995       auto pos = pipes_.begin() + idx;
996       pos->pipe.close(); // Throws on error
997       pipes_.erase(pos);
998     }
999   }
1000 }
1001 
1002 void Subprocess::enableNotifications(int childFd, bool enabled) {
1003   pipes_[findByChildFd(childFd)].enabled = enabled;
1004 }
1005 
1006 bool Subprocess::notificationsEnabled(int childFd) const {
1007   return pipes_[findByChildFd(childFd)].enabled;
1008 }
1009 
1010 size_t Subprocess::findByChildFd(int childFd) const {
1011   auto pos = std::lower_bound(
1012       pipes_.begin(), pipes_.end(), childFd, [](const Pipe& pipe, int fd) {
1013         return pipe.childFd < fd;
1014       });
1015   if (pos == pipes_.end() || pos->childFd != childFd) {
1016     throw std::invalid_argument(
1017         folly::to<std::string>("child fd not found ", childFd));
1018   }
1019   return pos - pipes_.begin();
1020 }
1021 
1022 void Subprocess::closeParentFd(int childFd) {
1023   int idx = findByChildFd(childFd);
1024   pipes_[idx].pipe.close(); // May throw
1025   pipes_.erase(pipes_.begin() + idx);
1026 }
1027 
1028 std::vector<Subprocess::ChildPipe> Subprocess::takeOwnershipOfPipes() {
1029   std::vector<Subprocess::ChildPipe> pipes;
1030   for (auto& p : pipes_) {
1031     pipes.emplace_back(p.childFd, std::move(p.pipe));
1032   }
1033   // release memory
1034   std::vector<Pipe>().swap(pipes_);
1035   return pipes;
1036 }
1037 
namespace {

// Sets the SIGPIPE disposition to "ignore" from its constructor.
struct Initializer {
  Initializer() {
    // With SIGPIPE ignored, writing to a pipe that has no reader fails with
    // EPIPE instead of raising the signal -- which is what we want.
    ::signal(SIGPIPE, SIG_IGN);
  }
};

// Exists only so the constructor above runs during static initialization.
Initializer initializer;

} // namespace
1051 
1052 } // namespace folly
1053