1 /* ===========================================================================
2  *
3  *                            PUBLIC DOMAIN NOTICE
4  *               National Center for Biotechnology Information
5  *
6  *  This software/database is a "United States Government Work" under the
7  *  terms of the United States Copyright Act.  It was written as part of
8  *  the author's official duties as a United States Government employee and
9  *  thus cannot be copyrighted.  This software/database is freely available
10  *  to the public for use. The National Library of Medicine and the U.S.
11  *  Government have not placed any restriction on its use or reproduction.
12  *
13  *  Although all reasonable efforts have been taken to ensure the accuracy
14  *  and reliability of the software and data, the NLM and the U.S.
15  *  Government do not and cannot warrant the performance or results that
16  *  may be obtained by using this software or data. The NLM and the U.S.
17  *  Government disclaim all warranties, express or implied, including
18  *  warranties of performance, merchantability or fitness for any particular
19  *  purpose.
20  *
21  *  Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Project:
26  *  sratools command line tool
27  *
28  * Purpose:
29  *  process stuff, like fork and exec
30  *
31  */
32 
// Debug builds only: enables the exec_debugger hook, which exec() uses when
// the SRATOOLS_DEBUG_CMD environment variable is set and non-empty.
#if DEBUG || _DEBUGGING
#define USE_DEBUGGER 1
#endif
36 
#include <string>
#include <vector>
#include <iostream>
#include <memory>
#include <functional>
#include <atomic>

#include <cerrno>
#include <cstdlib>
#include <cstdio>

#include <unistd.h>
#include <sys/stat.h>
#include <sysexits.h>
#include <signal.h>
51 
52 #include "debug.hpp"
53 #include "proc.hpp"
54 #include "globals.hpp"
55 #include "util.hpp"
56 #include "env_vars.h"
57 #include "constants.hpp"
58 
/// @brief C++ and const-friendly wrapper around ::execve
///
/// ::execve is declared to take pointers to non-const strings, but any
/// modification could only happen after this process has been replaced.
/// So from the point of view of this process, ::execve does not modify its
/// arguments, and casting the constness away is harmless.
///
/// @param path path of the program to execute
/// @param argv null-terminated argument vector for the new program
/// @param env null-terminated environment for the new program; defaults to the current environment
///
/// @return whatever ::execve returns, i.e. only returns if the exec failed
static inline int execve(char const *path, char const *const *argv, char const *const *env = environ)
{
    auto const exec_args = const_cast<char *const *>(argv);
    auto const exec_env = const_cast<char *const *>(env);

    // the explicitly qualified call resolves to the libc function, not to this overload
    return ::execve(path, exec_args, exec_env);
}
68 
69 #if USE_DEBUGGER
70 
71 /// @brief run child tool in a debugger
72 ///
73 /// @note uses $SHELL; it squotes all elements of argv, escaping squote and backslash
74 ///
75 /// @example With gdb: SRATOOLS_IMPERSONATE=sam-dump SRATOOLS_DEBUG_CMD="gdb --args" sratools SRR000001 --output-file /dev/null \par
76 /// With lldb: SRATOOLS_IMPERSONATE=sam-dump SRATOOLS_DEBUG_CMD="lldb --" sratools SRR000001 --output-file /dev/null
exec_debugger(char const * const debugger,char const * const * const argv)77 static void exec_debugger [[noreturn]] (  char const *const debugger
78                                         , char const * const *const argv)
79 {
80     auto const shell_envar = getenv("SHELL");
81     auto const shell = (shell_envar && *shell_envar) ? shell_envar : "/bin/sh";
82     auto cmd = std::string(debugger);
83 
84     for (auto arg = argv; ; ) {
85         auto cp = *arg++;
86         if (cp == nullptr)
87             break;
88 
89         cmd += " '";
90         for ( ; ; ) {
91             auto const ch = *cp++;
92             if (ch == '\0')
93                 break;
94             if (ch == '\'' || ch == '\\') // these need to be escaped
95                 cmd += '\\';
96             cmd += ch;
97         }
98         cmd += "'";
99     }
100     char const *new_argv[] = { shell, "-c", cmd.c_str(), nullptr };
101 
102     fprintf(stderr, "%s %s %s", new_argv[0], new_argv[1], new_argv[2]);
103     execve(shell, new_argv);
104     throw_system_error("failed to exec debugger");
105 }
106 #endif
107 
108 
debugPrintDryRun(char const * const toolpath,char const * const toolname,char const * const * const argv)109 static void debugPrintDryRun(  char const *const toolpath
110                              , char const *const toolname
111                              , char const *const *const argv)
112 {
113     switch (logging_state::testing_level()) {
114     case 5:
115         for (auto name : make_sequence(constants::env_var::names(), constants::env_var::END_ENUM)) {
116             debugPrintEnvVarName(name);
117         }
118         exit(0);
119     case 4:
120         for (auto name : make_sequence(constants::env_var::names(), constants::env_var::END_ENUM)) {
121             debugPrintEnvVar(name, true);
122         }
123         debugPrintEnvVar(ENV_VAR_SESSION_ID, true);
124         std::cerr << toolpath;
125         for (auto i = 1; argv[i]; ++i)
126             std::cerr << ' ' << argv[i];
127         std::cerr << std::endl;
128         exit(0);
129     case 3:
130         std::cerr << "would exec '" << toolpath << "' as:\n";
131         for (auto i = 0; argv[i]; ++i)
132             std::cerr << ' ' << argv[i];
133         {
134             std::cerr << "\nwith environment:\n";
135             for (auto name : make_sequence(constants::env_var::names(), constants::env_var::END_ENUM)) {
136                 debugPrintEnvVar(name);
137             }
138             debugPrintEnvVar(ENV_VAR_SESSION_ID);
139             std::cerr << std::endl;
140         }
141         exit(0);
142         break;
143     case 2:
144         std::cerr << toolname;
145         for (auto i = 1; argv[i]; ++i)
146             std::cerr << ' ' << argv[i];
147         std::cerr << std::endl;
148         exit(0);
149         break;
150     default:
151         break;
152     }
153 }
154 
155 /// @brief calls exec; does not return
156 ///
157 /// @param toolpath the full path to the tool, e.g. /path/to/fastq-dump-orig
158 /// @param toolname the user-centric name of the tool, e.g. fastq-dump
159 /// @param argv argv
160 ///
161 /// @throw system_error if exec fails
exec(char const * const toolpath,char const * const toolname,char const * const * const argv)162 static void exec [[noreturn]] (  char const *const toolpath
163                                , char const *const toolname
164                                , char const *const *const argv)
165 {
166 #if USE_DEBUGGER
167     auto const envar = getenv("SRATOOLS_DEBUG_CMD");
168     if (envar && *envar) {
169         exec_debugger(envar, argv);
170     }
171 #endif
172     debugPrintDryRun(toolpath, toolname, argv);
173     execve(toolpath, argv);
174     throw_system_error(std::string("failed to exec ")+toolname);
175 }
176 
177 /// @brief calls exec; does not return; no debugging or dry run
178 ///
179 /// @param toolpath the full path to the tool, e.g. /path/to/fastq-dump-orig
180 /// @param toolname the user-centric name of the tool, e.g. fastq-dump
181 /// @param argv argv
182 ///
183 /// @throw system_error if exec fails
exec_really(char const * const toolpath,char const * const toolname,char const * const * const argv)184 static void exec_really [[noreturn]] (  char const *const toolpath
185                                       , char const *const toolname
186                                       , char const *const *const argv)
187 {
188     execve(toolpath, argv);
189     throw_system_error(std::string("failed to exec ")+toolname);
190 }
191 
/// @brief the process that sig_handler_for_waiting forwards signals to
///
/// It is read from inside a signal handler, so it is an atomic rather than
/// a plain variable.
static std::atomic<pid_t> forward_target_pid;

/// @brief signal handler that passes the received signal on to forward_target_pid
///
/// kill() may change errno, and the handler can interrupt code that is about
/// to examine errno, so errno is saved on entry and restored on exit.
///
/// @param sig the signal that was received, and that is forwarded
static void sig_handler_for_waiting(int sig)
{
    auto const saved_errno = errno;

    kill(forward_target_pid.load(), sig);
    errno = saved_errno;
}
197 
waitpid_with_signal_forwarding(pid_t const pid,int * const status)198 static int waitpid_with_signal_forwarding(pid_t const pid, int *const status)
199 {
200     struct sigaction act, old;
201 
202     // we are not reentrant
203     static std::atomic_flag lock = ATOMIC_FLAG_INIT;
204     auto const was_locked = lock.test_and_set();
205     assert(was_locked == false);
206     if (was_locked)
207         throw std::logic_error("NOT REENTRANT!!!");
208 
209     // set up signal forwarding
210     forward_target_pid = pid;
211 
212     act.sa_handler = sig_handler_for_waiting;
213     sigemptyset(&act.sa_mask);
214     act.sa_flags = 0;
215 
216     // set the signal handler
217     if (sigaction(SIGINT, &act, &old) < 0)
218         throw_system_error("sigaction failed");
219 
220     auto const rc = waitpid(pid, status, 0);
221 
222     // restore signal handler to old state
223     if (sigaction(SIGINT, &old, nullptr))
224         throw_system_error("sigaction failed");
225 
226     lock.clear();
227 
228     return rc;
229 }
230 
231 namespace sratools {
232 
wait() const233 process::exit_status process::wait() const
234 {
235     assert(pid != 0); ///< you can't wait on yourself
236     if (pid == 0)
237         throw std::logic_error("you can't wait on yourself!");
238 
239     do { // loop if wait is interrupted
240         auto status = int(0);
241         auto const rc = waitpid_with_signal_forwarding(pid, &status);
242 
243         if (rc > 0) {
244             assert(rc == pid);
245             return exit_status(status); ///< normal return is here
246         }
247 
248         assert(rc != 0); // only happens if WNOHANG is given
249         if (rc == 0)
250             std::unexpected();
251     } while (errno == EINTR);
252 
253     assert(errno != ECHILD); // you already waited on this!
254     if (errno == ECHILD)
255         throw std::logic_error("child process was already reaped");
256 
257     throw_system_error("waitpid failed");
258 }
259 
run_child(char const * toolpath,char const * toolname,char const ** argv,Dictionary const & env)260 void process::run_child(char const *toolpath, char const *toolname, char const **argv, Dictionary const &env)
261 {
262     for (auto && v : env) {
263         setenv(v.first.c_str(), v.second.c_str(), 1);
264     }
265     exec(toolpath, toolname, argv);
266 }
267 
run_child_and_wait(char const * toolpath,char const * toolname,char const ** argv,Dictionary const & env)268 process::exit_status process::run_child_and_wait(char const *toolpath, char const *toolname, char const **argv, Dictionary const &env)
269 {
270     auto const pid = ::fork();
271     if (pid < 0)
272         throw_system_error("fork failed");
273     if (pid == 0) {
274         run_child(toolpath, toolname, argv, env);
275     }
276     return process(pid).wait();
277 }
278 
run_child_and_get_stdout(std::string * out,char const * toolpath,char const * toolname,char const ** argv,bool const for_real,Dictionary const & env)279 process::exit_status process::run_child_and_get_stdout(std::string *out, char const *toolpath, char const *toolname, char const **argv, bool const for_real, Dictionary const &env)
280 {
281     int fds[2];
282 
283     if (::pipe(fds) < 0)
284         throw_system_error("pipe failed");
285 
286     auto const pid = ::fork();
287     if (pid < 0)
288         throw_system_error("fork failed");
289     if (pid == 0) {
290         close(fds[0]);
291 
292         auto const newfd = dup2(fds[1], 1);
293         close(fds[1]);
294 
295         if (newfd < 0)
296             throw_system_error("dup2 failed");
297 
298         for (auto && v : env) {
299             setenv(v.first.c_str(), v.second.c_str(), 1);
300         }
301         if (for_real)
302             exec_really(toolpath, toolname, argv);
303         else
304             exec(toolpath, toolname, argv);
305         assert(!"reachable");
306         throw std::logic_error("child must not return");
307     }
308     close(fds[1]);
309 
310     *out = std::string();
311     char buffer[4096];
312     ssize_t nread = 0;
313 
314     for ( ; ; ) {
315         while ((nread = ::read(fds[0], buffer, sizeof(buffer))) > 0) {
316             out->append(buffer, nread);
317         }
318         if (nread == 0) {
319             close(fds[0]);
320             return process(pid).wait();
321         }
322         auto const error = error_code_from_errno();
323         if (error != std::errc::interrupted)
324             throw std::system_error(error, "read failed");
325     }
326 }
327 
328 } // namespace sratools
329