1 // This supervisor program keeps track of a process (normally the parent
2 // process) and receives process IDs (called children) on standard input. If
3 // the supervisor process receives a SIGINT (Ctrl-C) or SIGTERM, or if it
4 // detects that the parent process has died, it will kill all the child
5 // processes.
6 //
7 // Every 0.2 seconds, it does the following:
8 // * Checks for any new process IDs on standard input, and adds them to the list
9 //   of child processes to track. If the PID is negative, as in "-1234", then
10 //   that value will be negated and removed from the list of processes to track.
11 // * Checks if any child processes have died. If so, remove them from the list
12 //   of child processes to track.
13 // * Checks if the parent process has died. If so, kill all children and exit.
14 //
15 // To test it out in verbose mode, run:
16 //   gcc supervisor.c -o supervisor
17 //   ./supervisor -v -p [parent_pid]
18 //
19 // The [parent_pid] is optional. If not supplied, the supervisor will auto-
20 // detect the parent process.
21 //
22 // After it is started, you can enter pids for child processes. Then you can
23 // do any of the following to test it out:
24 // * Press Ctrl-C.
25 // * Send a SIGTERM to the supervisor with `killall supervisor`.
// * Kill the parent process.
27 // * Kill a child process.
28 
29 #ifdef __INTEL_COMPILER
30 #define _BSD_SOURCE 1
31 #define _POSIX_C_SOURCE  200809L
32 #endif
33 
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <stdbool.h>
38 #include <stdarg.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <fcntl.h>
42 #include <sys/types.h>
43 #include <time.h>
44 #include <signal.h>
45 #include <unistd.h>
46 
47 #ifdef WIN32
48 #include "windows.h"
49 #endif
50 
51 #include "utils.h"
52 
// Constants ------------------------------------------------------------------

// Size of the input line buffer
#define INPUT_BUF_LEN 1024
// Maximum number of children to keep track of
#define MAX_CHILDREN 1024
// Milliseconds to sleep in polling loop
#define POLL_MS 200

// Globals --------------------------------------------------------------------

// Child processes to track. Only the first n_children entries are in use.
int children[MAX_CHILDREN];
int n_children = 0;

// Flags set by the signal handler and polled by the main loop. They are
// `volatile sig_atomic_t` because that is the object type the C standard
// guarantees can be written from an asynchronous signal handler and then read
// by the interrupted code without the access being torn or optimized away.
volatile sig_atomic_t sigint_received  = false;
volatile sig_atomic_t sigterm_received = false;
70 
71 // Utility functions ----------------------------------------------------------
72 
// Cross-platform sleep function
#ifdef WIN32
#include <windows.h>
#elif _POSIX_C_SOURCE >= 199309L
#include <time.h>   // for nanosleep
#else
#include <unistd.h> // for usleep
#endif

// Pause for `milliseconds` milliseconds using whichever primitive the platform
// offers: Sleep() on Windows, nanosleep() when POSIX.1b is available, and
// usleep() otherwise. The result of the underlying call is ignored, so a sleep
// that ends early is not resumed.
void sleep_ms(int milliseconds) {
#ifdef WIN32
    Sleep(milliseconds);
#elif _POSIX_C_SOURCE >= 199309L
    // Split the delay into whole seconds plus the leftover nanoseconds.
    const struct timespec delay = {
        .tv_sec  = milliseconds / 1000,
        .tv_nsec = (milliseconds % 1000) * 1000000
    };
    nanosleep(&delay, NULL);
#else
    usleep(milliseconds * 1000);
#endif
}
94 
95 
// Parse a decimal process ID from the start of `buf`.
//
// buf: NUL-terminated text such as "102" or "-1234\n". Parsing follows strtol:
//      leading whitespace and an optional sign are accepted, and anything
//      after the digits (for example a trailing newline) is ignored.
// len: size of `buf`. Currently unused, because strtol stops at the NUL
//      terminator; kept so existing callers do not need to change.
//
// Returns the parsed value, or 0 if no number could be parsed or the value
// does not fit in an int.
int extract_pid(char* buf, int len) {
    (void)len;

    // strtol only sets errno on failure and never clears it. Without this
    // reset, a stale ERANGE left by an earlier call would make every later
    // (valid) input look out of range.
    errno = 0;
    long pid = strtol(buf, NULL, 10);

    // Out of range: errno is ERANGE if it's out of range for a long. The
    // result is narrowed to int, so it must also fit in an int.
    if (errno == ERANGE || pid > INT_MAX || pid < INT_MIN) {
        return 0;
    }

    return (int)pid;
}
110 
111 
// Check whether the process with ID `pid` is running.
//
// Returns true if the process appears to be alive, false otherwise.
//
// Windows: the process is opened for querying and its exit code is compared
// with STILL_ACTIVE. If the process cannot be opened or queried, a message is
// printed and false is returned.
//
// Other platforms: signal 0 is sent with kill(), which performs the existence
// check without delivering a signal. Only ESRCH (no such process) counts as
// "not running"; any other outcome, including a permission error, is reported
// as running.
bool pid_is_running(pid_t pid) {
    #ifdef WIN32
    HANDLE h_process = OpenProcess(PROCESS_QUERY_INFORMATION, false, pid);
    if (h_process == NULL) {
        printf("Unable to check if process %d is running.\n", (int)pid);
        return false;
    }

    DWORD exit_code;
    BOOL got_exit_code = GetExitCodeProcess(h_process, &exit_code);

    // The handle is only needed for the query above. Close it on every path:
    // this function is called on each poll for each tracked process, so a
    // handle left open per call would accumulate for the life of the
    // supervisor.
    CloseHandle(h_process);

    if (!got_exit_code) {
        printf("Unable to check if process %d is running.\n", (int)pid);
        return false;
    }

    return exit_code == STILL_ACTIVE;

    #else
    if (kill(pid, 0) == -1 && errno == ESRCH) {
        return false;
    }
    return true;
    #endif
}
141 
142 // Send a soft kill signal to all children, wait 5 seconds, then hard kill any
143 // remaining processes.
kill_children()144 void kill_children() {
145     if (n_children == 0)
146         return;
147 
148     verbose_printf("Sending close signal to children: ");
149     for (int i=0; i<n_children; i++) {
150         verbose_printf("%d ", children[i]);
151 
152         #ifdef WIN32
153         sendCtrlC(children[i]);
154         sendWmClose(children[i]);
155         #else
156         kill(children[i], SIGTERM);
157         #endif
158     }
159     verbose_printf("\n");
160 
161 
162     // Poll, checking that child processes have exited. Using `time()` isn't
163     // the most accurate way to get time, since it only has a resolution of 1
164     // second, but it is cross-platform and good enough for this purpose.
165     time_t stop_time = time(NULL) + 5;
166 
167     do {
168         sleep_ms(POLL_MS);
169 
170         verbose_printf("Checking status of children: ");
171         for (int i=0; i<n_children; i++) {
172             if (pid_is_running(children[i])) {
173                 verbose_printf("%d ", children[i]);
174             } else {
175                 verbose_printf("%d(stopped) ", children[i]);
176                 n_children = remove_element(children, n_children, i);
177             }
178         }
179         verbose_printf("\n");
180 
181         if (n_children == 0) {
182             return;
183         }
184     } while(time(NULL) < stop_time);
185 
186 
187     // Hard-kill any remaining processes
188     bool kill_message_shown = false;
189 
190     for (int i=0; i<n_children; i++) {
191         if (pid_is_running(children[i])) {
192 
193             if (!kill_message_shown) {
194                 verbose_printf("Sending kill signal to children: ");
195                 kill_message_shown = true;
196             }
197 
198             verbose_printf("%d ", children[i]);
199 
200             #ifdef WIN32
201             kill_pid(children[i]);
202             #else
203             kill(children[i], SIGKILL);
204             #endif
205         }
206     }
207 
208     if (kill_message_shown)
209         verbose_printf("\n");
210 }
211 
212 
sig_handler(int signum)213 static void sig_handler(int signum) {
214     char* signame;
215     if (signum == SIGTERM) {
216         signame = "SIGTERM";
217         sigterm_received = true;
218 
219     } else if (signum == SIGINT) {
220         signame = "SIGINT";
221         sigint_received = true;
222 
223     } else {
224         signame = "Unknown signal";
225     }
226 
227     verbose_printf("%s received.\n", signame);
228 }
229 
230 
main(int argc,char ** argv)231 int main(int argc, char **argv) {
232 
233     int parent_pid;
234     int parent_pid_arg = 0;
235     int parent_pid_detected;
236     char* input_pipe_name = NULL;
237 
238     // Process arguments ------------------------------------------------------
239     if (argc >= 2) {
240         for (int i=1; i<argc; i++) {
241             if (strcmp(argv[i], "-v") == 0) {
242                 verbose_mode = true;
243 
244             } else if (strcmp(argv[i], "-p") == 0) {
245                 i++;
246                 if (i >= argc) {
247                     printf("-p must be followed with a process ID.");
248                     exit(1);
249                 }
250 
251                 parent_pid_arg = extract_pid(argv[i], (int) strlen(argv[i]));
252                 if (parent_pid_arg == 0) {
253                     printf("Invalid parent process ID: %s\n", argv[i]);
254                     exit(1);
255                 }
256 
257             } else if (strcmp(argv[i], "-i") == 0) {
258                 i++;
259                 if (i >= argc) {
260                     printf("-i must be followed with the name of a pipe.");
261                     exit(1);
262                 }
263 
264                 input_pipe_name = argv[i];
265 
266             } else {
267                 printf("Unknown argument: %s\n", argv[i]);
268                 exit(1);
269             }
270         }
271     }
272 
273     printf("PID: %d\n", getpid());
274     fflush(stdout);
275 
276     parent_pid_detected = getppid();
277     verbose_printf("Parent PID (detected): %d\n", parent_pid_detected);
278 
279     if (parent_pid_arg != 0) {
280         verbose_printf("Parent PID (argument): %d\n", parent_pid_arg);
281         parent_pid = parent_pid_arg;
282 
283         // This check is really only useful for testing.
284         if (parent_pid_arg != parent_pid_detected) {
285             verbose_printf("Note: detected parent PID differs from argument parent PID.\n");
286             verbose_printf("Using parent PID from argument (%d).\n", parent_pid_arg);
287         }
288     } else {
289         parent_pid = parent_pid_detected;
290     }
291 
292     if (input_pipe_name != NULL) {
293         verbose_printf("Reading input from %s.\n", input_pipe_name);
294     }
295 
296 
297     // Open and configure input source ----------------------------------------
298 
299     // Input buffer for messages from the R process
300     char readbuf[INPUT_BUF_LEN];
301 
302     #ifdef WIN32
303 
304     HANDLE h_input;
305 
306     if (input_pipe_name == NULL) {
307         h_input = open_stdin();
308     } else {
309         h_input = open_named_pipe(input_pipe_name);
310     }
311 
312     configure_input_handle(h_input);
313 
314     #else
315 
316     FILE* fp_input;
317 
318     if (input_pipe_name == NULL) {
319         fp_input = stdin;
320 
321     } else {
322         printf("fopen.\n");
323 
324         fp_input = fopen(input_pipe_name, "r");
325         printf("fopened.\n");
326         if (fp_input == NULL) {
327             printf("Unable to open %s for reading.\n", input_pipe_name);
328             exit(1);
329         }
330     }
331 
332     if (fcntl(fileno(fp_input), F_SETFL, O_NONBLOCK) == -1) {
333         printf("Error setting input to non-blocking mode.\n");
334         exit(1);
335     }
336 
337     #endif
338 
339     printf("Ready\n");
340     fflush(stdout);
341 
342 
343     // Register signal handler ------------------------------------------------
344     #ifdef WIN32
345 
346     signal(SIGINT, sig_handler);
347     signal(SIGTERM, sig_handler);
348 
349     #else
350 
351     struct sigaction sa;
352     memset(&sa, 0, sizeof(sa));
353     sa.sa_handler = sig_handler;
354     sigemptyset(&sa.sa_mask);
355     if (sigaction(SIGINT, &sa, NULL)  == -1 ||
356         sigaction(SIGTERM, &sa, NULL) == -1) {
357         printf("Error setting up signal handler.\n");
358         exit(1);
359     }
360 
361     #endif
362 
363 
364     // Poll -------------------------------------------------------------------
365     while(1) {
366 
367         // Check if a sigint or sigterm has been received. If so, then kill
368         // the child processes and quit. Do the work here instead of in the
369         // signal handler, because the signal handler can itself be
370         // interrupted by another call to the same handler if another signal
371         // is received, and that could result in some unsafe operations.
372         if (sigint_received || sigterm_received) {
373             kill_children();
374             verbose_printf("\nExiting.\n");
375             exit(0);
376         }
377 
378 
379         // Look for any new processes IDs from the input
380         char* res = NULL;
381 
382         // Read in the input buffer. There could be multiple lines so we'll
383         // keep reading lines until there's no more content.
384         while(1) {
385             #ifdef WIN32
386             res = get_line_nonblock(readbuf, INPUT_BUF_LEN, h_input);
387             #else
388             res = fgets(readbuf, INPUT_BUF_LEN, fp_input);
389             #endif
390 
391             if (res == NULL)
392                 break;
393 
394             if (strncmp(readbuf, "kill", 4) == 0) {
395                 verbose_printf("\'kill' command received.\n");
396                 kill_children();
397                 verbose_printf("\nExiting.\n");
398                 return 0;
399             }
400             int pid = extract_pid(readbuf, INPUT_BUF_LEN);
401             if (pid > 0) {
402                 if (n_children == MAX_CHILDREN) {
403                     printf(
404                         "Number of child processes to watch has exceeded limit of %d.",
405                         MAX_CHILDREN
406                     );
407 
408                 } else if (array_contains(children, n_children, pid)) {
409                     verbose_printf("Not adding (already present):%d\n", pid);
410 
411                 } else {
412                     verbose_printf("Adding:%d\n", pid);
413                     children[n_children] = pid;
414                     n_children++;
415                 }
416 
417             } else if (pid < 0) {
418                 // Remove pids that start with '-'
419                 pid = -pid;
420                 for (int i=0; i<n_children; i++) {
421                     if (children[i] == pid) {
422                         verbose_printf("Removing:%d\n", pid);
423                         n_children = remove_element(children, n_children, i);
424                         break;
425                     }
426                 }
427             }
428         }
429 
430         // Remove any children from list that are no longer running.
431         verbose_printf("Children: ");
432         for (int i=0; i<n_children; i++) {
433             if (pid_is_running(children[i])) {
434                 verbose_printf("%d ", children[i]);
435             } else {
436                 verbose_printf("%d(stopped) ", children[i]);
437                 n_children = remove_element(children, n_children, i);
438             }
439         }
440         verbose_printf("\n");
441 
442         // Check that parent is still running. If not, kill children.
443         if (!pid_is_running(parent_pid)) {
444             verbose_printf("Parent (%d) is no longer running.\n", parent_pid);
445             kill_children();
446             verbose_printf("\nExiting.\n");
447             return 0;
448         }
449 
450         sleep_ms(POLL_MS);
451     }
452 
453     return 0;
454 }
455