// This supervisor program keeps track of a process (normally the parent
// process) and receives process IDs (called children) on standard input. If
// the supervisor process receives a SIGINT (Ctrl-C) or SIGTERM, or if it
// detects that the parent process has died, it will kill all the child
// processes.
//
// Every 0.2 seconds, it does the following:
// * Checks for any new process IDs on standard input, and adds them to the
//   list of child processes to track. If the PID is negative, as in "-1234",
//   then that value will be negated and removed from the list of processes
//   to track.
// * Checks if any child processes have died. If so, removes them from the
//   list of child processes to track.
// * Checks if the parent process has died. If so, kills all children and
//   exits.
14 //
// To test it out in verbose mode, run:
//   gcc supervisor.c -o supervisor
//   ./supervisor -v -p [parent_pid]
//
// The [parent_pid] is optional. If not supplied, the supervisor will
// auto-detect the parent process.
//
// After it is started, you can enter PIDs for child processes. Then you can
// do any of the following to test it out:
// * Press Ctrl-C.
// * Send a SIGTERM to the supervisor with `killall supervisor`.
// * Kill the parent process.
// * Kill a child process.
28
29 #ifdef __INTEL_COMPILER
30 #define _BSD_SOURCE 1
31 #define _POSIX_C_SOURCE 200809L
32 #endif
33
34 #include <stdio.h>
35 #include <stdlib.h>
36 #include <string.h>
37 #include <stdbool.h>
38 #include <stdarg.h>
39 #include <errno.h>
40 #include <limits.h>
41 #include <fcntl.h>
42 #include <sys/types.h>
43 #include <time.h>
44 #include <signal.h>
45 #include <unistd.h>
46
47 #ifdef WIN32
48 #include "windows.h"
49 #endif
50
51 #include "utils.h"
52
// Constants ------------------------------------------------------------------

// Size of stdin input buffer
#define INPUT_BUF_LEN 1024
// Maximum number of children to keep track of
#define MAX_CHILDREN 1024
// Milliseconds to sleep in polling loop
#define POLL_MS 200

// Globals --------------------------------------------------------------------

// Child processes to track. Only the first n_children entries are in use.
int children[MAX_CHILDREN];
int n_children = 0;

// Flags set by the signal handler and polled by the main loop. They are
// `volatile sig_atomic_t` because that is the only object type the C standard
// guarantees can be safely written from an asynchronous signal handler and
// then observed by the interrupted code.
volatile sig_atomic_t sigint_received = false;
volatile sig_atomic_t sigterm_received = false;
70
71 // Utility functions ----------------------------------------------------------
72
73 // Cross-platform sleep function
74 #ifdef WIN32
75 #include <windows.h>
76 #elif _POSIX_C_SOURCE >= 199309L
77 #include <time.h> // for nanosleep
78 #else
79 #include <unistd.h> // for usleep
80 #endif
81
// Pause execution for `milliseconds` ms using whichever primitive the
// platform offers: Sleep() on Windows, nanosleep() when POSIX.1b is
// available, and usleep() otherwise. The return value of the underlying call
// is ignored, so an interrupted sleep simply ends early.
void sleep_ms(int milliseconds) {
#ifdef WIN32
    Sleep(milliseconds);
#elif _POSIX_C_SOURCE >= 199309L
    // Whole seconds go in tv_sec; the sub-second remainder is converted from
    // milliseconds to nanoseconds.
    struct timespec delay = {
        .tv_sec  = milliseconds / 1000,
        .tv_nsec = (milliseconds % 1000) * 1000000
    };
    nanosleep(&delay, NULL);
#else
    usleep(milliseconds * 1000);
#endif
}
94
95
// Parse a decimal process ID from the start of `buf`, e.g. "102" -> 102 and
// "-102" -> -102. Leading whitespace and an optional sign are accepted, and
// parsing stops at the first character that is not part of the number (so the
// trailing newline left by fgets() is harmless).
//
// Returns 0 if no number could be parsed, or if the value is out of range for
// an int. `buf` must be NUL-terminated; `len` is currently unused.
int extract_pid(char* buf, int len) {
    (void)len;

    // strtol() only sets errno on failure and never clears it, so reset it
    // here. Otherwise a stale ERANGE left by an earlier call would make
    // perfectly valid input look out of range.
    errno = 0;

    char* end = buf;
    long pid = strtol(buf, &end, 10);

    // No digits were consumed: the string can't be parsed.
    if (end == buf) {
        return 0;
    }

    // Out of range: errno is ERANGE if it's out of range for a long. We're
    // going to cast to int, so we also need to make sure that it's within
    // range for int.
    if (errno == ERANGE || pid > INT_MAX || pid < INT_MIN) {
        return 0;
    }

    return (int)pid;
}
110
111
// Check if a process is running. Returns true if yes, false if no.
//
// Windows: opens the process and inspects its exit code; the process counts
// as running while the exit code is STILL_ACTIVE. If the process can't be
// opened or queried, a message is printed and false is returned.
//
// Elsewhere: probes the process with kill(pid, 0), which performs the
// existence/permission checks without delivering a signal. Only ESRCH is
// treated as "not running"; any other result (including a permission error)
// is treated as running.
bool pid_is_running(pid_t pid) {
#ifdef WIN32
    HANDLE h_process = OpenProcess(PROCESS_QUERY_INFORMATION, false, pid);
    if (h_process == NULL) {
        printf("Unable to check if process %d is running.\n", (int)pid);
        return false;
    }

    DWORD exit_code;
    BOOL got_exit_code = GetExitCodeProcess(h_process, &exit_code);

    // Always release the handle. This function is called for every tracked
    // process on every poll, so a handle that is never closed leaks
    // continuously for the lifetime of the supervisor.
    CloseHandle(h_process);

    if (!got_exit_code) {
        printf("Unable to check if process %d is running.\n", (int)pid);
        return false;
    }

    return exit_code == STILL_ACTIVE;

#else
    int res = kill(pid, 0);
    if (res == -1 && errno == ESRCH) {
        return false;
    }
    return true;
#endif
}
141
142 // Send a soft kill signal to all children, wait 5 seconds, then hard kill any
143 // remaining processes.
kill_children()144 void kill_children() {
145 if (n_children == 0)
146 return;
147
148 verbose_printf("Sending close signal to children: ");
149 for (int i=0; i<n_children; i++) {
150 verbose_printf("%d ", children[i]);
151
152 #ifdef WIN32
153 sendCtrlC(children[i]);
154 sendWmClose(children[i]);
155 #else
156 kill(children[i], SIGTERM);
157 #endif
158 }
159 verbose_printf("\n");
160
161
162 // Poll, checking that child processes have exited. Using `time()` isn't
163 // the most accurate way to get time, since it only has a resolution of 1
164 // second, but it is cross-platform and good enough for this purpose.
165 time_t stop_time = time(NULL) + 5;
166
167 do {
168 sleep_ms(POLL_MS);
169
170 verbose_printf("Checking status of children: ");
171 for (int i=0; i<n_children; i++) {
172 if (pid_is_running(children[i])) {
173 verbose_printf("%d ", children[i]);
174 } else {
175 verbose_printf("%d(stopped) ", children[i]);
176 n_children = remove_element(children, n_children, i);
177 }
178 }
179 verbose_printf("\n");
180
181 if (n_children == 0) {
182 return;
183 }
184 } while(time(NULL) < stop_time);
185
186
187 // Hard-kill any remaining processes
188 bool kill_message_shown = false;
189
190 for (int i=0; i<n_children; i++) {
191 if (pid_is_running(children[i])) {
192
193 if (!kill_message_shown) {
194 verbose_printf("Sending kill signal to children: ");
195 kill_message_shown = true;
196 }
197
198 verbose_printf("%d ", children[i]);
199
200 #ifdef WIN32
201 kill_pid(children[i]);
202 #else
203 kill(children[i], SIGKILL);
204 #endif
205 }
206 }
207
208 if (kill_message_shown)
209 verbose_printf("\n");
210 }
211
212
sig_handler(int signum)213 static void sig_handler(int signum) {
214 char* signame;
215 if (signum == SIGTERM) {
216 signame = "SIGTERM";
217 sigterm_received = true;
218
219 } else if (signum == SIGINT) {
220 signame = "SIGINT";
221 sigint_received = true;
222
223 } else {
224 signame = "Unknown signal";
225 }
226
227 verbose_printf("%s received.\n", signame);
228 }
229
230
main(int argc,char ** argv)231 int main(int argc, char **argv) {
232
233 int parent_pid;
234 int parent_pid_arg = 0;
235 int parent_pid_detected;
236 char* input_pipe_name = NULL;
237
238 // Process arguments ------------------------------------------------------
239 if (argc >= 2) {
240 for (int i=1; i<argc; i++) {
241 if (strcmp(argv[i], "-v") == 0) {
242 verbose_mode = true;
243
244 } else if (strcmp(argv[i], "-p") == 0) {
245 i++;
246 if (i >= argc) {
247 printf("-p must be followed with a process ID.");
248 exit(1);
249 }
250
251 parent_pid_arg = extract_pid(argv[i], (int) strlen(argv[i]));
252 if (parent_pid_arg == 0) {
253 printf("Invalid parent process ID: %s\n", argv[i]);
254 exit(1);
255 }
256
257 } else if (strcmp(argv[i], "-i") == 0) {
258 i++;
259 if (i >= argc) {
260 printf("-i must be followed with the name of a pipe.");
261 exit(1);
262 }
263
264 input_pipe_name = argv[i];
265
266 } else {
267 printf("Unknown argument: %s\n", argv[i]);
268 exit(1);
269 }
270 }
271 }
272
273 printf("PID: %d\n", getpid());
274 fflush(stdout);
275
276 parent_pid_detected = getppid();
277 verbose_printf("Parent PID (detected): %d\n", parent_pid_detected);
278
279 if (parent_pid_arg != 0) {
280 verbose_printf("Parent PID (argument): %d\n", parent_pid_arg);
281 parent_pid = parent_pid_arg;
282
283 // This check is really only useful for testing.
284 if (parent_pid_arg != parent_pid_detected) {
285 verbose_printf("Note: detected parent PID differs from argument parent PID.\n");
286 verbose_printf("Using parent PID from argument (%d).\n", parent_pid_arg);
287 }
288 } else {
289 parent_pid = parent_pid_detected;
290 }
291
292 if (input_pipe_name != NULL) {
293 verbose_printf("Reading input from %s.\n", input_pipe_name);
294 }
295
296
297 // Open and configure input source ----------------------------------------
298
299 // Input buffer for messages from the R process
300 char readbuf[INPUT_BUF_LEN];
301
302 #ifdef WIN32
303
304 HANDLE h_input;
305
306 if (input_pipe_name == NULL) {
307 h_input = open_stdin();
308 } else {
309 h_input = open_named_pipe(input_pipe_name);
310 }
311
312 configure_input_handle(h_input);
313
314 #else
315
316 FILE* fp_input;
317
318 if (input_pipe_name == NULL) {
319 fp_input = stdin;
320
321 } else {
322 printf("fopen.\n");
323
324 fp_input = fopen(input_pipe_name, "r");
325 printf("fopened.\n");
326 if (fp_input == NULL) {
327 printf("Unable to open %s for reading.\n", input_pipe_name);
328 exit(1);
329 }
330 }
331
332 if (fcntl(fileno(fp_input), F_SETFL, O_NONBLOCK) == -1) {
333 printf("Error setting input to non-blocking mode.\n");
334 exit(1);
335 }
336
337 #endif
338
339 printf("Ready\n");
340 fflush(stdout);
341
342
343 // Register signal handler ------------------------------------------------
344 #ifdef WIN32
345
346 signal(SIGINT, sig_handler);
347 signal(SIGTERM, sig_handler);
348
349 #else
350
351 struct sigaction sa;
352 memset(&sa, 0, sizeof(sa));
353 sa.sa_handler = sig_handler;
354 sigemptyset(&sa.sa_mask);
355 if (sigaction(SIGINT, &sa, NULL) == -1 ||
356 sigaction(SIGTERM, &sa, NULL) == -1) {
357 printf("Error setting up signal handler.\n");
358 exit(1);
359 }
360
361 #endif
362
363
364 // Poll -------------------------------------------------------------------
365 while(1) {
366
367 // Check if a sigint or sigterm has been received. If so, then kill
368 // the child processes and quit. Do the work here instead of in the
369 // signal handler, because the signal handler can itself be
370 // interrupted by another call to the same handler if another signal
371 // is received, and that could result in some unsafe operations.
372 if (sigint_received || sigterm_received) {
373 kill_children();
374 verbose_printf("\nExiting.\n");
375 exit(0);
376 }
377
378
379 // Look for any new processes IDs from the input
380 char* res = NULL;
381
382 // Read in the input buffer. There could be multiple lines so we'll
383 // keep reading lines until there's no more content.
384 while(1) {
385 #ifdef WIN32
386 res = get_line_nonblock(readbuf, INPUT_BUF_LEN, h_input);
387 #else
388 res = fgets(readbuf, INPUT_BUF_LEN, fp_input);
389 #endif
390
391 if (res == NULL)
392 break;
393
394 if (strncmp(readbuf, "kill", 4) == 0) {
395 verbose_printf("\'kill' command received.\n");
396 kill_children();
397 verbose_printf("\nExiting.\n");
398 return 0;
399 }
400 int pid = extract_pid(readbuf, INPUT_BUF_LEN);
401 if (pid > 0) {
402 if (n_children == MAX_CHILDREN) {
403 printf(
404 "Number of child processes to watch has exceeded limit of %d.",
405 MAX_CHILDREN
406 );
407
408 } else if (array_contains(children, n_children, pid)) {
409 verbose_printf("Not adding (already present):%d\n", pid);
410
411 } else {
412 verbose_printf("Adding:%d\n", pid);
413 children[n_children] = pid;
414 n_children++;
415 }
416
417 } else if (pid < 0) {
418 // Remove pids that start with '-'
419 pid = -pid;
420 for (int i=0; i<n_children; i++) {
421 if (children[i] == pid) {
422 verbose_printf("Removing:%d\n", pid);
423 n_children = remove_element(children, n_children, i);
424 break;
425 }
426 }
427 }
428 }
429
430 // Remove any children from list that are no longer running.
431 verbose_printf("Children: ");
432 for (int i=0; i<n_children; i++) {
433 if (pid_is_running(children[i])) {
434 verbose_printf("%d ", children[i]);
435 } else {
436 verbose_printf("%d(stopped) ", children[i]);
437 n_children = remove_element(children, n_children, i);
438 }
439 }
440 verbose_printf("\n");
441
442 // Check that parent is still running. If not, kill children.
443 if (!pid_is_running(parent_pid)) {
444 verbose_printf("Parent (%d) is no longer running.\n", parent_pid);
445 kill_children();
446 verbose_printf("\nExiting.\n");
447 return 0;
448 }
449
450 sleep_ms(POLL_MS);
451 }
452
453 return 0;
454 }
455