1 /****************************************************************************
2 **
3 ** Copyright (C) 2020 Intel Corporation.
4 ** Copyright (C) 2015 Klarälvdalens Datakonsult AB, a KDAB Group company, info@kdab.com
5 **
6 ** Permission is hereby granted, free of charge, to any person obtaining a copy
7 ** of this software and associated documentation files (the "Software"), to deal
8 ** in the Software without restriction, including without limitation the rights
9 ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 ** copies of the Software, and to permit persons to whom the Software is
11 ** furnished to do so, subject to the following conditions:
12 **
13 ** The above copyright notice and this permission notice shall be included in
14 ** all copies or substantial portions of the Software.
15 **
16 ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 ** THE SOFTWARE.
23 **
24 ****************************************************************************/
25 
26 #ifndef _GNU_SOURCE
27 #  define _GNU_SOURCE
28 #endif
29 
30 #include "forkfd.h"
31 
32 /* Macros fine-tuning the build: */
33 //#define FORKFD_NO_FORKFD 1                /* disable the forkfd() function */
34 //#define FORKFD_NO_SPAWNFD 1               /* disable the spawnfd() function */
35 //#define FORKFD_DISABLE_FORK_FALLBACK 1    /* disable falling back to fork() from system_forkfd() */
36 
37 #include <sys/types.h>
38 #if defined(__OpenBSD__) || defined(__NetBSD__)
39 #  include <sys/param.h>
40 #endif
41 #include <sys/time.h>
42 #include <sys/resource.h>
43 #include <sys/wait.h>
44 #include <assert.h>
45 #include <errno.h>
46 #include <pthread.h>
47 #include <signal.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <time.h>
51 #include <unistd.h>
52 
/*
 * Platform feature detection. The HAVE_* macros defined in this section
 * select which system facilities the rest of this file uses:
 *   HAVE_WAIT4         - wait4(2) is used by tryReaping()
 *   HAVE_WAITID        - waitid(2) with WEXITED/WNOWAIT is available
 *   HAVE_PIPE2         - pipe2(2) is used by create_pipe()
 *   HAVE_EVENTFD       - eventfd(2) is used by forkfd() for parent/child sync
 *   HAVE_BROKEN_WAITID - waitid(2) must be probed at run time before use
 */
#ifdef __linux__
#  define HAVE_WAIT4    1
/* eventfd: Bionic, or glibc >= 2.8 (0x208) subject to the uClibc version test
 * below; it is only enabled if <sys/eventfd.h> provides EFD_CLOEXEC */
#  if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x208 && \
       (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201)))
#    include <sys/eventfd.h>
#    ifdef EFD_CLOEXEC
#      define HAVE_EVENTFD  1
#    endif
#  endif
/* pipe2: Bionic, or glibc >= 2.9 (0x209) subject to the same uClibc test */
#  if defined(__BIONIC__) || (defined(__GLIBC__) && (__GLIBC__ << 8) + __GLIBC_MINOR__ >= 0x209 && \
       (!defined(__UCLIBC__) || ((__UCLIBC_MAJOR__ << 16) + (__UCLIBC_MINOR__ << 8) + __UCLIBC_SUBLEVEL__ > 0x90201)))
#    define HAVE_PIPE2    1
#  endif
#endif

/* waitid is assumed when the system advertises POSIX.1-2008 or XSI issue 5+ ... */
#if _POSIX_VERSION-0 >= 200809L || _XOPEN_VERSION-0 >= 500
#  define HAVE_WAITID   1
#endif
/* ... but it is useless to us without both WEXITED and WNOWAIT */
#if !defined(WEXITED) || !defined(WNOWAIT)
#  undef HAVE_WAITID
#endif

/* BSD systems that provide pipe2(2), by minimum version */
#if (defined(__FreeBSD__) && defined(__FreeBSD_version) && __FreeBSD_version >= 1000032) || \
    (defined(__OpenBSD__) && OpenBSD >= 201505) || \
    (defined(__NetBSD__) && __NetBSD_Version__ >= 600000000)
#  define HAVE_PIPE2    1
#endif
/* BSD-derived systems (including Apple's) provide wait4(2) */
#if defined(__FreeBSD__) || defined(__DragonFly__) || defined(__FreeBSD_kernel__) || \
    defined(__OpenBSD__) || defined(__NetBSD__) || defined(__APPLE__)
#  define HAVE_WAIT4    1
#endif

#if defined(__APPLE__)
/* Up until OS X 10.7, waitid(P_ALL, ...) will return success, but will not
 * fill in the details of the dead child. That means waitid is not useful to us.
 * Therefore, when the deployment target is OS X 10.7 or earlier, we mark
 * waitid() as possibly broken and let forkfd_initialize() probe it at run time.
 */
#  include <Availability.h>
#  include <AvailabilityMacros.h>
#  if MAC_OS_X_VERSION_MIN_REQUIRED <= 1070
#    define HAVE_BROKEN_WAITID 1
#  endif
#endif
97 
98 #include "forkfd_atomic.h"
99 
/*
 * Hooks for a native (kernel-provided) forkfd backend. They are defined at the
 * bottom of this file: either by an included platform source
 * (forkfd_freebsd.c / forkfd_linux.c) or by stub implementations that report
 * that no native backend exists.
 */
static int system_has_forkfd(void);
static int system_forkfd(int flags, pid_t *ppid, int *system);
static int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdwoptions, struct rusage *rusage);
103 
/*
 * Tells whether the fork()-based fallback implemented in this file must not
 * be used.
 *
 * Returns the result of system_has_forkfd() when the build defines
 * FORKFD_DISABLE_FORK_FALLBACK (the fallback may only be skipped if a native
 * backend exists), and 0 otherwise.
 */
static int disable_fork_fallback(void)
{
#ifdef FORKFD_DISABLE_FORK_FALLBACK
    /* if there's no system forkfd, we have to use the fallback */
    return system_has_forkfd();
#else
    /* plain 0 rather than 'false': the function returns int and this file
     * does not include <stdbool.h> itself */
    return 0;
#endif
}
113 
/* number of child slots in the statically allocated first section */
#define CHILDREN_IN_SMALL_ARRAY     16
/* number of child slots in each heap-allocated overflow section */
#define CHILDREN_IN_BIG_ARRAY       256
/* element count of a real array (must not be used on a pointer) */
#define sizeofarray(array)          (sizeof(array)/sizeof((array)[0]))
/* evaluate 'call' into 'ret', repeating for as long as it fails with EINTR */
#define EINTR_LOOP(ret, call) \
    do {                      \
        (ret) = (call);       \
    } while ((ret) == -1 && errno == EINTR)
121 
/*
 * Record sent through a child's death pipe: written by notifyAndFreeInfo()
 * when the child is reaped and read back by forkfd_wait4().
 */
struct pipe_payload
{
    struct forkfd_info info;    /* termination code and status of the child */
    struct rusage rusage;       /* resource usage, as far as tryReaping() could obtain it */
};
127 
/*
 * One slot of the child table.
 */
typedef struct process_info
{
    /* 0: slot is free; -1: slot is reserved or being processed;
     * > 0: PID of the child this slot tracks */
    ffd_atomic_int pid;
    /* write end of the pipe used to report the child's death; reset to -1 by freeInfo() */
    int deathPipe;
} ProcessInfo;
133 
struct BigArray;
/*
 * Bookkeeping placed in front of every section of the child table.
 */
typedef struct Header
{
    /* next overflow section in the chain, or NULL if there is none yet */
    ffd_atomic_pointer(struct BigArray) nextArray;
    /* number of slots in this section that are in use or being claimed */
    ffd_atomic_int busyCount;
} Header;
140 
/*
 * Overflow section of the child table; allocated on demand by allocateInfo()
 * and released by cleanup().
 */
typedef struct BigArray
{
    Header header;
    ProcessInfo entries[CHILDREN_IN_BIG_ARRAY];
} BigArray;
146 
/*
 * First section of the child table. It has static storage, so the first
 * CHILDREN_IN_SMALL_ARRAY children need no heap allocation.
 */
typedef struct SmallArray
{
    Header header;
    ProcessInfo entries[CHILDREN_IN_SMALL_ARRAY];
} SmallArray;
/* head of the child table; overflow sections hang off children.header.nextArray */
static SmallArray children;
153 
/* SIGCHLD disposition that was active before forkfd_initialize() installed
 * ours; sigchld_handler() chains to it */
static struct sigaction old_sigaction;
/* guards the one-time call of forkfd_initialize() */
static pthread_once_t forkfd_initialization = PTHREAD_ONCE_INIT;
/* set to 1 at the end of forkfd_initialize() and back to 0 by cleanup();
 * sigchld_handler() only reaps children while it is 1 */
static ffd_atomic_int forkfd_status = FFD_ATOMIC_INIT(0);

/* whether waitid(2) can be trusted; with HAVE_BROKEN_WAITID this is decided
 * at run time by forkfd_initialize() */
#ifdef HAVE_BROKEN_WAITID
static int waitid_works = 0;
#else
static const int waitid_works = 1;
#endif
163 
/*
 * Attempts to reserve one free slot in a single section of the child table.
 *
 * header   - bookkeeping header of the section; its busyCount is adjusted
 * entries  - slot array of the section
 * maxCount - number of slots in 'entries'
 *
 * Returns the reserved slot, whose pid has been switched from 0 to -1, or
 * NULL if no slot could be reserved (busyCount is then left unchanged).
 */
static ProcessInfo *tryAllocateInSection(Header *header, ProcessInfo entries[], int maxCount)
{
    /* stake our claim first; ACQUIRE because a slot may just have been
     * released by the signal handler */
    const int claimed = ffd_atomic_add_fetch(&header->busyCount, 1, FFD_ATOMIC_ACQUIRE);

    if (claimed <= maxCount) {
        /* the section is not full: look for a slot whose PID is 0 (free)
         * and mark it as taken by swapping in -1 */
        int slot = 0;
        while (slot < maxCount) {
            int free_marker = 0;
            if (ffd_atomic_compare_exchange(&entries[slot].pid, &free_marker,
                                            -1, FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
                return &entries[slot];
            ++slot;
        }
    }

    /* nothing was reserved, so take our claim back */
    (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELAXED);
    return NULL;
}
184 
allocateInfo(Header ** header)185 static ProcessInfo *allocateInfo(Header **header)
186 {
187     Header *currentHeader = &children.header;
188 
189     /* try to find an available entry in the small array first */
190     ProcessInfo *info =
191             tryAllocateInSection(currentHeader, children.entries, sizeofarray(children.entries));
192 
193     /* go on to the next arrays */
194     while (info == NULL) {
195         BigArray *array = ffd_atomic_load(&currentHeader->nextArray, FFD_ATOMIC_ACQUIRE);
196         if (array == NULL) {
197             /* allocate an array and try to use it */
198             BigArray *allocatedArray = (BigArray *)calloc(1, sizeof(BigArray));
199             if (allocatedArray == NULL)
200                 return NULL;
201 
202             if (ffd_atomic_compare_exchange(&currentHeader->nextArray, &array, allocatedArray,
203                                              FFD_ATOMIC_RELEASE, FFD_ATOMIC_ACQUIRE)) {
204                 /* success */
205                 array = allocatedArray;
206             } else {
207                 /* failed, the atomic updated 'array' */
208                 free(allocatedArray);
209             }
210         }
211 
212         currentHeader = &array->header;
213         info = tryAllocateInSection(currentHeader, array->entries, sizeofarray(array->entries));
214     }
215 
216     *header = currentHeader;
217     return info;
218 }
219 
220 #ifdef HAVE_WAITID
/*
 * Checks, without reaping it, whether child 'pid' has exited.
 *
 * pid  - the child to probe
 * info - scratch siginfo_t that receives whatever waitid() reports
 *
 * Returns 1 if waitid() succeeded and reported exactly 'pid', 0 otherwise.
 * WNOWAIT is passed to waitid(), so a later tryReaping() call can still
 * collect the child.
 */
static int isChildReady(pid_t pid, siginfo_t *info)
{
    /* clear si_pid so that a stale value cannot be mistaken for a match */
    info->si_pid = 0;

    if (waitid(P_PID, pid, info, WEXITED | WNOHANG | WNOWAIT) != 0)
        return 0;

    return info->si_pid == pid;
}
226 #endif
227 
convertStatusToForkfdInfo(int status,struct forkfd_info * info)228 static void convertStatusToForkfdInfo(int status, struct forkfd_info *info)
229 {
230     if (WIFEXITED(status)) {
231         info->code = CLD_EXITED;
232         info->status = WEXITSTATUS(status);
233     } else if (WIFSIGNALED(status)) {
234         info->code = CLD_KILLED;
235 #  ifdef WCOREDUMP
236         if (WCOREDUMP(status))
237             info->code = CLD_DUMPED;
238 #  endif
239         info->status = WTERMSIG(status);
240     }
241 }
242 
convertForkfdWaitFlagsToWaitFlags(int ffdoptions)243 static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions)
244 {
245     int woptions = WEXITED;
246     if (ffdoptions & FFDW_NOWAIT)
247         woptions |= WNOWAIT;
248     if (ffdoptions & FFDW_NOHANG)
249         woptions |= WNOHANG;
250     return woptions;
251 }
252 
/*
 * Tries to reap child 'pid' without blocking and to collect its exit
 * information.
 *
 * pid     - the child to reap
 * payload - receives the termination code/status and, depending on the
 *           system call available, resource usage; fields a given code path
 *           cannot obtain are left as the caller initialised them
 *
 * Returns 1 if the child was reaped and 'payload' was filled in, 0 if the
 * child could not be reaped (it has not terminated yet, or the wait call
 * failed).
 *
 * Which system call is used is chosen at build time: wait4() if available,
 * otherwise waitid() (when it exists and works), otherwise waitpid().
 */
static int tryReaping(pid_t pid, struct pipe_payload *payload)
{
    /* reap the child */
#if defined(HAVE_WAIT4)
    int status;
    /* wait4 returns 0 when the child is still running and -1 on error */
    if (wait4(pid, &status, WNOHANG, &payload->rusage) <= 0)
        return 0;
    convertStatusToForkfdInfo(status, &payload->info);
#else
#  if defined(HAVE_WAITID)
    if (waitid_works) {
        /* we have waitid(2), which gets us some payload values on some systems */
        siginfo_t info;
        info.si_pid = 0;
        int ret = waitid(P_PID, pid, &info, WEXITED | WNOHANG) == 0 && info.si_pid == pid;
        if (!ret)
            return ret;

        payload->info.code = info.si_code;
        payload->info.status = info.si_status;
#    ifdef __linux__
        /* NOTE(review): si_utime/si_stime are split using CLOCKS_PER_SEC and the
         * remainder is stored directly as microseconds - confirm that this matches
         * the unit in which the kernel reports these fields */
        payload->rusage.ru_utime.tv_sec = info.si_utime / CLOCKS_PER_SEC;
        payload->rusage.ru_utime.tv_usec = info.si_utime % CLOCKS_PER_SEC;
        payload->rusage.ru_stime.tv_sec = info.si_stime / CLOCKS_PER_SEC;
        payload->rusage.ru_stime.tv_usec = info.si_stime % CLOCKS_PER_SEC;
#    endif
        return 1;
    }
#  endif // HAVE_WAITID
    /* last resort: waitpid() gives us the status but no resource usage */
    int status;
    if (waitpid(pid, &status, WNOHANG) <= 0)
        return 0;     // child did not change state
    convertStatusToForkfdInfo(status, &payload->info);
#endif // !HAVE_WAIT4

    return 1;
}
290 
/*
 * Returns a slot to the child table.
 *
 * header - header of the section that owns 'entry'
 * entry  - the slot to release; the caller must already have closed (or
 *          never opened) the pipe stored in it
 */
static void freeInfo(Header *header, ProcessInfo *entry)
{
    entry->deathPipe = -1;
    /* a PID of 0 marks the slot as free; RELEASE so that the deathPipe reset
     * above is visible before the slot can be picked up again */
    ffd_atomic_store(&entry->pid, 0, FFD_ATOMIC_RELEASE);

    /* only now drop the section's usage counter */
    (void)ffd_atomic_add_fetch(&header->busyCount, -1, FFD_ATOMIC_RELEASE);
    assert(header->busyCount >= 0);
}
299 
/*
 * Reports a child's death to whoever holds the read end of its pipe and
 * releases the child's slot.
 *
 * header  - header of the section that owns 'entry'
 * entry   - slot of the child that was reaped
 * payload - exit information to send
 *
 * The payload is written to the slot's death pipe, the write end is closed
 * and the slot is handed back with freeInfo(). Both system calls are retried
 * on EINTR; any other failure is ignored.
 */
static void notifyAndFreeInfo(Header *header, ProcessInfo *entry,
                              const struct pipe_payload *payload)
{
    const int pipe_fd = entry->deathPipe;
    ssize_t rc;

    /* deliver the notification */
    do {
        rc = write(pipe_fd, payload, sizeof(*payload));
    } while (rc == -1 && errno == EINTR);

    /* our end of the pipe is no longer needed */
    do {
        rc = close(pipe_fd);
    } while (rc == -1 && errno == EINTR);

    freeInfo(header, entry);
}
309 
static void reapChildProcesses();

/*
 * SIGCHLD handler installed by forkfd_initialize().
 *
 * signum          - the signal number (SIGCHLD)
 * handler_info    - siginfo as passed by whoever invoked us; may be invalid
 * handler_context - context as passed by whoever invoked us; may be invalid
 *
 * First chains to the handler that was installed before ours, then reaps our
 * own children, provided forkfd is still in operation.
 */
static void sigchld_handler(int signum, siginfo_t *handler_info, void *handler_context)
{
    /*
     * We are running as a signal handler and must restrict ourselves to
     * async-signal-safe functions. The official list is in the POSIX.1-2008
     * specification:
     *   http://pubs.opengroup.org/onlinepubs/9699919799/functions/V2_chap02.html#tag_15_04_03
     *
     * If we were ourselves chained from a handler that did not use
     * SA_SIGINFO, handler_info and handler_context are not valid. That is
     * why the child's details are obtained later with waitid instead of
     * being read from handler_info.
     *
     * The two arguments are forwarded regardless, and the previous handler is
     * invoked before anything else, while they still have a chance of being
     * correct.
     */
    const int has_previous_handler =
            old_sigaction.sa_handler != SIG_DFL && old_sigaction.sa_handler != SIG_IGN;

    if (has_previous_handler) {
        if ((old_sigaction.sa_flags & SA_SIGINFO) == 0)
            old_sigaction.sa_handler(signum);
        else
            old_sigaction.sa_sigaction(signum, handler_info, handler_context);
    }

    /* nothing more to do unless forkfd is initialised and not shut down */
    if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) != 1)
        return;

    {
        /* the interrupted code must not see errno changed by our reaping */
        int saved_errno = errno;
        reapChildProcesses();
        errno = saved_errno;
    }
}
340 
/*
 * Reaps every terminated child registered in the child table and reports its
 * exit information through the child's death pipe. In this file it is only
 * called from sigchld_handler(), i.e. in signal-handler context.
 *
 * The work is done in up to two passes:
 *  1. only when waitid works: repeatedly ask the kernel for any waitable
 *     child (without reaping it) and look its PID up in the table; this pass
 *     ends when no child is waitable (the function then returns) or when the
 *     waitable child is not one of ours;
 *  2. walk the whole table and probe every registered PID individually, so
 *     that our children are still found when pass 1 stopped at a child that
 *     belongs to somebody else, or when waitid cannot be used at all.
 */
static inline void reapChildProcesses()
{
    /* is this one of our children? */
    BigArray *array;
    siginfo_t info;
    struct pipe_payload payload;
    int i;

    memset(&info, 0, sizeof info);
    memset(&payload, 0, sizeof payload);

#ifdef HAVE_WAITID
    if (waitid_works) {
        /* be optimistic: try to see if we can get the child that exited */
search_next_child:
        /* waitid returns -1 ECHILD if there are no further children at all;
         * it returns 0 and sets si_pid to 0 if there are children but they are not ready
         * to be waited (we're passing WNOHANG). We should not get EINTR because
         * we're passing WNOHANG and we should definitely not get EINVAL or anything else.
         * That means we can actually ignore the return code and only inspect si_pid.
         */
        info.si_pid = 0;
        waitid(P_ALL, 0, &info, WNOHANG | WNOWAIT | WEXITED);
        if (info.si_pid == 0) {
            /* there are no further un-waited-for children, so we can just exit.
             */
            return;
        }

        /* look for the reported PID in the static section first */
        for (i = 0; i < (int)sizeofarray(children.entries); ++i) {
            /* acquire the child first: swap the PID with -1 to indicate it's busy */
            int pid = info.si_pid;
            if (ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1,
                                            FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) {
                /* this is our child, send notification and free up this entry */
                /* ### FIXME: what if tryReaping returns false? (the entry then
                 * keeps its PID of -1 and is never freed) */
                if (tryReaping(pid, &payload))
                    notifyAndFreeInfo(&children.header, &children.entries[i], &payload);
                goto search_next_child;
            }
        }

        /* try the arrays */
        array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
        while (array != NULL) {
            for (i = 0; i < (int)sizeofarray(array->entries); ++i) {
                int pid = info.si_pid;
                if (ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1,
                                                FFD_ATOMIC_ACQUIRE, FFD_ATOMIC_RELAXED)) {
                    /* this is our child, send notification and free up this entry */
                    /* ### FIXME: what if tryReaping returns false? (the entry then
                     * keeps its PID of -1 and is never freed) */
                    if (tryReaping(pid, &payload))
                        notifyAndFreeInfo(&array->header, &array->entries[i], &payload);
                    goto search_next_child;
                }
            }

            array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
        }

        /* if we got here, we couldn't find this child in our list. That means this child
         * belongs to one of the chained SIGCHLD handlers. However, there might be another
         * child that exited and does belong to us, so we need to check each one individually.
         */
    }
#endif

    /* second pass, static section: probe every registered PID on its own */
    for (i = 0; i < (int)sizeofarray(children.entries); ++i) {
        int pid = ffd_atomic_load(&children.entries[i].pid, FFD_ATOMIC_ACQUIRE);
        /* skip free (0) and busy (-1) entries */
        if (pid <= 0)
            continue;
#ifdef HAVE_WAITID
        if (waitid_works) {
            /* The child might have been reaped by the block above in another thread,
             * so first check if it's ready and, if it is, lock it */
            if (!isChildReady(pid, &info) ||
                    !ffd_atomic_compare_exchange(&children.entries[i].pid, &pid, -1,
                                                 FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
                continue;
        }
#endif
        if (tryReaping(pid, &payload)) {
            /* this is our child, send notification and free up this entry */
            notifyAndFreeInfo(&children.header, &children.entries[i], &payload);
        }
    }

    /* try the arrays */
    array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
    while (array != NULL) {
        for (i = 0; i < (int)sizeofarray(array->entries); ++i) {
            int pid = ffd_atomic_load(&array->entries[i].pid, FFD_ATOMIC_ACQUIRE);
            /* skip free (0) and busy (-1) entries */
            if (pid <= 0)
                continue;
#ifdef HAVE_WAITID
            if (waitid_works) {
                /* The child might have been reaped by the block above in another thread,
                 * so first check if it's ready and, if it is, lock it */
                if (!isChildReady(pid, &info) ||
                        !ffd_atomic_compare_exchange(&array->entries[i].pid, &pid, -1,
                                                     FFD_ATOMIC_RELAXED, FFD_ATOMIC_RELAXED))
                    continue;
            }
#endif
            if (tryReaping(pid, &payload)) {
                /* this is our child, send notification and free up this entry */
                notifyAndFreeInfo(&array->header, &array->entries[i], &payload);
            }
        }

        array = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
    }
}
454 
ignore_sigpipe()455 static void ignore_sigpipe()
456 {
457 #ifdef O_NOSIGPIPE
458     static ffd_atomic_int done = FFD_ATOMIC_INIT(0);
459     if (ffd_atomic_load(&done, FFD_ATOMIC_RELAXED))
460         return;
461 #endif
462 
463     struct sigaction action;
464     memset(&action, 0, sizeof action);
465     sigemptyset(&action.sa_mask);
466     action.sa_handler = SIG_IGN;
467     action.sa_flags = 0;
468     sigaction(SIGPIPE, &action, NULL);
469 
470 #ifdef O_NOSIGPIPE
471     ffd_atomic_store(&done, 1, FFD_ATOMIC_RELAXED);
472 #endif
473 }
474 
475 #if defined(__GNUC__) && (!defined(__FreeBSD__) || __FreeBSD__ < 10)
476 __attribute((destructor, unused)) static void cleanup();
477 #endif
478 
cleanup()479 static void cleanup()
480 {
481     BigArray *array;
482     /* This function is not thread-safe!
483      * It must only be called when the process is shutting down.
484      * At shutdown, we expect no one to be calling forkfd(), so we don't
485      * need to be thread-safe with what is done there.
486      *
487      * But SIGCHLD might be delivered to any thread, including this one.
488      * There's no way to prevent that. The correct solution would be to
489      * cooperatively delete. We don't do that.
490      */
491     if (ffd_atomic_load(&forkfd_status, FFD_ATOMIC_RELAXED) == 0)
492         return;
493 
494     /* notify the handler that we're no longer in operation */
495     ffd_atomic_store(&forkfd_status, 0, FFD_ATOMIC_RELAXED);
496 
497     /* free any arrays we might have */
498     array = ffd_atomic_load(&children.header.nextArray, FFD_ATOMIC_ACQUIRE);
499     while (array != NULL) {
500         BigArray *next = ffd_atomic_load(&array->header.nextArray, FFD_ATOMIC_ACQUIRE);
501         free(array);
502         array = next;
503     }
504 }
505 
forkfd_initialize()506 static void forkfd_initialize()
507 {
508 #if defined(HAVE_BROKEN_WAITID)
509     pid_t pid = fork();
510     if (pid == 0) {
511         _exit(0);
512     } else if (pid > 0) {
513         siginfo_t info;
514         waitid(P_ALL, 0, &info, WNOWAIT | WEXITED);
515         waitid_works = (info.si_pid != 0);
516         info.si_pid = 0;
517 
518         // now really reap the child
519         waitid(P_PID, pid, &info, WEXITED);
520         waitid_works = waitid_works && (info.si_pid != 0);
521     }
522 #endif
523 
524     /* install our signal handler */
525     struct sigaction action;
526     memset(&action, 0, sizeof action);
527     sigemptyset(&action.sa_mask);
528     action.sa_flags = SA_NOCLDSTOP | SA_SIGINFO;
529     action.sa_sigaction = sigchld_handler;
530 
531     /* ### RACE CONDITION
532      * The sigaction function does a memcpy from an internal buffer
533      * to old_sigaction, which we use in the SIGCHLD handler. If a
534      * SIGCHLD is delivered before or during that memcpy, the handler will
535      * see an inconsistent state.
536      *
537      * There is no solution. pthread_sigmask doesn't work here because the
538      * signal could be delivered to another thread.
539      */
540     sigaction(SIGCHLD, &action, &old_sigaction);
541 
542 #ifndef O_NOSIGPIPE
543     /* disable SIGPIPE too */
544     ignore_sigpipe();
545 #endif
546 
547 #ifdef __GNUC__
548     (void) cleanup; /* suppress unused static function warning */
549 #else
550     atexit(cleanup);
551 #endif
552 
553     ffd_atomic_store(&forkfd_status, 1, FFD_ATOMIC_RELAXED);
554 }
555 
create_pipe(int filedes[],int flags)556 static int create_pipe(int filedes[], int flags)
557 {
558     int ret = -1;
559 #ifdef HAVE_PIPE2
560     /* use pipe2(2) whenever possible, since it can thread-safely create a
561      * cloexec pair of pipes. Without it, we have a race condition setting
562      * FD_CLOEXEC
563      */
564 
565 #  ifdef O_NOSIGPIPE
566     /* try first with O_NOSIGPIPE */
567     ret = pipe2(filedes, O_CLOEXEC | O_NOSIGPIPE);
568     if (ret == -1) {
569         /* O_NOSIGPIPE not supported, ignore SIGPIPE */
570         ignore_sigpipe();
571     }
572 #  endif
573     if (ret == -1)
574         ret = pipe2(filedes, O_CLOEXEC);
575     if (ret == -1)
576         return ret;
577 
578     if ((flags & FFD_CLOEXEC) == 0)
579         fcntl(filedes[0], F_SETFD, 0);
580 #else
581     ret = pipe(filedes);
582     if (ret == -1)
583         return ret;
584 
585     fcntl(filedes[1], F_SETFD, FD_CLOEXEC);
586     if (flags & FFD_CLOEXEC)
587         fcntl(filedes[0], F_SETFD, FD_CLOEXEC);
588 #endif
589     if (flags & FFD_NONBLOCK)
590         fcntl(filedes[0], F_SETFL, fcntl(filedes[0], F_GETFL) | O_NONBLOCK);
591     return ret;
592 }
593 
594 #ifndef FORKFD_NO_FORKFD
/**
 * @brief forkfd returns a file descriptor representing a child process
 * @param flags bitwise OR of the FFD_* values described below
 * @param ppid if not NULL, receives the PID returned by fork(): the child's
 *        PID in the parent process and 0 in the child process
 * @return a file descriptor in the parent process, @c FFD_CHILD_PROCESS in the
 *         child process, or -1 in case of failure (with errno set)
 *
 * forkfd() creates a file descriptor that can be used to be notified of when a
 * child process exits. This file descriptor can be monitored using select(2),
 * poll(2) or similar mechanisms.
 *
 * The @a flags parameter can contain the following values ORed to change the
 * behaviour of forkfd():
 *
 * @li @c FFD_NONBLOCK Set the O_NONBLOCK file status flag on the new open file
 * descriptor. Using this flag saves extra calls to fcntl(2) to achieve the same
 * result.
 *
 * @li @c FFD_CLOEXEC Set the close-on-exec (FD_CLOEXEC) flag on the new file
 * descriptor. You probably want to set this flag, since forkfd() does not work
 * if the original parent process dies.
 *
 * @li @c FFD_USE_FORK Tell forkfd() to actually call fork() instead of a
 * different system implementation that may be available. On systems where a
 * different implementation is available, its behavior may differ from that of
 * fork(), such as not calling the functions registered with pthread_atfork().
 * If that's necessary, pass this flag.
 *
 * The file descriptor returned by forkfd() supports the following operations:
 *
 * @li read(2) With the fork()-based implementation in this file, the
 * descriptor is the read end of a pipe. When the child process exits, one
 * @c struct @c pipe_payload record (a @c struct @c forkfd_info followed by a
 * @c struct @c rusage) is written to it. Use forkfd_wait4() to retrieve that
 * information instead of reading the descriptor directly.
 *
 * @li poll(2), select(2) (and similar) The file descriptor is readable (the
 * select(2) readfds argument; the poll(2) POLLIN flag) if the child has exited
 * or signalled via SIGCHLD.
 *
 * @li close(2) When the file descriptor is no longer required it should be closed.
 */
int forkfd(int flags, pid_t *ppid)
{
    Header *header;
    ProcessInfo *info;
    pid_t pid;
    int fd = -1;
    int death_pipe[2];
    int sync_pipe[2];
    int ret;
#ifdef __linux__
    int efd;
#endif

    /* FFD_USE_FORK cannot be honoured when the fallback is disabled */
    if (disable_fork_fallback())
        flags &= ~FFD_USE_FORK;

    if ((flags & FFD_USE_FORK) == 0) {
        /* try the native backend first; it reports through 'ret' whether it
         * handled the request, in which case its result is final */
        fd = system_forkfd(flags, ppid, &ret);
        if (ret || disable_fork_fallback())
            return fd;
    }

    (void) pthread_once(&forkfd_initialization, forkfd_initialize);

    /* reserve a slot in the child table */
    info = allocateInfo(&header);
    if (info == NULL) {
        errno = ENOMEM;
        return -1;
    }

    /* create the pipes before we fork */
    if (create_pipe(death_pipe, flags) == -1)
        goto err_free; /* failed to create the pipes, pass errno */

#ifdef HAVE_EVENTFD
    /* try using an eventfd, which consumes less resources */
    efd = eventfd(0, EFD_CLOEXEC);
    if (efd == -1)
#endif
    {
        /* try a pipe */
        if (create_pipe(sync_pipe, FFD_CLOEXEC) == -1) {
            /* failed both at eventfd and pipe; fail and pass errno */
            goto err_close;
        }
    }

    /* now fork */
    pid = fork();
    if (pid == -1)
        goto err_close2; /* failed to fork, pass errno */
    if (ppid)
        *ppid = pid;

    /*
     * We need to store the child's PID in the info structure, so
     * the SIGCHLD handler knows that this child is present and it
     * knows the writing end of the pipe to pass information on.
     * However, the child process could exit before we stored the
     * information (or the handler could run for other children exiting).
     * We prevent that from happening by blocking the child process in
     * a read(2) until we're finished storing the information.
     */
    if (pid == 0) {
        /* this is the child process */
        /* first, wait for the all clear */
#ifdef HAVE_EVENTFD
        if (efd != -1) {
            eventfd_t val64;
            EINTR_LOOP(ret, eventfd_read(efd, &val64));
            EINTR_LOOP(ret, close(efd));
        } else
#endif
        {
            char c;
            EINTR_LOOP(ret, close(sync_pipe[1]));
            EINTR_LOOP(ret, read(sync_pipe[0], &c, sizeof c));
            EINTR_LOOP(ret, close(sync_pipe[0]));
        }

        /* now close the pipes and return to the caller */
        EINTR_LOOP(ret, close(death_pipe[0]));
        EINTR_LOOP(ret, close(death_pipe[1]));
        fd = FFD_CHILD_PROCESS;
    } else {
        /* parent process */
        info->deathPipe = death_pipe[1];
        fd = death_pipe[0];
        /* publishing the PID makes the slot visible to the SIGCHLD handler;
         * RELEASE so that deathPipe is visible along with it */
        ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE);

        /* release the child */
#ifdef HAVE_EVENTFD
        if (efd != -1) {
            eventfd_t val64 = 42;
            EINTR_LOOP(ret, eventfd_write(efd, val64));
            EINTR_LOOP(ret, close(efd));
        } else
#endif
        {
            /*
             * Usually, closing would be enough to make read(2) return and the child process
             * continue. We need to write here: another thread could be calling forkfd at the
             * same time, which means sync_pipe[1] might be open in another child process.
             */
            EINTR_LOOP(ret, close(sync_pipe[0]));
            EINTR_LOOP(ret, write(sync_pipe[1], "", 1));
            EINTR_LOOP(ret, close(sync_pipe[1]));
        }
    }

    return fd;

    /* error exits: undo, in reverse order, whatever had been set up */
err_close2:
#ifdef HAVE_EVENTFD
    if (efd != -1) {
        EINTR_LOOP(ret, close(efd));
    } else
#endif
    {
        EINTR_LOOP(ret, close(sync_pipe[0]));
        EINTR_LOOP(ret, close(sync_pipe[1]));
    }
err_close:
    EINTR_LOOP(ret, close(death_pipe[0]));
    EINTR_LOOP(ret, close(death_pipe[1]));
err_free:
    /* free the info pointer */
    freeInfo(header, info);
    return -1;
}
764 #endif // FORKFD_NO_FORKFD
765 
766 #if _POSIX_SPAWN > 0 && !defined(FORKFD_NO_SPAWNFD)
spawnfd(int flags,pid_t * ppid,const char * path,const posix_spawn_file_actions_t * file_actions,posix_spawnattr_t * attrp,char * const argv[],char * const envp[])767 int spawnfd(int flags, pid_t *ppid, const char *path, const posix_spawn_file_actions_t *file_actions,
768             posix_spawnattr_t *attrp, char *const argv[], char *const envp[])
769 {
770     Header *header;
771     ProcessInfo *info;
772     struct pipe_payload payload;
773     pid_t pid;
774     int death_pipe[2];
775     int ret = -1;
776     /* we can only do work if we have a way to start the child in stopped mode;
777      * otherwise, we have a major race condition. */
778 
779     assert(!system_has_forkfd());
780 
781     (void) pthread_once(&forkfd_initialization, forkfd_initialize);
782 
783     info = allocateInfo(&header);
784     if (info == NULL) {
785         errno = ENOMEM;
786         goto out;
787     }
788 
789     /* create the pipe before we spawn */
790     if (create_pipe(death_pipe, flags) == -1)
791         goto err_free; /* failed to create the pipes, pass errno */
792 
793     /* start the process */
794     if (flags & FFD_SPAWN_SEARCH_PATH) {
795         /* use posix_spawnp */
796         if (posix_spawnp(&pid, path, file_actions, attrp, argv, envp) != 0)
797             goto err_close;
798     } else {
799         if (posix_spawn(&pid, path, file_actions, attrp, argv, envp) != 0)
800             goto err_close;
801     }
802 
803     if (ppid)
804         *ppid = pid;
805 
806     /* Store the child's PID in the info structure.
807      */
808     info->deathPipe = death_pipe[1];
809     ffd_atomic_store(&info->pid, pid, FFD_ATOMIC_RELEASE);
810 
811     /* check if the child has already exited */
812     if (tryReaping(pid, &payload))
813         notifyAndFreeInfo(header, info, &payload);
814 
815     ret = death_pipe[0];
816     return ret;
817 
818 err_close:
819     EINTR_LOOP(ret, close(death_pipe[0]));
820     EINTR_LOOP(ret, close(death_pipe[1]));
821 
822 err_free:
823     /* free the info pointer */
824     freeInfo(header, info);
825 
826 out:
827     return -1;
828 }
829 #endif // _POSIX_SPAWN && !FORKFD_NO_SPAWNFD
830 
forkfd_wait4(int ffd,struct forkfd_info * info,int options,struct rusage * rusage)831 int forkfd_wait4(int ffd, struct forkfd_info *info, int options, struct rusage *rusage)
832 {
833     struct pipe_payload payload;
834     int ret;
835 
836     if (system_has_forkfd()) {
837         /* if this is one of our pipes, not a procdesc/pidfd, we'll get an EBADF */
838         ret = system_forkfd_wait(ffd, info, options, rusage);
839         if (disable_fork_fallback() || ret != -1 || errno != EBADF)
840             return ret;
841     }
842 
843     ret = read(ffd, &payload, sizeof(payload));
844     if (ret == -1)
845         return ret;     /* pass errno, probably EINTR, EBADF or EWOULDBLOCK */
846 
847     assert(ret == sizeof(payload));
848     if (info)
849         *info = payload.info;
850     if (rusage)
851         *rusage = payload.rusage;
852 
853     return 0;           /* success */
854 }
855 
856 
/*
 * Closes a descriptor obtained from forkfd() or spawnfd().
 *
 * ffd - the descriptor to close
 *
 * Returns whatever close(2) returns: 0 on success, -1 with errno set on
 * failure.
 */
int forkfd_close(int ffd)
{
    const int rc = close(ffd);
    return rc;
}
861 
862 #if defined(__FreeBSD__) && __FreeBSD__ >= 9
863 #  include "forkfd_freebsd.c"
864 #elif defined(__linux__)
865 #  include "forkfd_linux.c"
866 #else
/*
 * Stub for platforms without a native forkfd backend.
 * Declared 'static' with a (void) parameter list to match the prototype at
 * the top of the file.
 *
 * Always returns 0: no native backend is available.
 */
static int system_has_forkfd(void)
{
    return 0;
}
871 
/*
 * Stub for platforms without a native forkfd backend.
 * Declared 'static' to match the prototype at the top of the file.
 *
 * flags  - ignored
 * ppid   - ignored
 * system - set to 0 to tell the caller that the request was not handled
 *          natively and that the fork()-based fallback must be used
 *
 * Always returns -1.
 */
static int system_forkfd(int flags, pid_t *ppid, int *system)
{
    (void)flags;
    (void)ppid;
    *system = 0;
    return -1;
}
879 
/*
 * Stub for platforms without a native forkfd backend.
 * Declared 'static' to match the prototype at the top of the file.
 *
 * All parameters are ignored. forkfd_wait4() only calls this function when
 * system_has_forkfd() reports a native backend, which the stub never does.
 *
 * Always returns -1.
 */
static int system_forkfd_wait(int ffd, struct forkfd_info *info, int options, struct rusage *rusage)
{
    (void)ffd;
    (void)info;
    (void)options;
    (void)rusage;
    return -1;
}
888 #endif
889