1 /****************************************************************************
2 **
3 ** Copyright (C) 2020 Intel Corporation.
4 **
5 ** Permission is hereby granted, free of charge, to any person obtaining a copy
6 ** of this software and associated documentation files (the "Software"), to deal
7 ** in the Software without restriction, including without limitation the rights
8 ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 ** copies of the Software, and to permit persons to whom the Software is
10 ** furnished to do so, subject to the following conditions:
11 **
12 ** The above copyright notice and this permission notice shall be included in
13 ** all copies or substantial portions of the Software.
14 **
15 ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 ** THE SOFTWARE.
22 **
23 ****************************************************************************/
24 
25 #ifndef _GNU_SOURCE
26 #  define _GNU_SOURCE
27 #endif
28 
29 #include "forkfd.h"
30 
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <limits.h>
34 #include <sched.h>
35 #include <signal.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <sys/resource.h>
40 #include <sys/syscall.h>
41 #include <sys/types.h>
42 #include <sys/wait.h>
43 #include <unistd.h>
44 
45 #include "forkfd_atomic.h"
46 
/* Fallback definitions of the pidfd-related constants used below. Each one is
 * only defined when the system headers included above did not already
 * provide it. */
#ifndef CLONE_PIDFD
#  define CLONE_PIDFD   0x00001000
#endif
#ifndef P_PIDFD
#  define P_PIDFD       3
#endif
53 
// in forkfd.c
// NOTE(review): these helpers are declared static, so this file is presumably
// compiled in the same translation unit as forkfd.c -- confirm.
static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions);
static void convertStatusToForkfdInfo(int status, struct forkfd_info *info);

// Cached result of the kernel-support probe done by system_forkfd():
//   0  -> not probed yet (initial value)
//   >0 -> CLONE_PIDFD / P_PIDFD are usable
//   <0 -> not supported
static ffd_atomic_int system_forkfd_state = FFD_ATOMIC_INIT(0);
59 
/*
 * Issues the waitid system call directly through syscall(2).
 *
 * The raw system call accepts a fifth argument (a struct rusage pointer) that
 * the glibc waitid() wrapper does not expose, which is why the libc function
 * is bypassed here.
 *
 * Returns the value reported by syscall(), narrowed to int.
 */
static int sys_waitid(int which, int pid_or_pidfd, siginfo_t *infop, int options,
                      struct rusage *ru)
{
    long rc = syscall(__NR_waitid, which, pid_or_pidfd, infop, options, ru);
    return (int)rc;
}
67 
/*
 * Invokes the raw clone system call with no separate child stack, no child-tid
 * pointer and a zero TLS value, passing only the caller's flags and parent-tid
 * pointer. system_forkfd() calls this with CLONE_PIDFD and reads the new pidfd
 * back through ptid.
 *
 * The position of the arguments in the system call differs between
 * architectures, hence the preprocessor ladder below.
 *
 * Returns the result of syscall(), narrowed to int. On architectures not
 * listed here, sets errno to ENOSYS and returns -1 without making any call.
 */
static int sys_clone(unsigned long cloneflags, int *ptid)
{
    void *child_stack = NULL;
    int *ctid = NULL;
    unsigned long newtls = 0;
#if defined(__NR_clone2)
    /* clone2 takes an explicit stack size after the stack pointer */
    size_t stack_size = 0;
    return syscall(__NR_clone2, cloneflags, child_stack, stack_size, ptid, ctid, newtls);
#elif defined(__cris__) || defined(__s390__)
    /* a.k.a., CONFIG_CLONE_BACKWARDS2 architectures */
    /* here the stack pointer comes first and the flags second */
    return syscall(__NR_clone, child_stack, cloneflags, ptid, newtls, ctid);
#elif defined(__microblaze__)
    /* a.k.a., CONFIG_CLONE_BACKWARDS3 architectures */
    /* this variant also carries a stack size argument */
    size_t stack_size = 0;
    return syscall(__NR_clone, cloneflags, child_stack, stack_size, ptid, newtls, ctid);
#elif defined(__arc__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
    defined(__nds32__) || defined(__hppa__) || defined(__powerpc__) || defined(__i386__) || \
    defined(__x86_64__) || defined(__xtensa__) || defined(__alpha__) || defined(__riscv)
    /* ctid and newtls are inverted on CONFIG_CLONE_BACKWARDS architectures,
     * but since both values are 0, there's no harm. */
    return syscall(__NR_clone, cloneflags, child_stack, ptid, ctid, newtls);
#else
    /* unknown argument order: refuse rather than guess; the casts only
     * silence unused-variable warnings */
    (void) child_stack;
    (void) ctid;
    (void) newtls;
    errno = ENOSYS;
    return -1;
#endif
}
97 
detect_clone_pidfd_support()98 static int detect_clone_pidfd_support()
99 {
100     /*
101      * Detect support for CLONE_PIDFD and P_PIDFD. Support was added in steps:
102      * - Linux 5.2 added CLONE_PIDFD support in clone(2) system call
103      * - Linux 5.2 added pidfd_send_signal(2)
104      * - Linux 5.3 added support for poll(2) on pidfds
105      * - Linux 5.3 added clone3(2)
106      * - Linux 5.4 added P_PIDFD support in waitid(2)
107      *
108      * We need CLONE_PIDFD and the poll(2) support. We could emulate the
109      * P_PIDFD support by reading the PID from /proc/self/fdinfo/n, which works
110      * in Linux 5.2, but without poll(2), we can't guarantee the functionality
111      * anyway.
112      *
113      * So we detect by trying to waitid(2) on a positive file descriptor that
114      * is definitely closed (INT_MAX). If P_PIDFD is supported, waitid(2) will
115      * return EBADF. If it isn't supported, it returns EINVAL (as it would for
116      * a negative file descriptor). This will succeed on Linux 5.4.
117      *
118      * We could have instead detected by the existence of the clone3(2) system
119      * call, but for that we would have needed to wait for __NR_clone3 to show
120      * up on the libcs. We choose to go via the waitid(2) route, which requires
121      * platform-independent constants only. It would have simplified the
122      * sys_clone() mess above...
123      */
124 
125     sys_waitid(P_PIDFD, INT_MAX, NULL, WEXITED|WNOHANG, NULL);
126     return errno == EBADF ? 1 : -1;
127 }
128 
system_has_forkfd()129 int system_has_forkfd()
130 {
131     return ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED) > 0;
132 }
133 
system_forkfd(int flags,pid_t * ppid,int * system)134 int system_forkfd(int flags, pid_t *ppid, int *system)
135 {
136     pid_t pid;
137     int pidfd;
138 
139     int state = ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED);
140     if (state == 0) {
141         state = detect_clone_pidfd_support();
142         ffd_atomic_store(&system_forkfd_state, state, FFD_ATOMIC_RELAXED);
143     }
144     if (state < 0) {
145         *system = 0;
146         return state;
147     }
148 
149     *system = 1;
150     unsigned long cloneflags = CLONE_PIDFD | SIGCHLD;
151     pid = sys_clone(cloneflags, &pidfd);
152     if (ppid)
153         *ppid = pid;
154 
155     if (pid == 0) {
156         /* Child process */
157         return FFD_CHILD_PROCESS;
158     }
159 
160     /* parent process */
161     if ((flags & FFD_CLOEXEC) == 0) {
162         /* pidfd defaults to O_CLOEXEC */
163         fcntl(pidfd, F_SETFD, 0);
164     }
165     if (flags & FFD_NONBLOCK)
166         fcntl(pidfd, F_SETFL, fcntl(pidfd, F_GETFL) | O_NONBLOCK);
167     return pidfd;
168 }
169 
system_forkfd_wait(int ffd,struct forkfd_info * info,int ffdoptions,struct rusage * rusage)170 int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdoptions, struct rusage *rusage)
171 {
172     siginfo_t si;
173     int ret;
174     int options = convertForkfdWaitFlagsToWaitFlags(ffdoptions);
175 
176     if ((options & WNOHANG) == 0) {
177         /* check if the file descriptor is non-blocking */
178         ret = fcntl(ffd, F_GETFL);
179         if (ret == -1)
180             return ret;
181         if (ret & O_NONBLOCK)
182             options |= WNOHANG;
183     }
184 
185     ret = sys_waitid(P_PIDFD, ffd, &si, options, rusage);
186     if (ret == -1 && errno == ECHILD) {
187         errno = EWOULDBLOCK;
188     } else if (ret == 0 && info) {
189         info->code = si.si_code;
190         info->status = si.si_status;
191     }
192     return ret;
193 }
194