1 /****************************************************************************
2 **
3 ** Copyright (C) 2020 Intel Corporation.
4 **
5 ** Permission is hereby granted, free of charge, to any person obtaining a copy
6 ** of this software and associated documentation files (the "Software"), to deal
7 ** in the Software without restriction, including without limitation the rights
8 ** to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 ** copies of the Software, and to permit persons to whom the Software is
10 ** furnished to do so, subject to the following conditions:
11 **
12 ** The above copyright notice and this permission notice shall be included in
13 ** all copies or substantial portions of the Software.
14 **
15 ** THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 ** IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 ** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 ** AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 ** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 ** OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 ** THE SOFTWARE.
22 **
23 ****************************************************************************/
24
25 #ifndef _GNU_SOURCE
26 # define _GNU_SOURCE
27 #endif
28
29 #include "forkfd.h"
30
31 #include <errno.h>
32 #include <fcntl.h>
33 #include <limits.h>
34 #include <sched.h>
35 #include <signal.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <sys/resource.h>
40 #include <sys/syscall.h>
41 #include <sys/types.h>
42 #include <sys/wait.h>
43 #include <unistd.h>
44
45 #include "forkfd_atomic.h"
46
/*
 * Fallback values for the pidfd-related constants used below: CLONE_PIDFD is
 * passed as a clone flag and P_PIDFD as the waitid id type. Each one is
 * defined here only when the headers included above did not already
 * provide it.
 */
#ifndef CLONE_PIDFD
# define CLONE_PIDFD 0x00001000
#endif
#ifndef P_PIDFD
# define P_PIDFD 3
#endif
53
/*
 * Helpers implemented in forkfd.c; only declared here.
 * convertForkfdWaitFlagsToWaitFlags() is used by system_forkfd_wait() to
 * turn forkfd wait options into waitid options.
 */
static int convertForkfdWaitFlagsToWaitFlags(int ffdoptions);
static void convertStatusToForkfdInfo(int status, struct forkfd_info *info);

/*
 * Cached result of the kernel capability probe:
 *    0  - not probed yet (system_forkfd() runs the detection),
 *   >0  - the pidfd-based implementation is usable,
 *   <0  - the probe failed; the pidfd-based implementation is not used.
 */
static ffd_atomic_int system_forkfd_state = FFD_ATOMIC_INIT(0);
59
/*
 * Invokes the waitid system call directly through syscall(2).
 *
 * The raw call is used instead of the libc wrapper because it accepts a
 * fifth argument (a struct rusage pointer) that glibc does not expose.
 * Returns whatever syscall() returns, narrowed to int.
 */
static int sys_waitid(int which, int pid_or_pidfd, siginfo_t *infop, int options,
                      struct rusage *ru)
{
    long rc = syscall(__NR_waitid, which, pid_or_pidfd, infop, options, ru);
    return (int) rc;
}
67
/*
 * Issues the raw clone (or clone2) system call with the given flags.
 *
 * cloneflags: flags forwarded unchanged to the kernel.
 * ptid:       forwarded in the parent-TID argument slot.
 *
 * No child stack, child-TID pointer or TLS value is supplied. The order of
 * the system call arguments differs between architectures, which is what the
 * preprocessor ladder below selects. On an architecture that is not listed,
 * the function fails with errno set to ENOSYS and returns -1.
 */
static int sys_clone(unsigned long cloneflags, int *ptid)
{
    /* Typed zero values: they travel through the variadic syscall(), so
     * they are kept as variables of the exact type declared here. */
    unsigned long tls = 0;
    int *child_tid = NULL;
    void *stack = NULL;
#if defined(__NR_clone2)
    size_t stack_len = 0;
    return syscall(__NR_clone2, cloneflags, stack, stack_len, ptid, child_tid, tls);
#elif defined(__cris__) || defined(__s390__)
    /* CONFIG_CLONE_BACKWARDS2 architectures: the stack comes first */
    return syscall(__NR_clone, stack, cloneflags, ptid, tls, child_tid);
#elif defined(__microblaze__)
    /* CONFIG_CLONE_BACKWARDS3 architectures: extra stack size argument */
    size_t stack_len = 0;
    return syscall(__NR_clone, cloneflags, stack, stack_len, ptid, tls, child_tid);
#elif defined(__arc__) || defined(__arm__) || defined(__aarch64__) || defined(__mips__) || \
    defined(__nds32__) || defined(__hppa__) || defined(__powerpc__) || defined(__i386__) || \
    defined(__x86_64__) || defined(__xtensa__) || defined(__alpha__) || defined(__riscv)
    /* On CONFIG_CLONE_BACKWARDS architectures the child-TID and TLS
     * arguments are swapped; both are zero here, so the order is harmless. */
    return syscall(__NR_clone, cloneflags, stack, ptid, child_tid, tls);
#else
    (void) tls;
    (void) child_tid;
    (void) stack;
    errno = ENOSYS;
    return -1;
#endif
}
97
detect_clone_pidfd_support()98 static int detect_clone_pidfd_support()
99 {
100 /*
101 * Detect support for CLONE_PIDFD and P_PIDFD. Support was added in steps:
102 * - Linux 5.2 added CLONE_PIDFD support in clone(2) system call
103 * - Linux 5.2 added pidfd_send_signal(2)
104 * - Linux 5.3 added support for poll(2) on pidfds
105 * - Linux 5.3 added clone3(2)
106 * - Linux 5.4 added P_PIDFD support in waitid(2)
107 *
108 * We need CLONE_PIDFD and the poll(2) support. We could emulate the
109 * P_PIDFD support by reading the PID from /proc/self/fdinfo/n, which works
110 * in Linux 5.2, but without poll(2), we can't guarantee the functionality
111 * anyway.
112 *
113 * So we detect by trying to waitid(2) on a positive file descriptor that
114 * is definitely closed (INT_MAX). If P_PIDFD is supported, waitid(2) will
115 * return EBADF. If it isn't supported, it returns EINVAL (as it would for
116 * a negative file descriptor). This will succeed on Linux 5.4.
117 *
118 * We could have instead detected by the existence of the clone3(2) system
119 * call, but for that we would have needed to wait for __NR_clone3 to show
120 * up on the libcs. We choose to go via the waitid(2) route, which requires
121 * platform-independent constants only. It would have simplified the
122 * sys_clone() mess above...
123 */
124
125 sys_waitid(P_PIDFD, INT_MAX, NULL, WEXITED|WNOHANG, NULL);
126 return errno == EBADF ? 1 : -1;
127 }
128
system_has_forkfd()129 int system_has_forkfd()
130 {
131 return ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED) > 0;
132 }
133
system_forkfd(int flags,pid_t * ppid,int * system)134 int system_forkfd(int flags, pid_t *ppid, int *system)
135 {
136 pid_t pid;
137 int pidfd;
138
139 int state = ffd_atomic_load(&system_forkfd_state, FFD_ATOMIC_RELAXED);
140 if (state == 0) {
141 state = detect_clone_pidfd_support();
142 ffd_atomic_store(&system_forkfd_state, state, FFD_ATOMIC_RELAXED);
143 }
144 if (state < 0) {
145 *system = 0;
146 return state;
147 }
148
149 *system = 1;
150 unsigned long cloneflags = CLONE_PIDFD | SIGCHLD;
151 pid = sys_clone(cloneflags, &pidfd);
152 if (ppid)
153 *ppid = pid;
154
155 if (pid == 0) {
156 /* Child process */
157 return FFD_CHILD_PROCESS;
158 }
159
160 /* parent process */
161 if ((flags & FFD_CLOEXEC) == 0) {
162 /* pidfd defaults to O_CLOEXEC */
163 fcntl(pidfd, F_SETFD, 0);
164 }
165 if (flags & FFD_NONBLOCK)
166 fcntl(pidfd, F_SETFL, fcntl(pidfd, F_GETFL) | O_NONBLOCK);
167 return pidfd;
168 }
169
system_forkfd_wait(int ffd,struct forkfd_info * info,int ffdoptions,struct rusage * rusage)170 int system_forkfd_wait(int ffd, struct forkfd_info *info, int ffdoptions, struct rusage *rusage)
171 {
172 siginfo_t si;
173 int ret;
174 int options = convertForkfdWaitFlagsToWaitFlags(ffdoptions);
175
176 if ((options & WNOHANG) == 0) {
177 /* check if the file descriptor is non-blocking */
178 ret = fcntl(ffd, F_GETFL);
179 if (ret == -1)
180 return ret;
181 if (ret & O_NONBLOCK)
182 options |= WNOHANG;
183 }
184
185 ret = sys_waitid(P_PIDFD, ffd, &si, options, rusage);
186 if (ret == -1 && errno == ECHILD) {
187 errno = EWOULDBLOCK;
188 } else if (ret == 0 && info) {
189 info->code = si.si_code;
190 info->status = si.si_status;
191 }
192 return ret;
193 }
194