1 // SPDX-License-Identifier: GPL-2.0
2 
3 /*
4  * Based on Christian Brauner's clone3() example.
 * These tests assume that they are running in the host's
 * PID namespace.
7  */
8 
9 #define _GNU_SOURCE
10 #include <errno.h>
11 #include <linux/types.h>
12 #include <linux/sched.h>
13 #include <stdio.h>
14 #include <stdlib.h>
15 #include <stdbool.h>
16 #include <sys/syscall.h>
17 #include <sys/types.h>
18 #include <sys/un.h>
19 #include <sys/wait.h>
20 #include <unistd.h>
21 #include <sched.h>
22 
23 #include "../kselftest.h"
24 #include "clone3_selftests.h"
25 
/*
 * Fallback value for MAX_PID_NS_LEVEL when no included header defines it.
 * main() uses it to size the set_tid array and to build the out-of-range
 * set_tid_size values for the invalid-argument tests.
 */
#ifndef MAX_PID_NS_LEVEL
#define MAX_PID_NS_LEVEL 32
#endif

/*
 * Handshake pipes shared between main() and a clone3()'d child that was
 * started with wait_for_it set:
 *   pipe_1 - the child writes one byte here to say it is ready;
 *   pipe_2 - the child then blocks reading one byte from here until main()
 *            tells it to finish.
 */
static int pipe_1[2];
static int pipe_2[2];
32 
/*
 * Leave a child process with exit status @ret.
 *
 * stdout and then stderr are flushed by hand first, because the process is
 * ended with _exit() rather than exit().
 */
static void child_exit(int ret)
{
	FILE *pending[] = { stdout, stderr };
	size_t idx;

	for (idx = 0; idx < sizeof(pending) / sizeof(pending[0]); idx++)
		fflush(pending[idx]);

	_exit(ret);
}
39 
call_clone3_set_tid(pid_t * set_tid,size_t set_tid_size,int flags,int expected_pid,bool wait_for_it)40 static int call_clone3_set_tid(pid_t *set_tid,
41 			       size_t set_tid_size,
42 			       int flags,
43 			       int expected_pid,
44 			       bool wait_for_it)
45 {
46 	int status;
47 	pid_t pid = -1;
48 
49 	struct __clone_args args = {
50 		.flags = flags,
51 		.exit_signal = SIGCHLD,
52 		.set_tid = ptr_to_u64(set_tid),
53 		.set_tid_size = set_tid_size,
54 	};
55 
56 	pid = sys_clone3(&args, sizeof(args));
57 	if (pid < 0) {
58 		ksft_print_msg("%s - Failed to create new process\n",
59 			       strerror(errno));
60 		return -errno;
61 	}
62 
63 	if (pid == 0) {
64 		int ret;
65 		char tmp = 0;
66 		int exit_code = EXIT_SUCCESS;
67 
68 		ksft_print_msg("I am the child, my PID is %d (expected %d)\n",
69 			       getpid(), set_tid[0]);
70 		if (wait_for_it) {
71 			ksft_print_msg("[%d] Child is ready and waiting\n",
72 				       getpid());
73 
74 			/* Signal the parent that the child is ready */
75 			close(pipe_1[0]);
76 			ret = write(pipe_1[1], &tmp, 1);
77 			if (ret != 1) {
78 				ksft_print_msg(
79 					"Writing to pipe returned %d", ret);
80 				exit_code = EXIT_FAILURE;
81 			}
82 			close(pipe_1[1]);
83 			close(pipe_2[1]);
84 			ret = read(pipe_2[0], &tmp, 1);
85 			if (ret != 1) {
86 				ksft_print_msg(
87 					"Reading from pipe returned %d", ret);
88 				exit_code = EXIT_FAILURE;
89 			}
90 			close(pipe_2[0]);
91 		}
92 
93 		if (set_tid[0] != getpid())
94 			child_exit(EXIT_FAILURE);
95 		child_exit(exit_code);
96 	}
97 
98 	if (expected_pid == 0 || expected_pid == pid) {
99 		ksft_print_msg("I am the parent (%d). My child's pid is %d\n",
100 			       getpid(), pid);
101 	} else {
102 		ksft_print_msg(
103 			"Expected child pid %d does not match actual pid %d\n",
104 			expected_pid, pid);
105 		return -1;
106 	}
107 
108 	if (waitpid(pid, &status, 0) < 0) {
109 		ksft_print_msg("Child returned %s\n", strerror(errno));
110 		return -errno;
111 	}
112 
113 	if (!WIFEXITED(status))
114 		return -1;
115 
116 	return WEXITSTATUS(status);
117 }
118 
/*
 * Run one clone3() set_tid scenario and record it as a kselftest result.
 *
 * All arguments except @expected are handed straight to
 * call_clone3_set_tid(); the scenario passes when that call's return value
 * equals @expected and fails otherwise. The attempt and its outcome are
 * logged either way.
 */
static void test_clone3_set_tid(pid_t *set_tid,
				size_t set_tid_size,
				int flags,
				int expected,
				int expected_pid,
				bool wait_for_it)
{
	const pid_t self = getpid();
	const pid_t first_tid = set_tid[0];
	int outcome;

	ksft_print_msg(
		"[%d] Trying clone3() with CLONE_SET_TID to %d and 0x%x\n",
		self, first_tid, flags);

	outcome = call_clone3_set_tid(set_tid, set_tid_size, flags,
				      expected_pid, wait_for_it);

	ksft_print_msg(
		"[%d] clone3() with CLONE_SET_TID %d says :%d - expected %d\n",
		self, first_tid, outcome, expected);

	if (outcome == expected) {
		ksft_test_result_pass(
			"[%d] Result (%d) matches expectation (%d)\n",
			self, outcome, expected);
		return;
	}

	ksft_test_result_fail(
		"[%d] Result (%d) is different than expected (%d)\n",
		self, outcome, expected);
}
main(int argc,char * argv[])145 int main(int argc, char *argv[])
146 {
147 	FILE *f;
148 	char buf;
149 	char *line;
150 	int status;
151 	int ret = -1;
152 	size_t len = 0;
153 	int pid_max = 0;
154 	uid_t uid = getuid();
155 	char proc_path[100] = {0};
156 	pid_t pid, ns1, ns2, ns3, ns_pid;
157 	pid_t set_tid[MAX_PID_NS_LEVEL * 2];
158 
159 	ksft_print_header();
160 	ksft_set_plan(29);
161 	test_clone3_supported();
162 
163 	if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
164 		ksft_exit_fail_msg("pipe() failed\n");
165 
166 	f = fopen("/proc/sys/kernel/pid_max", "r");
167 	if (f == NULL)
168 		ksft_exit_fail_msg(
169 			"%s - Could not open /proc/sys/kernel/pid_max\n",
170 			strerror(errno));
171 	fscanf(f, "%d", &pid_max);
172 	fclose(f);
173 	ksft_print_msg("/proc/sys/kernel/pid_max %d\n", pid_max);
174 
175 	/* Try invalid settings */
176 	memset(&set_tid, 0, sizeof(set_tid));
177 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
178 
179 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
180 
181 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
182 			-EINVAL, 0, 0);
183 
184 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
185 
186 	/*
187 	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
188 	 * nested PID namespace.
189 	 */
190 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
191 
192 	memset(&set_tid, 0xff, sizeof(set_tid));
193 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL + 1, 0, -EINVAL, 0, 0);
194 
195 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2, 0, -EINVAL, 0, 0);
196 
197 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 2 + 1, 0,
198 			-EINVAL, 0, 0);
199 
200 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL * 42, 0, -EINVAL, 0, 0);
201 
202 	/*
203 	 * This can actually work if this test running in a MAX_PID_NS_LEVEL - 1
204 	 * nested PID namespace.
205 	 */
206 	test_clone3_set_tid(set_tid, MAX_PID_NS_LEVEL - 1, 0, -EINVAL, 0, 0);
207 
208 	memset(&set_tid, 0, sizeof(set_tid));
209 	/* Try with an invalid PID */
210 	set_tid[0] = 0;
211 	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
212 
213 	set_tid[0] = -1;
214 	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
215 
216 	/* Claim that the set_tid array actually contains 2 elements. */
217 	test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
218 
219 	/* Try it in a new PID namespace */
220 	if (uid == 0)
221 		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
222 	else
223 		ksft_test_result_skip("Clone3() with set_tid requires root\n");
224 
225 	/* Try with a valid PID (1) this should return -EEXIST. */
226 	set_tid[0] = 1;
227 	if (uid == 0)
228 		test_clone3_set_tid(set_tid, 1, 0, -EEXIST, 0, 0);
229 	else
230 		ksft_test_result_skip("Clone3() with set_tid requires root\n");
231 
232 	/* Try it in a new PID namespace */
233 	if (uid == 0)
234 		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, 0, 0, 0);
235 	else
236 		ksft_test_result_skip("Clone3() with set_tid requires root\n");
237 
238 	/* pid_max should fail everywhere */
239 	set_tid[0] = pid_max;
240 	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
241 
242 	if (uid == 0)
243 		test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
244 	else
245 		ksft_test_result_skip("Clone3() with set_tid requires root\n");
246 
247 	if (uid != 0) {
248 		/*
249 		 * All remaining tests require root. Tell the framework
250 		 * that all those tests are skipped as non-root.
251 		 */
252 		ksft_cnt.ksft_xskip += ksft_plan - ksft_test_num();
253 		goto out;
254 	}
255 
256 	/* Find the current active PID */
257 	pid = fork();
258 	if (pid == 0) {
259 		ksft_print_msg("Child has PID %d\n", getpid());
260 		child_exit(EXIT_SUCCESS);
261 	}
262 	if (waitpid(pid, &status, 0) < 0)
263 		ksft_exit_fail_msg("Waiting for child %d failed", pid);
264 
265 	/* After the child has finished, its PID should be free. */
266 	set_tid[0] = pid;
267 	test_clone3_set_tid(set_tid, 1, 0, 0, 0, 0);
268 
269 	/* This should fail as there is no PID 1 in that namespace */
270 	test_clone3_set_tid(set_tid, 1, CLONE_NEWPID, -EINVAL, 0, 0);
271 
272 	/*
273 	 * Creating a process with PID 1 in the newly created most nested
274 	 * PID namespace and PID 'pid' in the parent PID namespace. This
275 	 * needs to work.
276 	 */
277 	set_tid[0] = 1;
278 	set_tid[1] = pid;
279 	test_clone3_set_tid(set_tid, 2, CLONE_NEWPID, 0, pid, 0);
280 
281 	ksft_print_msg("unshare PID namespace\n");
282 	if (unshare(CLONE_NEWPID) == -1)
283 		ksft_exit_fail_msg("unshare(CLONE_NEWPID) failed: %s\n",
284 				strerror(errno));
285 
286 	set_tid[0] = pid;
287 
288 	/* This should fail as there is no PID 1 in that namespace */
289 	test_clone3_set_tid(set_tid, 1, 0, -EINVAL, 0, 0);
290 
291 	/* Let's create a PID 1 */
292 	ns_pid = fork();
293 	if (ns_pid == 0) {
294 		/*
295 		 * This and the next test cases check that all pid-s are
296 		 * released on error paths.
297 		 */
298 		set_tid[0] = 43;
299 		set_tid[1] = -1;
300 		test_clone3_set_tid(set_tid, 2, 0, -EINVAL, 0, 0);
301 
302 		set_tid[0] = 43;
303 		set_tid[1] = pid;
304 		test_clone3_set_tid(set_tid, 2, 0, 0, 43, 0);
305 
306 		ksft_print_msg("Child in PID namespace has PID %d\n", getpid());
307 		set_tid[0] = 2;
308 		test_clone3_set_tid(set_tid, 1, 0, 0, 2, 0);
309 
310 		set_tid[0] = 1;
311 		set_tid[1] = -1;
312 		set_tid[2] = pid;
313 		/* This should fail as there is invalid PID at level '1'. */
314 		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, -EINVAL, 0, 0);
315 
316 		set_tid[0] = 1;
317 		set_tid[1] = 42;
318 		set_tid[2] = pid;
319 		/*
320 		 * This should fail as there are not enough active PID
321 		 * namespaces. Again assuming this is running in the host's
322 		 * PID namespace. Not yet nested.
323 		 */
324 		test_clone3_set_tid(set_tid, 4, CLONE_NEWPID, -EINVAL, 0, 0);
325 
326 		/*
327 		 * This should work and from the parent we should see
328 		 * something like 'NSpid:	pid	42	1'.
329 		 */
330 		test_clone3_set_tid(set_tid, 3, CLONE_NEWPID, 0, 42, true);
331 
332 		child_exit(ksft_cnt.ksft_fail);
333 	}
334 
335 	close(pipe_1[1]);
336 	close(pipe_2[0]);
337 	while (read(pipe_1[0], &buf, 1) > 0) {
338 		ksft_print_msg("[%d] Child is ready and waiting\n", getpid());
339 		break;
340 	}
341 
342 	snprintf(proc_path, sizeof(proc_path), "/proc/%d/status", pid);
343 	f = fopen(proc_path, "r");
344 	if (f == NULL)
345 		ksft_exit_fail_msg(
346 			"%s - Could not open %s\n",
347 			strerror(errno), proc_path);
348 
349 	while (getline(&line, &len, f) != -1) {
350 		if (strstr(line, "NSpid")) {
351 			int i;
352 
353 			/* Verify that all generated PIDs are as expected. */
354 			i = sscanf(line, "NSpid:\t%d\t%d\t%d",
355 				   &ns3, &ns2, &ns1);
356 			if (i != 3) {
357 				ksft_print_msg(
358 					"Unexpected 'NSPid:' entry: %s",
359 					line);
360 				ns1 = ns2 = ns3 = 0;
361 			}
362 			break;
363 		}
364 	}
365 	fclose(f);
366 	free(line);
367 	close(pipe_2[0]);
368 
369 	/* Tell the clone3()'d child to finish. */
370 	write(pipe_2[1], &buf, 1);
371 	close(pipe_2[1]);
372 
373 	if (waitpid(ns_pid, &status, 0) < 0) {
374 		ksft_print_msg("Child returned %s\n", strerror(errno));
375 		ret = -errno;
376 		goto out;
377 	}
378 
379 	if (!WIFEXITED(status))
380 		ksft_test_result_fail("Child error\n");
381 
382 	ksft_cnt.ksft_pass += 6 - (ksft_cnt.ksft_fail - WEXITSTATUS(status));
383 	ksft_cnt.ksft_fail = WEXITSTATUS(status);
384 
385 	if (ns3 == pid && ns2 == 42 && ns1 == 1)
386 		ksft_test_result_pass(
387 			"PIDs in all namespaces as expected (%d,%d,%d)\n",
388 			ns3, ns2, ns1);
389 	else
390 		ksft_test_result_fail(
391 			"PIDs in all namespaces not as expected (%d,%d,%d)\n",
392 			ns3, ns2, ns1);
393 out:
394 	ret = 0;
395 
396 	return !ret ? ksft_exit_pass() : ksft_exit_fail();
397 }
398