xref: /freebsd/sys/kern/kern_procctl.c (revision 81ad6265)
1 /*-
2  * Copyright (c) 2014 John Baldwin
3  * Copyright (c) 2014, 2016 The FreeBSD Foundation
4  *
5  * Portions of this software were developed by Konstantin Belousov
6  * under sponsorship from the FreeBSD Foundation.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/_unrhdr.h>
35 #include <sys/systm.h>
36 #include <sys/capsicum.h>
37 #include <sys/lock.h>
38 #include <sys/mman.h>
39 #include <sys/mutex.h>
40 #include <sys/priv.h>
41 #include <sys/proc.h>
42 #include <sys/procctl.h>
43 #include <sys/sx.h>
44 #include <sys/syscallsubr.h>
45 #include <sys/sysproto.h>
46 #include <sys/taskqueue.h>
47 #include <sys/wait.h>
48 
49 #include <vm/vm.h>
50 #include <vm/pmap.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_extern.h>
53 
54 static int
55 protect_setchild(struct thread *td, struct proc *p, int flags)
56 {
57 
58 	PROC_LOCK_ASSERT(p, MA_OWNED);
59 	if (p->p_flag & P_SYSTEM || p_cansched(td, p) != 0)
60 		return (0);
61 	if (flags & PPROT_SET) {
62 		p->p_flag |= P_PROTECTED;
63 		if (flags & PPROT_INHERIT)
64 			p->p_flag2 |= P2_INHERIT_PROTECTED;
65 	} else {
66 		p->p_flag &= ~P_PROTECTED;
67 		p->p_flag2 &= ~P2_INHERIT_PROTECTED;
68 	}
69 	return (1);
70 }
71 
72 static int
73 protect_setchildren(struct thread *td, struct proc *top, int flags)
74 {
75 	struct proc *p;
76 	int ret;
77 
78 	p = top;
79 	ret = 0;
80 	sx_assert(&proctree_lock, SX_LOCKED);
81 	for (;;) {
82 		ret |= protect_setchild(td, p, flags);
83 		PROC_UNLOCK(p);
84 		/*
85 		 * If this process has children, descend to them next,
86 		 * otherwise do any siblings, and if done with this level,
87 		 * follow back up the tree (but not past top).
88 		 */
89 		if (!LIST_EMPTY(&p->p_children))
90 			p = LIST_FIRST(&p->p_children);
91 		else for (;;) {
92 			if (p == top) {
93 				PROC_LOCK(p);
94 				return (ret);
95 			}
96 			if (LIST_NEXT(p, p_sibling)) {
97 				p = LIST_NEXT(p, p_sibling);
98 				break;
99 			}
100 			p = p->p_pptr;
101 		}
102 		PROC_LOCK(p);
103 	}
104 }
105 
106 static int
107 protect_set(struct thread *td, struct proc *p, void *data)
108 {
109 	int error, flags, ret;
110 
111 	flags = *(int *)data;
112 	switch (PPROT_OP(flags)) {
113 	case PPROT_SET:
114 	case PPROT_CLEAR:
115 		break;
116 	default:
117 		return (EINVAL);
118 	}
119 
120 	if ((PPROT_FLAGS(flags) & ~(PPROT_DESCEND | PPROT_INHERIT)) != 0)
121 		return (EINVAL);
122 
123 	error = priv_check(td, PRIV_VM_MADV_PROTECT);
124 	if (error)
125 		return (error);
126 
127 	if (flags & PPROT_DESCEND)
128 		ret = protect_setchildren(td, p, flags);
129 	else
130 		ret = protect_setchild(td, p, flags);
131 	if (ret == 0)
132 		return (EPERM);
133 	return (0);
134 }
135 
136 static int
137 reap_acquire(struct thread *td, struct proc *p, void *data __unused)
138 {
139 
140 	sx_assert(&proctree_lock, SX_XLOCKED);
141 	if (p != td->td_proc)
142 		return (EPERM);
143 	if ((p->p_treeflag & P_TREE_REAPER) != 0)
144 		return (EBUSY);
145 	p->p_treeflag |= P_TREE_REAPER;
146 	/*
147 	 * We do not reattach existing children and the whole tree
148 	 * under them to us, since p->p_reaper already seen them.
149 	 */
150 	return (0);
151 }
152 
153 static int
154 reap_release(struct thread *td, struct proc *p, void *data __unused)
155 {
156 
157 	sx_assert(&proctree_lock, SX_XLOCKED);
158 	if (p != td->td_proc)
159 		return (EPERM);
160 	if (p == initproc)
161 		return (EINVAL);
162 	if ((p->p_treeflag & P_TREE_REAPER) == 0)
163 		return (EINVAL);
164 	reaper_abandon_children(p, false);
165 	return (0);
166 }
167 
168 static int
169 reap_status(struct thread *td, struct proc *p, void *data)
170 {
171 	struct proc *reap, *p2, *first_p;
172 	struct procctl_reaper_status *rs;
173 
174 	rs = data;
175 	sx_assert(&proctree_lock, SX_LOCKED);
176 	if ((p->p_treeflag & P_TREE_REAPER) == 0) {
177 		reap = p->p_reaper;
178 	} else {
179 		reap = p;
180 		rs->rs_flags |= REAPER_STATUS_OWNED;
181 	}
182 	if (reap == initproc)
183 		rs->rs_flags |= REAPER_STATUS_REALINIT;
184 	rs->rs_reaper = reap->p_pid;
185 	rs->rs_descendants = 0;
186 	rs->rs_children = 0;
187 	if (!LIST_EMPTY(&reap->p_reaplist)) {
188 		first_p = LIST_FIRST(&reap->p_children);
189 		if (first_p == NULL)
190 			first_p = LIST_FIRST(&reap->p_reaplist);
191 		rs->rs_pid = first_p->p_pid;
192 		LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
193 			if (proc_realparent(p2) == reap)
194 				rs->rs_children++;
195 			rs->rs_descendants++;
196 		}
197 	} else {
198 		rs->rs_pid = -1;
199 	}
200 	return (0);
201 }
202 
203 static int
204 reap_getpids(struct thread *td, struct proc *p, void *data)
205 {
206 	struct proc *reap, *p2;
207 	struct procctl_reaper_pidinfo *pi, *pip;
208 	struct procctl_reaper_pids *rp;
209 	u_int i, n;
210 	int error;
211 
212 	rp = data;
213 	sx_assert(&proctree_lock, SX_LOCKED);
214 	PROC_UNLOCK(p);
215 	reap = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
216 	n = i = 0;
217 	error = 0;
218 	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling)
219 		n++;
220 	sx_unlock(&proctree_lock);
221 	if (rp->rp_count < n)
222 		n = rp->rp_count;
223 	pi = malloc(n * sizeof(*pi), M_TEMP, M_WAITOK);
224 	sx_slock(&proctree_lock);
225 	LIST_FOREACH(p2, &reap->p_reaplist, p_reapsibling) {
226 		if (i == n)
227 			break;
228 		pip = &pi[i];
229 		bzero(pip, sizeof(*pip));
230 		pip->pi_pid = p2->p_pid;
231 		pip->pi_subtree = p2->p_reapsubtree;
232 		pip->pi_flags = REAPER_PIDINFO_VALID;
233 		if (proc_realparent(p2) == reap)
234 			pip->pi_flags |= REAPER_PIDINFO_CHILD;
235 		if ((p2->p_treeflag & P_TREE_REAPER) != 0)
236 			pip->pi_flags |= REAPER_PIDINFO_REAPER;
237 		i++;
238 	}
239 	sx_sunlock(&proctree_lock);
240 	error = copyout(pi, rp->rp_pids, i * sizeof(*pi));
241 	free(pi, M_TEMP);
242 	sx_slock(&proctree_lock);
243 	PROC_LOCK(p);
244 	return (error);
245 }
246 
247 struct reap_kill_proc_work {
248 	struct ucred *cr;
249 	struct proc *target;
250 	ksiginfo_t *ksi;
251 	struct procctl_reaper_kill *rk;
252 	int *error;
253 	struct task t;
254 };
255 
256 static void
257 reap_kill_proc_locked(struct reap_kill_proc_work *w)
258 {
259 	int error1;
260 	bool need_stop;
261 
262 	PROC_LOCK_ASSERT(w->target, MA_OWNED);
263 	PROC_ASSERT_HELD(w->target);
264 
265 	error1 = cr_cansignal(w->cr, w->target, w->rk->rk_sig);
266 	if (error1 != 0) {
267 		if (*w->error == ESRCH) {
268 			w->rk->rk_fpid = w->target->p_pid;
269 			*w->error = error1;
270 		}
271 		return;
272 	}
273 
274 	/*
275 	 * The need_stop indicates if the target process needs to be
276 	 * suspended before being signalled.  This is needed when we
277 	 * guarantee that all processes in subtree are signalled,
278 	 * avoiding the race with some process not yet fully linked
279 	 * into all structures during fork, ignored by iterator, and
280 	 * then escaping signalling.
281 	 *
282 	 * The thread cannot usefully stop itself anyway, and if other
283 	 * thread of the current process forks while the current
284 	 * thread signals the whole subtree, it is an application
285 	 * race.
286 	 */
287 	if ((w->target->p_flag & (P_KPROC | P_SYSTEM | P_STOPPED)) == 0)
288 		need_stop = thread_single(w->target, SINGLE_ALLPROC) == 0;
289 	else
290 		need_stop = false;
291 
292 	(void)pksignal(w->target, w->rk->rk_sig, w->ksi);
293 	w->rk->rk_killed++;
294 	*w->error = error1;
295 
296 	if (need_stop)
297 		thread_single_end(w->target, SINGLE_ALLPROC);
298 }
299 
300 static void
301 reap_kill_proc_work(void *arg, int pending __unused)
302 {
303 	struct reap_kill_proc_work *w;
304 
305 	w = arg;
306 	PROC_LOCK(w->target);
307 	if ((w->target->p_flag2 & P2_WEXIT) == 0)
308 		reap_kill_proc_locked(w);
309 	PROC_UNLOCK(w->target);
310 
311 	sx_xlock(&proctree_lock);
312 	w->target = NULL;
313 	wakeup(&w->target);
314 	sx_xunlock(&proctree_lock);
315 }
316 
317 struct reap_kill_tracker {
318 	struct proc *parent;
319 	TAILQ_ENTRY(reap_kill_tracker) link;
320 };
321 
322 TAILQ_HEAD(reap_kill_tracker_head, reap_kill_tracker);
323 
324 static void
325 reap_kill_sched(struct reap_kill_tracker_head *tracker, struct proc *p2)
326 {
327 	struct reap_kill_tracker *t;
328 
329 	PROC_LOCK(p2);
330 	if ((p2->p_flag2 & P2_WEXIT) != 0) {
331 		PROC_UNLOCK(p2);
332 		return;
333 	}
334 	_PHOLD_LITE(p2);
335 	PROC_UNLOCK(p2);
336 	t = malloc(sizeof(struct reap_kill_tracker), M_TEMP, M_WAITOK);
337 	t->parent = p2;
338 	TAILQ_INSERT_TAIL(tracker, t, link);
339 }
340 
341 static void
342 reap_kill_sched_free(struct reap_kill_tracker *t)
343 {
344 	PRELE(t->parent);
345 	free(t, M_TEMP);
346 }
347 
348 static void
349 reap_kill_children(struct thread *td, struct proc *reaper,
350     struct procctl_reaper_kill *rk, ksiginfo_t *ksi, int *error)
351 {
352 	struct proc *p2;
353 	int error1;
354 
355 	LIST_FOREACH(p2, &reaper->p_children, p_sibling) {
356 		PROC_LOCK(p2);
357 		if ((p2->p_flag2 & P2_WEXIT) == 0) {
358 			error1 = p_cansignal(td, p2, rk->rk_sig);
359 			if (error1 != 0) {
360 				if (*error == ESRCH) {
361 					rk->rk_fpid = p2->p_pid;
362 					*error = error1;
363 				}
364 
365 				/*
366 				 * Do not end the loop on error,
367 				 * signal everything we can.
368 				 */
369 			} else {
370 				(void)pksignal(p2, rk->rk_sig, ksi);
371 				rk->rk_killed++;
372 			}
373 		}
374 		PROC_UNLOCK(p2);
375 	}
376 }
377 
378 static bool
379 reap_kill_subtree_once(struct thread *td, struct proc *p, struct proc *reaper,
380     struct unrhdr *pids, struct reap_kill_proc_work *w)
381 {
382 	struct reap_kill_tracker_head tracker;
383 	struct reap_kill_tracker *t;
384 	struct proc *p2;
385 	int r, xlocked;
386 	bool res, st;
387 
388 	res = false;
389 	TAILQ_INIT(&tracker);
390 	reap_kill_sched(&tracker, reaper);
391 	while ((t = TAILQ_FIRST(&tracker)) != NULL) {
392 		TAILQ_REMOVE(&tracker, t, link);
393 
394 		/*
395 		 * Since reap_kill_proc() drops proctree_lock sx, it
396 		 * is possible that the tracked reaper is no longer.
397 		 * In this case the subtree is reparented to the new
398 		 * reaper, which should handle it.
399 		 */
400 		if ((t->parent->p_treeflag & P_TREE_REAPER) == 0) {
401 			reap_kill_sched_free(t);
402 			res = true;
403 			continue;
404 		}
405 
406 		LIST_FOREACH(p2, &t->parent->p_reaplist, p_reapsibling) {
407 			if (t->parent == reaper &&
408 			    (w->rk->rk_flags & REAPER_KILL_SUBTREE) != 0 &&
409 			    p2->p_reapsubtree != w->rk->rk_subtree)
410 				continue;
411 			if ((p2->p_treeflag & P_TREE_REAPER) != 0)
412 				reap_kill_sched(&tracker, p2);
413 			if (alloc_unr_specific(pids, p2->p_pid) != p2->p_pid)
414 				continue;
415 			if (p2 == td->td_proc) {
416 				if ((p2->p_flag & P_HADTHREADS) != 0 &&
417 				    (p2->p_flag2 & P2_WEXIT) == 0) {
418 					xlocked = sx_xlocked(&proctree_lock);
419 					sx_unlock(&proctree_lock);
420 					st = true;
421 				} else {
422 					st = false;
423 				}
424 				PROC_LOCK(p2);
425 				if (st)
426 					r = thread_single(p2, SINGLE_NO_EXIT);
427 				(void)pksignal(p2, w->rk->rk_sig, w->ksi);
428 				w->rk->rk_killed++;
429 				if (st && r == 0)
430 					thread_single_end(p2, SINGLE_NO_EXIT);
431 				PROC_UNLOCK(p2);
432 				if (st) {
433 					if (xlocked)
434 						sx_xlock(&proctree_lock);
435 					else
436 						sx_slock(&proctree_lock);
437 				}
438 			} else {
439 				PROC_LOCK(p2);
440 				if ((p2->p_flag2 & P2_WEXIT) == 0) {
441 					_PHOLD_LITE(p2);
442 					PROC_UNLOCK(p2);
443 					w->target = p2;
444 					taskqueue_enqueue(taskqueue_thread,
445 					    &w->t);
446 					while (w->target != NULL) {
447 						sx_sleep(&w->target,
448 						    &proctree_lock, PWAIT,
449 						    "reapst", 0);
450 					}
451 					PROC_LOCK(p2);
452 					_PRELE(p2);
453 				}
454 				PROC_UNLOCK(p2);
455 			}
456 			res = true;
457 		}
458 		reap_kill_sched_free(t);
459 	}
460 	return (res);
461 }
462 
463 static void
464 reap_kill_subtree(struct thread *td, struct proc *p, struct proc *reaper,
465     struct reap_kill_proc_work *w)
466 {
467 	struct unrhdr pids;
468 
469 	/*
470 	 * pids records processes which were already signalled, to
471 	 * avoid doubling signals to them if iteration needs to be
472 	 * repeated.
473 	 */
474 	init_unrhdr(&pids, 1, PID_MAX, UNR_NO_MTX);
475 	PROC_LOCK(td->td_proc);
476 	if ((td->td_proc->p_flag2 & P2_WEXIT) != 0) {
477 		PROC_UNLOCK(td->td_proc);
478 		goto out;
479 	}
480 	PROC_UNLOCK(td->td_proc);
481 	while (reap_kill_subtree_once(td, p, reaper, &pids, w))
482 	       ;
483 out:
484 	clean_unrhdr(&pids);
485 	clear_unrhdr(&pids);
486 }
487 
488 static bool
489 reap_kill_sapblk(struct thread *td __unused, void *data)
490 {
491 	struct procctl_reaper_kill *rk;
492 
493 	rk = data;
494 	return ((rk->rk_flags & REAPER_KILL_CHILDREN) == 0);
495 }
496 
497 static int
498 reap_kill(struct thread *td, struct proc *p, void *data)
499 {
500 	struct reap_kill_proc_work w;
501 	struct proc *reaper;
502 	ksiginfo_t ksi;
503 	struct procctl_reaper_kill *rk;
504 	int error;
505 
506 	rk = data;
507 	sx_assert(&proctree_lock, SX_LOCKED);
508 	if (IN_CAPABILITY_MODE(td))
509 		return (ECAPMODE);
510 	if (rk->rk_sig <= 0 || rk->rk_sig > _SIG_MAXSIG ||
511 	    (rk->rk_flags & ~(REAPER_KILL_CHILDREN |
512 	    REAPER_KILL_SUBTREE)) != 0 || (rk->rk_flags &
513 	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE)) ==
514 	    (REAPER_KILL_CHILDREN | REAPER_KILL_SUBTREE))
515 		return (EINVAL);
516 	PROC_UNLOCK(p);
517 	reaper = (p->p_treeflag & P_TREE_REAPER) == 0 ? p->p_reaper : p;
518 	ksiginfo_init(&ksi);
519 	ksi.ksi_signo = rk->rk_sig;
520 	ksi.ksi_code = SI_USER;
521 	ksi.ksi_pid = td->td_proc->p_pid;
522 	ksi.ksi_uid = td->td_ucred->cr_ruid;
523 	error = ESRCH;
524 	rk->rk_killed = 0;
525 	rk->rk_fpid = -1;
526 	if ((rk->rk_flags & REAPER_KILL_CHILDREN) != 0) {
527 		reap_kill_children(td, reaper, rk, &ksi, &error);
528 	} else {
529 		w.cr = crhold(td->td_ucred);
530 		w.ksi = &ksi;
531 		w.rk = rk;
532 		w.error = &error;
533 		TASK_INIT(&w.t, 0, reap_kill_proc_work, &w);
534 
535 		/*
536 		 * Prevent swapout, since w, ksi, and possibly rk, are
537 		 * allocated on the stack.  We sleep in
538 		 * reap_kill_subtree_once() waiting for task to
539 		 * complete single-threading.
540 		 */
541 		PHOLD(td->td_proc);
542 
543 		reap_kill_subtree(td, p, reaper, &w);
544 		PRELE(td->td_proc);
545 		crfree(w.cr);
546 	}
547 	PROC_LOCK(p);
548 	return (error);
549 }
550 
551 static int
552 trace_ctl(struct thread *td, struct proc *p, void *data)
553 {
554 	int state;
555 
556 	PROC_LOCK_ASSERT(p, MA_OWNED);
557 	state = *(int *)data;
558 
559 	/*
560 	 * Ktrace changes p_traceflag from or to zero under the
561 	 * process lock, so the test does not need to acquire ktrace
562 	 * mutex.
563 	 */
564 	if ((p->p_flag & P_TRACED) != 0 || p->p_traceflag != 0)
565 		return (EBUSY);
566 
567 	switch (state) {
568 	case PROC_TRACE_CTL_ENABLE:
569 		if (td->td_proc != p)
570 			return (EPERM);
571 		p->p_flag2 &= ~(P2_NOTRACE | P2_NOTRACE_EXEC);
572 		break;
573 	case PROC_TRACE_CTL_DISABLE_EXEC:
574 		p->p_flag2 |= P2_NOTRACE_EXEC | P2_NOTRACE;
575 		break;
576 	case PROC_TRACE_CTL_DISABLE:
577 		if ((p->p_flag2 & P2_NOTRACE_EXEC) != 0) {
578 			KASSERT((p->p_flag2 & P2_NOTRACE) != 0,
579 			    ("dandling P2_NOTRACE_EXEC"));
580 			if (td->td_proc != p)
581 				return (EPERM);
582 			p->p_flag2 &= ~P2_NOTRACE_EXEC;
583 		} else {
584 			p->p_flag2 |= P2_NOTRACE;
585 		}
586 		break;
587 	default:
588 		return (EINVAL);
589 	}
590 	return (0);
591 }
592 
593 static int
594 trace_status(struct thread *td, struct proc *p, void *data)
595 {
596 	int *status;
597 
598 	status = data;
599 	if ((p->p_flag2 & P2_NOTRACE) != 0) {
600 		KASSERT((p->p_flag & P_TRACED) == 0,
601 		    ("%d traced but tracing disabled", p->p_pid));
602 		*status = -1;
603 	} else if ((p->p_flag & P_TRACED) != 0) {
604 		*status = p->p_pptr->p_pid;
605 	} else {
606 		*status = 0;
607 	}
608 	return (0);
609 }
610 
611 static int
612 trapcap_ctl(struct thread *td, struct proc *p, void *data)
613 {
614 	int state;
615 
616 	PROC_LOCK_ASSERT(p, MA_OWNED);
617 	state = *(int *)data;
618 
619 	switch (state) {
620 	case PROC_TRAPCAP_CTL_ENABLE:
621 		p->p_flag2 |= P2_TRAPCAP;
622 		break;
623 	case PROC_TRAPCAP_CTL_DISABLE:
624 		p->p_flag2 &= ~P2_TRAPCAP;
625 		break;
626 	default:
627 		return (EINVAL);
628 	}
629 	return (0);
630 }
631 
632 static int
633 trapcap_status(struct thread *td, struct proc *p, void *data)
634 {
635 	int *status;
636 
637 	status = data;
638 	*status = (p->p_flag2 & P2_TRAPCAP) != 0 ? PROC_TRAPCAP_CTL_ENABLE :
639 	    PROC_TRAPCAP_CTL_DISABLE;
640 	return (0);
641 }
642 
643 static int
644 no_new_privs_ctl(struct thread *td, struct proc *p, void *data)
645 {
646 	int state;
647 
648 	PROC_LOCK_ASSERT(p, MA_OWNED);
649 	state = *(int *)data;
650 
651 	if (state != PROC_NO_NEW_PRIVS_ENABLE)
652 		return (EINVAL);
653 	p->p_flag2 |= P2_NO_NEW_PRIVS;
654 	return (0);
655 }
656 
657 static int
658 no_new_privs_status(struct thread *td, struct proc *p, void *data)
659 {
660 
661 	*(int *)data = (p->p_flag2 & P2_NO_NEW_PRIVS) != 0 ?
662 	    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
663 	return (0);
664 }
665 
666 static int
667 protmax_ctl(struct thread *td, struct proc *p, void *data)
668 {
669 	int state;
670 
671 	PROC_LOCK_ASSERT(p, MA_OWNED);
672 	state = *(int *)data;
673 
674 	switch (state) {
675 	case PROC_PROTMAX_FORCE_ENABLE:
676 		p->p_flag2 &= ~P2_PROTMAX_DISABLE;
677 		p->p_flag2 |= P2_PROTMAX_ENABLE;
678 		break;
679 	case PROC_PROTMAX_FORCE_DISABLE:
680 		p->p_flag2 |= P2_PROTMAX_DISABLE;
681 		p->p_flag2 &= ~P2_PROTMAX_ENABLE;
682 		break;
683 	case PROC_PROTMAX_NOFORCE:
684 		p->p_flag2 &= ~(P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE);
685 		break;
686 	default:
687 		return (EINVAL);
688 	}
689 	return (0);
690 }
691 
692 static int
693 protmax_status(struct thread *td, struct proc *p, void *data)
694 {
695 	int d;
696 
697 	switch (p->p_flag2 & (P2_PROTMAX_ENABLE | P2_PROTMAX_DISABLE)) {
698 	case 0:
699 		d = PROC_PROTMAX_NOFORCE;
700 		break;
701 	case P2_PROTMAX_ENABLE:
702 		d = PROC_PROTMAX_FORCE_ENABLE;
703 		break;
704 	case P2_PROTMAX_DISABLE:
705 		d = PROC_PROTMAX_FORCE_DISABLE;
706 		break;
707 	}
708 	if (kern_mmap_maxprot(p, PROT_READ) == PROT_READ)
709 		d |= PROC_PROTMAX_ACTIVE;
710 	*(int *)data = d;
711 	return (0);
712 }
713 
714 static int
715 aslr_ctl(struct thread *td, struct proc *p, void *data)
716 {
717 	int state;
718 
719 	PROC_LOCK_ASSERT(p, MA_OWNED);
720 	state = *(int *)data;
721 
722 	switch (state) {
723 	case PROC_ASLR_FORCE_ENABLE:
724 		p->p_flag2 &= ~P2_ASLR_DISABLE;
725 		p->p_flag2 |= P2_ASLR_ENABLE;
726 		break;
727 	case PROC_ASLR_FORCE_DISABLE:
728 		p->p_flag2 |= P2_ASLR_DISABLE;
729 		p->p_flag2 &= ~P2_ASLR_ENABLE;
730 		break;
731 	case PROC_ASLR_NOFORCE:
732 		p->p_flag2 &= ~(P2_ASLR_ENABLE | P2_ASLR_DISABLE);
733 		break;
734 	default:
735 		return (EINVAL);
736 	}
737 	return (0);
738 }
739 
740 static int
741 aslr_status(struct thread *td, struct proc *p, void *data)
742 {
743 	struct vmspace *vm;
744 	int d;
745 
746 	switch (p->p_flag2 & (P2_ASLR_ENABLE | P2_ASLR_DISABLE)) {
747 	case 0:
748 		d = PROC_ASLR_NOFORCE;
749 		break;
750 	case P2_ASLR_ENABLE:
751 		d = PROC_ASLR_FORCE_ENABLE;
752 		break;
753 	case P2_ASLR_DISABLE:
754 		d = PROC_ASLR_FORCE_DISABLE;
755 		break;
756 	}
757 	if ((p->p_flag & P_WEXIT) == 0) {
758 		_PHOLD(p);
759 		PROC_UNLOCK(p);
760 		vm = vmspace_acquire_ref(p);
761 		if (vm != NULL) {
762 			if ((vm->vm_map.flags & MAP_ASLR) != 0)
763 				d |= PROC_ASLR_ACTIVE;
764 			vmspace_free(vm);
765 		}
766 		PROC_LOCK(p);
767 		_PRELE(p);
768 	}
769 	*(int *)data = d;
770 	return (0);
771 }
772 
773 static int
774 stackgap_ctl(struct thread *td, struct proc *p, void *data)
775 {
776 	int state;
777 
778 	PROC_LOCK_ASSERT(p, MA_OWNED);
779 	state = *(int *)data;
780 
781 	if ((state & ~(PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE |
782 	    PROC_STACKGAP_ENABLE_EXEC | PROC_STACKGAP_DISABLE_EXEC)) != 0)
783 		return (EINVAL);
784 	switch (state & (PROC_STACKGAP_ENABLE | PROC_STACKGAP_DISABLE)) {
785 	case PROC_STACKGAP_ENABLE:
786 		if ((p->p_flag2 & P2_STKGAP_DISABLE) != 0)
787 			return (EINVAL);
788 		break;
789 	case PROC_STACKGAP_DISABLE:
790 		p->p_flag2 |= P2_STKGAP_DISABLE;
791 		break;
792 	case 0:
793 		break;
794 	default:
795 		return (EINVAL);
796 	}
797 	switch (state & (PROC_STACKGAP_ENABLE_EXEC |
798 	    PROC_STACKGAP_DISABLE_EXEC)) {
799 	case PROC_STACKGAP_ENABLE_EXEC:
800 		p->p_flag2 &= ~P2_STKGAP_DISABLE_EXEC;
801 		break;
802 	case PROC_STACKGAP_DISABLE_EXEC:
803 		p->p_flag2 |= P2_STKGAP_DISABLE_EXEC;
804 		break;
805 	case 0:
806 		break;
807 	default:
808 		return (EINVAL);
809 	}
810 	return (0);
811 }
812 
813 static int
814 stackgap_status(struct thread *td, struct proc *p, void *data)
815 {
816 	int d;
817 
818 	PROC_LOCK_ASSERT(p, MA_OWNED);
819 
820 	d = (p->p_flag2 & P2_STKGAP_DISABLE) != 0 ? PROC_STACKGAP_DISABLE :
821 	    PROC_STACKGAP_ENABLE;
822 	d |= (p->p_flag2 & P2_STKGAP_DISABLE_EXEC) != 0 ?
823 	    PROC_STACKGAP_DISABLE_EXEC : PROC_STACKGAP_ENABLE_EXEC;
824 	*(int *)data = d;
825 	return (0);
826 }
827 
828 static int
829 wxmap_ctl(struct thread *td, struct proc *p, void *data)
830 {
831 	struct vmspace *vm;
832 	vm_map_t map;
833 	int state;
834 
835 	PROC_LOCK_ASSERT(p, MA_OWNED);
836 	if ((p->p_flag & P_WEXIT) != 0)
837 		return (ESRCH);
838 	state = *(int *)data;
839 
840 	switch (state) {
841 	case PROC_WX_MAPPINGS_PERMIT:
842 		p->p_flag2 |= P2_WXORX_DISABLE;
843 		_PHOLD(p);
844 		PROC_UNLOCK(p);
845 		vm = vmspace_acquire_ref(p);
846 		if (vm != NULL) {
847 			map = &vm->vm_map;
848 			vm_map_lock(map);
849 			map->flags &= ~MAP_WXORX;
850 			vm_map_unlock(map);
851 			vmspace_free(vm);
852 		}
853 		PROC_LOCK(p);
854 		_PRELE(p);
855 		break;
856 	case PROC_WX_MAPPINGS_DISALLOW_EXEC:
857 		p->p_flag2 |= P2_WXORX_ENABLE_EXEC;
858 		break;
859 	default:
860 		return (EINVAL);
861 	}
862 
863 	return (0);
864 }
865 
866 static int
867 wxmap_status(struct thread *td, struct proc *p, void *data)
868 {
869 	struct vmspace *vm;
870 	int d;
871 
872 	PROC_LOCK_ASSERT(p, MA_OWNED);
873 	if ((p->p_flag & P_WEXIT) != 0)
874 		return (ESRCH);
875 
876 	d = 0;
877 	if ((p->p_flag2 & P2_WXORX_DISABLE) != 0)
878 		d |= PROC_WX_MAPPINGS_PERMIT;
879 	if ((p->p_flag2 & P2_WXORX_ENABLE_EXEC) != 0)
880 		d |= PROC_WX_MAPPINGS_DISALLOW_EXEC;
881 	_PHOLD(p);
882 	PROC_UNLOCK(p);
883 	vm = vmspace_acquire_ref(p);
884 	if (vm != NULL) {
885 		if ((vm->vm_map.flags & MAP_WXORX) != 0)
886 			d |= PROC_WXORX_ENFORCE;
887 		vmspace_free(vm);
888 	}
889 	PROC_LOCK(p);
890 	_PRELE(p);
891 	*(int *)data = d;
892 	return (0);
893 }
894 
895 static int
896 pdeathsig_ctl(struct thread *td, struct proc *p, void *data)
897 {
898 	int signum;
899 
900 	signum = *(int *)data;
901 	if (p != td->td_proc || (signum != 0 && !_SIG_VALID(signum)))
902 		return (EINVAL);
903 	p->p_pdeathsig = signum;
904 	return (0);
905 }
906 
907 static int
908 pdeathsig_status(struct thread *td, struct proc *p, void *data)
909 {
910 	if (p != td->td_proc)
911 		return (EINVAL);
912 	*(int *)data = p->p_pdeathsig;
913 	return (0);
914 }
915 
/*
 * Values for procctl_cmd_info.lock_tree: how kern_procctl() takes
 * proctree_lock before running a command.
 */
enum {
	PCTL_SLOCKED,		/* taken shared, sx_slock() */
	PCTL_XLOCKED,		/* taken exclusive, sx_xlock() */
	PCTL_UNLOCKED,		/* presumably not taken at all */
};
921 
/*
 * Static description of one procctl(2) command, indexed by command
 * number in procctl_cmds_info[].
 */
struct procctl_cmd_info {
	/* PCTL_* value: how proctree_lock is taken. */
	int lock_tree;
	/* The command accepts only idtype P_PID. */
	bool one_proc : 1;
	/* Presumably reports ESRCH as EINVAL; user is outside this view. */
	bool esrch_is_einval : 1;
	/* Copy the result out to userspace even if the command failed. */
	bool copyout_on_error : 1;
	/* Without input data, a non-NULL data pointer is rejected. */
	bool no_nonnull_data : 1;
	/* Presumably requires a debug permission check on the target. */
	bool need_candebug : 1;
	/* Bytes copied in from the user data pointer, 0 for none. */
	int copyin_sz;
	/* Bytes copied out to the user data pointer, 0 for none. */
	int copyout_sz;
	/* Handler, called with the target process locked. */
	int (*exec)(struct thread *, struct proc *, void *);
	/*
	 * Optional.  When it returns true, kern_procctl() calls
	 * stop_all_proc_block() before running the command.
	 */
	bool (*sapblk)(struct thread *, void *);
};
934 static const struct procctl_cmd_info procctl_cmds_info[] = {
935 	[PROC_SPROTECT] =
936 	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
937 	      .esrch_is_einval = false, .no_nonnull_data = false,
938 	      .need_candebug = false,
939 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
940 	      .exec = protect_set, .copyout_on_error = false, },
941 	[PROC_REAP_ACQUIRE] =
942 	    { .lock_tree = PCTL_XLOCKED, .one_proc = true,
943 	      .esrch_is_einval = false, .no_nonnull_data = true,
944 	      .need_candebug = false,
945 	      .copyin_sz = 0, .copyout_sz = 0,
946 	      .exec = reap_acquire, .copyout_on_error = false, },
947 	[PROC_REAP_RELEASE] =
948 	    { .lock_tree = PCTL_XLOCKED, .one_proc = true,
949 	      .esrch_is_einval = false, .no_nonnull_data = true,
950 	      .need_candebug = false,
951 	      .copyin_sz = 0, .copyout_sz = 0,
952 	      .exec = reap_release, .copyout_on_error = false, },
953 	[PROC_REAP_STATUS] =
954 	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
955 	      .esrch_is_einval = false, .no_nonnull_data = false,
956 	      .need_candebug = false,
957 	      .copyin_sz = 0,
958 	      .copyout_sz = sizeof(struct procctl_reaper_status),
959 	      .exec = reap_status, .copyout_on_error = false, },
960 	[PROC_REAP_GETPIDS] =
961 	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
962 	      .esrch_is_einval = false, .no_nonnull_data = false,
963 	      .need_candebug = false,
964 	      .copyin_sz = sizeof(struct procctl_reaper_pids),
965 	      .copyout_sz = 0,
966 	      .exec = reap_getpids, .copyout_on_error = false, },
967 	[PROC_REAP_KILL] =
968 	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
969 	      .esrch_is_einval = false, .no_nonnull_data = false,
970 	      .need_candebug = false,
971 	      .copyin_sz = sizeof(struct procctl_reaper_kill),
972 	      .copyout_sz = sizeof(struct procctl_reaper_kill),
973 	      .exec = reap_kill, .copyout_on_error = true,
974 	      .sapblk = reap_kill_sapblk, },
975 	[PROC_TRACE_CTL] =
976 	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
977 	      .esrch_is_einval = false, .no_nonnull_data = false,
978 	      .need_candebug = true,
979 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
980 	      .exec = trace_ctl, .copyout_on_error = false, },
981 	[PROC_TRACE_STATUS] =
982 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
983 	      .esrch_is_einval = false, .no_nonnull_data = false,
984 	      .need_candebug = false,
985 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
986 	      .exec = trace_status, .copyout_on_error = false, },
987 	[PROC_TRAPCAP_CTL] =
988 	    { .lock_tree = PCTL_SLOCKED, .one_proc = false,
989 	      .esrch_is_einval = false, .no_nonnull_data = false,
990 	      .need_candebug = true,
991 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
992 	      .exec = trapcap_ctl, .copyout_on_error = false, },
993 	[PROC_TRAPCAP_STATUS] =
994 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
995 	      .esrch_is_einval = false, .no_nonnull_data = false,
996 	      .need_candebug = false,
997 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
998 	      .exec = trapcap_status, .copyout_on_error = false, },
999 	[PROC_PDEATHSIG_CTL] =
1000 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1001 	      .esrch_is_einval = true, .no_nonnull_data = false,
1002 	      .need_candebug = false,
1003 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
1004 	      .exec = pdeathsig_ctl, .copyout_on_error = false, },
1005 	[PROC_PDEATHSIG_STATUS] =
1006 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1007 	      .esrch_is_einval = true, .no_nonnull_data = false,
1008 	      .need_candebug = false,
1009 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
1010 	      .exec = pdeathsig_status, .copyout_on_error = false, },
1011 	[PROC_ASLR_CTL] =
1012 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1013 	      .esrch_is_einval = false, .no_nonnull_data = false,
1014 	      .need_candebug = true,
1015 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
1016 	      .exec = aslr_ctl, .copyout_on_error = false, },
1017 	[PROC_ASLR_STATUS] =
1018 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1019 	      .esrch_is_einval = false, .no_nonnull_data = false,
1020 	      .need_candebug = false,
1021 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
1022 	      .exec = aslr_status, .copyout_on_error = false, },
1023 	[PROC_PROTMAX_CTL] =
1024 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1025 	      .esrch_is_einval = false, .no_nonnull_data = false,
1026 	      .need_candebug = true,
1027 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
1028 	      .exec = protmax_ctl, .copyout_on_error = false, },
1029 	[PROC_PROTMAX_STATUS] =
1030 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1031 	      .esrch_is_einval = false, .no_nonnull_data = false,
1032 	      .need_candebug = false,
1033 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
1034 	      .exec = protmax_status, .copyout_on_error = false, },
1035 	[PROC_STACKGAP_CTL] =
1036 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1037 	      .esrch_is_einval = false, .no_nonnull_data = false,
1038 	      .need_candebug = true,
1039 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
1040 	      .exec = stackgap_ctl, .copyout_on_error = false, },
1041 	[PROC_STACKGAP_STATUS] =
1042 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1043 	      .esrch_is_einval = false, .no_nonnull_data = false,
1044 	      .need_candebug = false,
1045 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
1046 	      .exec = stackgap_status, .copyout_on_error = false, },
1047 	[PROC_NO_NEW_PRIVS_CTL] =
1048 	    { .lock_tree = PCTL_SLOCKED, .one_proc = true,
1049 	      .esrch_is_einval = false, .no_nonnull_data = false,
1050 	      .need_candebug = true,
1051 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
1052 	      .exec = no_new_privs_ctl, .copyout_on_error = false, },
1053 	[PROC_NO_NEW_PRIVS_STATUS] =
1054 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1055 	      .esrch_is_einval = false, .no_nonnull_data = false,
1056 	      .need_candebug = false,
1057 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
1058 	      .exec = no_new_privs_status, .copyout_on_error = false, },
1059 	[PROC_WXMAP_CTL] =
1060 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1061 	      .esrch_is_einval = false, .no_nonnull_data = false,
1062 	      .need_candebug = true,
1063 	      .copyin_sz = sizeof(int), .copyout_sz = 0,
1064 	      .exec = wxmap_ctl, .copyout_on_error = false, },
1065 	[PROC_WXMAP_STATUS] =
1066 	    { .lock_tree = PCTL_UNLOCKED, .one_proc = true,
1067 	      .esrch_is_einval = false, .no_nonnull_data = false,
1068 	      .need_candebug = false,
1069 	      .copyin_sz = 0, .copyout_sz = sizeof(int),
1070 	      .exec = wxmap_status, .copyout_on_error = false, },
1071 };
1072 
1073 int
1074 sys_procctl(struct thread *td, struct procctl_args *uap)
1075 {
1076 	union {
1077 		struct procctl_reaper_status rs;
1078 		struct procctl_reaper_pids rp;
1079 		struct procctl_reaper_kill rk;
1080 		int flags;
1081 	} x;
1082 	const struct procctl_cmd_info *cmd_info;
1083 	int error, error1;
1084 
1085 	if (uap->com >= PROC_PROCCTL_MD_MIN)
1086 		return (cpu_procctl(td, uap->idtype, uap->id,
1087 		    uap->com, uap->data));
1088 	if (uap->com == 0 || uap->com >= nitems(procctl_cmds_info))
1089 		return (EINVAL);
1090 	cmd_info = &procctl_cmds_info[uap->com];
1091 	bzero(&x, sizeof(x));
1092 
1093 	if (cmd_info->copyin_sz > 0) {
1094 		error = copyin(uap->data, &x, cmd_info->copyin_sz);
1095 		if (error != 0)
1096 			return (error);
1097 	} else if (cmd_info->no_nonnull_data && uap->data != NULL) {
1098 		return (EINVAL);
1099 	}
1100 
1101 	error = kern_procctl(td, uap->idtype, uap->id, uap->com, &x);
1102 
1103 	if (cmd_info->copyout_sz > 0 && (error == 0 ||
1104 	    cmd_info->copyout_on_error)) {
1105 		error1 = copyout(&x, uap->data, cmd_info->copyout_sz);
1106 		if (error == 0)
1107 			error = error1;
1108 	}
1109 	return (error);
1110 }
1111 
1112 static int
1113 kern_procctl_single(struct thread *td, struct proc *p, int com, void *data)
1114 {
1115 
1116 	PROC_LOCK_ASSERT(p, MA_OWNED);
1117 	return (procctl_cmds_info[com].exec(td, p, data));
1118 }
1119 
1120 int
1121 kern_procctl(struct thread *td, idtype_t idtype, id_t id, int com, void *data)
1122 {
1123 	struct pgrp *pg;
1124 	struct proc *p;
1125 	const struct procctl_cmd_info *cmd_info;
1126 	int error, first_error, ok;
1127 	bool sapblk;
1128 
1129 	MPASS(com > 0 && com < nitems(procctl_cmds_info));
1130 	cmd_info = &procctl_cmds_info[com];
1131 	if (idtype != P_PID && cmd_info->one_proc)
1132 		return (EINVAL);
1133 
1134 	sapblk = false;
1135 	if (cmd_info->sapblk != NULL) {
1136 		sapblk = cmd_info->sapblk(td, data);
1137 		if (sapblk && !stop_all_proc_block())
1138 			return (ERESTART);
1139 	}
1140 
1141 	switch (cmd_info->lock_tree) {
1142 	case PCTL_XLOCKED:
1143 		sx_xlock(&proctree_lock);
1144 		break;
1145 	case PCTL_SLOCKED:
1146 		sx_slock(&proctree_lock);
1147 		break;
1148 	default:
1149 		break;
1150 	}
1151 
1152 	switch (idtype) {
1153 	case P_PID:
1154 		if (id == 0) {
1155 			p = td->td_proc;
1156 			error = 0;
1157 			PROC_LOCK(p);
1158 		} else {
1159 			p = pfind(id);
1160 			if (p == NULL) {
1161 				error = cmd_info->esrch_is_einval ?
1162 				    EINVAL : ESRCH;
1163 				break;
1164 			}
1165 			error = cmd_info->need_candebug ? p_candebug(td, p) :
1166 			    p_cansee(td, p);
1167 		}
1168 		if (error == 0)
1169 			error = kern_procctl_single(td, p, com, data);
1170 		PROC_UNLOCK(p);
1171 		break;
1172 	case P_PGID:
1173 		/*
1174 		 * Attempt to apply the operation to all members of the
1175 		 * group.  Ignore processes in the group that can't be
1176 		 * seen.  Ignore errors so long as at least one process is
1177 		 * able to complete the request successfully.
1178 		 */
1179 		pg = pgfind(id);
1180 		if (pg == NULL) {
1181 			error = ESRCH;
1182 			break;
1183 		}
1184 		PGRP_UNLOCK(pg);
1185 		ok = 0;
1186 		first_error = 0;
1187 		LIST_FOREACH(p, &pg->pg_members, p_pglist) {
1188 			PROC_LOCK(p);
1189 			if (p->p_state == PRS_NEW ||
1190 			    p->p_state == PRS_ZOMBIE ||
1191 			    (cmd_info->need_candebug ? p_candebug(td, p) :
1192 			    p_cansee(td, p)) != 0) {
1193 				PROC_UNLOCK(p);
1194 				continue;
1195 			}
1196 			error = kern_procctl_single(td, p, com, data);
1197 			PROC_UNLOCK(p);
1198 			if (error == 0)
1199 				ok = 1;
1200 			else if (first_error == 0)
1201 				first_error = error;
1202 		}
1203 		if (ok)
1204 			error = 0;
1205 		else if (first_error != 0)
1206 			error = first_error;
1207 		else
1208 			/*
1209 			 * Was not able to see any processes in the
1210 			 * process group.
1211 			 */
1212 			error = ESRCH;
1213 		break;
1214 	default:
1215 		error = EINVAL;
1216 		break;
1217 	}
1218 
1219 	switch (cmd_info->lock_tree) {
1220 	case PCTL_XLOCKED:
1221 		sx_xunlock(&proctree_lock);
1222 		break;
1223 	case PCTL_SLOCKED:
1224 		sx_sunlock(&proctree_lock);
1225 		break;
1226 	default:
1227 		break;
1228 	}
1229 	if (sapblk)
1230 		stop_all_proc_unblock();
1231 	return (error);
1232 }
1233