/*
 * (MPSAFE)
 *
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)kern_proc.c	8.7 (Berkeley) 2/14/95
 * $FreeBSD: src/sys/kern/kern_proc.c,v 1.63.2.9 2003/05/08 07:47:16 kbyanc Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/filedesc.h>
#include <sys/tty.h>
#include <sys/dsched.h>
#include <sys/signalvar.h>
#include <sys/spinlock.h>
#include <vm/vm.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <sys/user.h>
#include <machine/smp.h>

#include <sys/refcount.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

static MALLOC_DEFINE(M_PGRP, "pgrp", "process group header");
MALLOC_DEFINE(M_SESSION, "session", "session header");
MALLOC_DEFINE(M_PROC, "proc", "Proc structures");
MALLOC_DEFINE(M_LWP, "lwp", "lwp structures");
MALLOC_DEFINE(M_SUBPROC, "subproc", "Proc sub-structures");

int ps_showallprocs = 1;
static int ps_showallthreads = 1;
SYSCTL_INT(_security, OID_AUTO, ps_showallprocs, CTLFLAG_RW,
    &ps_showallprocs, 0,
    "Unprivileged processes can see processes with different UID/GID");
SYSCTL_INT(_security, OID_AUTO, ps_showallthreads, CTLFLAG_RW,
    &ps_showallthreads, 0,
    "Unprivileged processes can see kernel threads");

static void pgdelete(struct pgrp *);
static void orphanpg(struct pgrp *pg);
static pid_t proc_getnewpid_locked(int random_offset);

/*
 * Other process lists
 */
struct pidhashhead *pidhashtbl;
u_long pidhash;
struct pgrphashhead *pgrphashtbl;
u_long pgrphash;
struct proclist allproc;
struct proclist zombproc;

/*
 * Random component to nextpid generation.  We mix in a random factor to make
 * it a little harder to predict.  We sanity check the modulus value here so
 * we don't have to in critical paths.  Don't let it be too small or we
 * pointlessly waste entropy, and don't let it be impossibly large.  Using a
 * modulus that is too big causes a LOT more process table scans and slows
 * down fork processing as the pidchecked caching is defeated.
 */
static int randompid = 0;

/*
 * No requirements.
 */
static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
{
	int error, pid;

	pid = randompid;
	error = sysctl_handle_int(oidp, &pid, 0, req);
	if (error || !req->newptr)
		return (error);
	if (pid < 0 || pid > PID_MAX - 100)     /* out of range */
		pid = PID_MAX - 100;
	else if (pid < 2)                       /* NOP */
		pid = 0;
	else if (pid < 100)                     /* Make it reasonable */
		pid = 100;
	randompid = pid;
	return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
	    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
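
/*
 * Usage sketch (hypothetical userland snippet, kept out of the build with
 * #if 0): the modulus above is tuned from userland through the
 * kern.randompid sysctl, e.g. via the standard sysctlbyname(3) interface.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>
#include <stdio.h>

int
main(void)
{
	int modulus = 1000;	/* fold up to 1000 pids of randomness in */

	/* set kern.randompid; the handler above clamps the value */
	if (sysctlbyname("kern.randompid", NULL, NULL,
			 &modulus, sizeof(modulus)) < 0) {
		perror("sysctlbyname");
		return (1);
	}
	return (0);
}
#endif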

/*
 * Initialize global process hashing structures.
 *
 * Called from the low level boot code only.
 */
void
procinit(void)
{
	LIST_INIT(&allproc);
	LIST_INIT(&zombproc);
	lwkt_init();
	pidhashtbl = hashinit(maxproc / 4, M_PROC, &pidhash);
	pgrphashtbl = hashinit(maxproc / 4, M_PROC, &pgrphash);
	uihashinit();
}

/*
 * Process hold/release support functions.  These functions must be MPSAFE.
 * Called via the PHOLD(), PRELE(), and PSTALL() macros.
 *
 * p->p_lock is a simple hold count with a waiting interlock.  No wakeup()
 * is issued unless someone is actually waiting for the process.
 *
 * Most holds are short-term, allowing a process scan or other similar
 * operation to access a proc structure without it getting ripped out from
 * under us.  procfs and process-list sysctl ops also use the hold function
 * interlocked with various p_flags to keep the vmspace intact when reading
 * or writing a user process's address space.
 *
 * There are two situations where a hold count can be longer.  Exiting lwps
 * hold the process until the lwp is reaped, and the parent will hold the
 * child during vfork()/exec() sequences while the child is marked P_PPWAIT.
 *
 * The kernel waits for the hold count to drop to 0 (or 1 in some cases) at
 * various critical points in the fork/exec and exit paths before proceeding.
 */
#define PLOCK_WAITING	0x40000000
#define PLOCK_MASK	0x3FFFFFFF

void
pstall(struct proc *p, const char *wmesg, int count)
{
	int o;
	int n;

	for (;;) {
		o = p->p_lock;
		cpu_ccfence();
		if ((o & PLOCK_MASK) <= count)
			break;
		n = o | PLOCK_WAITING;
		tsleep_interlock(&p->p_lock, 0);
		if (atomic_cmpset_int(&p->p_lock, o, n)) {
			tsleep(&p->p_lock, PINTERLOCKED, wmesg, 0);
		}
	}
}

void
phold(struct proc *p)
{
	int o;
	int n;

	for (;;) {
		o = p->p_lock;
		cpu_ccfence();
		n = o + 1;
		if (atomic_cmpset_int(&p->p_lock, o, n))
			break;
	}
}

void
prele(struct proc *p)
{
	int o;
	int n;

	/*
	 * Fast path
	 */
	if (atomic_cmpset_int(&p->p_lock, 1, 0))
		return;

	/*
	 * Slow path
	 */
	for (;;) {
		o = p->p_lock;
		KKASSERT((o & PLOCK_MASK) > 0);
		cpu_ccfence();
		n = (o - 1) & ~PLOCK_WAITING;
		if (atomic_cmpset_int(&p->p_lock, o, n)) {
			if (o & PLOCK_WAITING)
				wakeup(&p->p_lock);
			break;
		}
	}
}
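
/*
 * Usage sketch (hypothetical helper, kept out of the build with #if 0):
 * the short-term hold pattern described above.  pfind() below returns the
 * process with a hold already applied, so the caller only has to pair the
 * lookup with PRELE().
 */
#if 0
static int
example_inspect_pid(pid_t pid)
{
	struct proc *p;

	if ((p = pfind(pid)) == NULL)
		return (ESRCH);
	/* p cannot be reaped out from under us while the hold is active */
	kprintf("pid %d stat %d\n", (int)p->p_pid, (int)p->p_stat);
	PRELE(p);			/* drop the hold from pfind() */
	return (0);
}
#endif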

/*
 * Is p an inferior of the current process?
 *
 * No requirements.
 * The caller must hold proc_token if the caller wishes a stable result.
 */
int
inferior(struct proc *p)
{
	lwkt_gettoken(&proc_token);
	while (p != curproc) {
		if (p->p_pid == 0) {
			lwkt_reltoken(&proc_token);
			return (0);
		}
		p = p->p_pptr;
	}
	lwkt_reltoken(&proc_token);
	return (1);
}

/*
 * Locate a process by number.  The returned process will be referenced and
 * must be released with PRELE().
 *
 * No requirements.
 */
struct proc *
pfind(pid_t pid)
{
	struct proc *p;

	lwkt_gettoken(&proc_token);
	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
		if (p->p_pid == pid) {
			PHOLD(p);
			lwkt_reltoken(&proc_token);
			return (p);
		}
	}
	lwkt_reltoken(&proc_token);
	return (NULL);
}

/*
 * Locate a process by number.  The returned process is NOT referenced.
 * The caller should hold proc_token if the caller wishes a stable result.
 *
 * No requirements.
 */
struct proc *
pfindn(pid_t pid)
{
	struct proc *p;

	lwkt_gettoken(&proc_token);
	LIST_FOREACH(p, PIDHASH(pid), p_hash) {
		if (p->p_pid == pid) {
			lwkt_reltoken(&proc_token);
			return (p);
		}
	}
	lwkt_reltoken(&proc_token);
	return (NULL);
}

void
pgref(struct pgrp *pgrp)
{
	refcount_acquire(&pgrp->pg_refs);
}

void
pgrel(struct pgrp *pgrp)
{
	if (refcount_release(&pgrp->pg_refs))
		pgdelete(pgrp);
}

/*
 * Locate a process group by number.  The returned process group will be
 * referenced w/pgref() and must be released with pgrel() (or assigned
 * somewhere if you wish to keep the reference).
 *
 * No requirements.
 */
struct pgrp *
pgfind(pid_t pgid)
{
	struct pgrp *pgrp;

	lwkt_gettoken(&proc_token);
	LIST_FOREACH(pgrp, PGRPHASH(pgid), pg_hash) {
		if (pgrp->pg_id == pgid) {
			refcount_acquire(&pgrp->pg_refs);
			lwkt_reltoken(&proc_token);
			return (pgrp);
		}
	}
	lwkt_reltoken(&proc_token);
	return (NULL);
}
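
/*
 * Usage sketch (hypothetical helper, #if 0): pgfind() returns a referenced
 * pgrp, so every successful lookup must be balanced by a pgrel() once the
 * caller is done with the group.
 */
#if 0
static int
example_signal_pgrp(pid_t pgid, int sig)
{
	struct pgrp *pg;

	if ((pg = pgfind(pgid)) == NULL)
		return (ESRCH);
	pgsignal(pg, sig, 0);		/* operate while the ref is held */
	pgrel(pg);			/* drop the ref from pgfind() */
	return (0);
}
#endif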

/*
 * Move p to a new or existing process group (and session)
 *
 * No requirements.
 */
int
enterpgrp(struct proc *p, pid_t pgid, int mksess)
{
	struct pgrp *pgrp;
	struct pgrp *opgrp;
	int error;

	pgrp = pgfind(pgid);

	KASSERT(pgrp == NULL || !mksess,
		("enterpgrp: setsid into non-empty pgrp"));
	KASSERT(!SESS_LEADER(p),
		("enterpgrp: session leader attempted setpgrp"));

	if (pgrp == NULL) {
		pid_t savepid = p->p_pid;
		struct proc *np;
		/*
		 * new process group
		 */
		KASSERT(p->p_pid == pgid,
			("enterpgrp: new pgrp and pid != pgid"));
		if ((np = pfindn(savepid)) == NULL || np != p) {
			error = ESRCH;
			goto fatal;
		}
		pgrp = kmalloc(sizeof(struct pgrp), M_PGRP, M_WAITOK);
		if (mksess) {
			struct session *sess;

			/*
			 * new session
			 */
			sess = kmalloc(sizeof(struct session), M_SESSION,
				       M_WAITOK);
			sess->s_leader = p;
			sess->s_sid = p->p_pid;
			sess->s_count = 1;
			sess->s_ttyvp = NULL;
			sess->s_ttyp = NULL;
			bcopy(p->p_session->s_login, sess->s_login,
			      sizeof(sess->s_login));
			pgrp->pg_session = sess;
			KASSERT(p == curproc,
				("enterpgrp: mksession and p != curproc"));
			lwkt_gettoken(&p->p_token);
			p->p_flags &= ~P_CONTROLT;
			lwkt_reltoken(&p->p_token);
		} else {
			pgrp->pg_session = p->p_session;
			sess_hold(pgrp->pg_session);
		}
		pgrp->pg_id = pgid;
		LIST_INIT(&pgrp->pg_members);
		LIST_INSERT_HEAD(PGRPHASH(pgid), pgrp, pg_hash);
		pgrp->pg_jobc = 0;
		SLIST_INIT(&pgrp->pg_sigiolst);
		lwkt_token_init(&pgrp->pg_token, "pgrp_token");
		refcount_init(&pgrp->pg_refs, 1);
		lockinit(&pgrp->pg_lock, "pgwt", 0, 0);
	} else if (pgrp == p->p_pgrp) {
		pgrel(pgrp);
		goto done;
	} /* else pgfind() referenced the pgrp */

	/*
	 * Adjust eligibility of affected pgrps to participate in job control.
	 * Increment eligibility counts before decrementing, otherwise we
	 * could reach 0 spuriously during the first call.
	 */
	lwkt_gettoken(&pgrp->pg_token);
	lwkt_gettoken(&p->p_token);
	fixjobc(p, pgrp, 1);
	fixjobc(p, p->p_pgrp, 0);
	while ((opgrp = p->p_pgrp) != NULL) {
		lwkt_gettoken(&opgrp->pg_token);
		LIST_REMOVE(p, p_pglist);
		p->p_pgrp = NULL;
		lwkt_reltoken(&opgrp->pg_token);
		pgrel(opgrp);
	}
	p->p_pgrp = pgrp;
	LIST_INSERT_HEAD(&pgrp->pg_members, p, p_pglist);
	lwkt_reltoken(&p->p_token);
	lwkt_reltoken(&pgrp->pg_token);
done:
	error = 0;
fatal:
	return (error);
}

/*
 * Remove process from process group
 *
 * No requirements.
 */
int
leavepgrp(struct proc *p)
{
	struct pgrp *pg;

	lwkt_gettoken(&p->p_token);
	pg = p->p_pgrp;
	if (pg) {
		pgref(pg);
		lwkt_gettoken(&pg->pg_token);
		if (p->p_pgrp == pg) {
			p->p_pgrp = NULL;
			LIST_REMOVE(p, p_pglist);
			pgrel(pg);
		}
		lwkt_reltoken(&pg->pg_token);
		lwkt_reltoken(&p->p_token);	/* avoid chaining on rel */
		pgrel(pg);
	} else {
		lwkt_reltoken(&p->p_token);
	}
	return (0);
}

/*
 * Delete a process group.  Must be called only after the last ref has been
 * released.
 */
static void
pgdelete(struct pgrp *pgrp)
{
	/*
	 * Reset any sigio structures pointing to us as a result of
	 * F_SETOWN with our pgid.
	 */
	funsetownlst(&pgrp->pg_sigiolst);

	if (pgrp->pg_session->s_ttyp != NULL &&
	    pgrp->pg_session->s_ttyp->t_pgrp == pgrp)
		pgrp->pg_session->s_ttyp->t_pgrp = NULL;
	LIST_REMOVE(pgrp, pg_hash);
	sess_rele(pgrp->pg_session);
	kfree(pgrp, M_PGRP);
}

/*
 * Adjust the ref count on a session structure.  When the ref count falls to
 * zero the tty is disassociated from the session and the session structure
 * is freed.  Note that tty association is not itself ref-counted.
 *
 * No requirements.
 */
void
sess_hold(struct session *sp)
{
	lwkt_gettoken(&tty_token);
	++sp->s_count;
	lwkt_reltoken(&tty_token);
}

/*
 * No requirements.
 */
void
sess_rele(struct session *sp)
{
	struct tty *tp;

	KKASSERT(sp->s_count > 0);
	lwkt_gettoken(&tty_token);
	if (--sp->s_count == 0) {
		if (sp->s_ttyp && sp->s_ttyp->t_session) {
#ifdef TTY_DO_FULL_CLOSE
			/* FULL CLOSE, see ttyclearsession() */
			KKASSERT(sp->s_ttyp->t_session == sp);
			sp->s_ttyp->t_session = NULL;
#else
			/* HALF CLOSE, see ttyclearsession() */
			if (sp->s_ttyp->t_session == sp)
				sp->s_ttyp->t_session = NULL;
#endif
		}
		if ((tp = sp->s_ttyp) != NULL) {
			sp->s_ttyp = NULL;
			ttyunhold(tp);
		}
		kfree(sp, M_SESSION);
	}
	lwkt_reltoken(&tty_token);
}

/*
 * Adjust pgrp jobc counters when specified process changes process group.
 * We count the number of processes in each process group that "qualify"
 * the group for terminal job control (those with a parent in a different
 * process group of the same session).  If that count reaches zero, the
 * process group becomes orphaned.  Check both the specified process'
 * process group and that of its children.
 * entering == 0 => p is leaving specified group.
 * entering == 1 => p is entering specified group.
 *
 * No requirements.
 */
void
fixjobc(struct proc *p, struct pgrp *pgrp, int entering)
{
	struct pgrp *hispgrp;
	struct session *mysession;
	struct proc *np;

	/*
	 * Check p's parent to see whether p qualifies its own process
	 * group; if so, adjust count for p's process group.
	 */
	lwkt_gettoken(&p->p_token);	/* p_children scan */
	lwkt_gettoken(&pgrp->pg_token);

	mysession = pgrp->pg_session;
	if ((hispgrp = p->p_pptr->p_pgrp) != pgrp &&
	    hispgrp->pg_session == mysession) {
		if (entering)
			pgrp->pg_jobc++;
		else if (--pgrp->pg_jobc == 0)
			orphanpg(pgrp);
	}

	/*
	 * Check this process' children to see whether they qualify
	 * their process groups; if so, adjust counts for children's
	 * process groups.
	 */
	LIST_FOREACH(np, &p->p_children, p_sibling) {
		PHOLD(np);
		lwkt_gettoken(&np->p_token);
		if ((hispgrp = np->p_pgrp) != pgrp &&
		    hispgrp->pg_session == mysession &&
		    np->p_stat != SZOMB) {
			pgref(hispgrp);
			lwkt_gettoken(&hispgrp->pg_token);
			if (entering)
				hispgrp->pg_jobc++;
			else if (--hispgrp->pg_jobc == 0)
				orphanpg(hispgrp);
			lwkt_reltoken(&hispgrp->pg_token);
			pgrel(hispgrp);
		}
		lwkt_reltoken(&np->p_token);
		PRELE(np);
	}
	KKASSERT(pgrp->pg_refs > 0);
	lwkt_reltoken(&pgrp->pg_token);
	lwkt_reltoken(&p->p_token);
}
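
/*
 * Sketch of the qualification rule used above (hypothetical predicate,
 * #if 0): a process qualifies a group for job control when its parent
 * lives in a different process group of the same session.
 */
#if 0
static int
example_qualifies(struct proc *p, struct pgrp *pgrp)
{
	struct pgrp *hispgrp = p->p_pptr->p_pgrp;

	return (hispgrp != pgrp && hispgrp->pg_session == pgrp->pg_session);
}
#endif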

/*
 * A process group has become orphaned; if there are any stopped processes
 * in the group, hang up all processes in that group.
 *
 * The caller must hold pg_token.
 */
static void
orphanpg(struct pgrp *pg)
{
	struct proc *p;

	LIST_FOREACH(p, &pg->pg_members, p_pglist) {
		if (p->p_stat == SSTOP) {
			LIST_FOREACH(p, &pg->pg_members, p_pglist) {
				ksignal(p, SIGHUP);
				ksignal(p, SIGCONT);
			}
			return;
		}
	}
}

/*
 * Add a new process to the allproc list and the PID hash.  This
 * also assigns a pid to the new process.
 *
 * No requirements.
 */
void
proc_add_allproc(struct proc *p)
{
	int random_offset;

	if ((random_offset = randompid) != 0) {
		get_mplock();
		random_offset = karc4random() % random_offset;
		rel_mplock();
	}

	lwkt_gettoken(&proc_token);
	p->p_pid = proc_getnewpid_locked(random_offset);
	LIST_INSERT_HEAD(&allproc, p, p_list);
	LIST_INSERT_HEAD(PIDHASH(p->p_pid), p, p_hash);
	lwkt_reltoken(&proc_token);
}

/*
 * Calculate a new process pid.  This function is integrated into
 * proc_add_allproc() to guarantee that the new pid is not reused before
 * the new process can be added to the allproc list.
 *
 * The caller must hold proc_token.
 */
static
pid_t
proc_getnewpid_locked(int random_offset)
{
	static pid_t nextpid;
	static pid_t pidchecked;
	struct proc *p;

	/*
	 * Find an unused process ID.  We remember a range of unused IDs
	 * ready to use (from nextpid+1 through pidchecked-1).
	 */
	nextpid = nextpid + 1 + random_offset;
retry:
	/*
	 * If the process ID prototype has wrapped around,
	 * restart somewhat above 0, as the low-numbered procs
	 * tend to include daemons that don't exit.
	 */
	if (nextpid >= PID_MAX) {
		nextpid = nextpid % PID_MAX;
		if (nextpid < 100)
			nextpid += 100;
		pidchecked = 0;
	}
	if (nextpid >= pidchecked) {
		int doingzomb = 0;

		pidchecked = PID_MAX;

		/*
		 * Scan the active and zombie procs to check whether this pid
		 * is in use.  Remember the lowest pid that's greater
		 * than nextpid, so we can avoid checking for a while.
		 *
		 * NOTE: Processes in the midst of being forked may not
		 *	 yet have p_pgrp and p_pgrp->pg_session set up,
		 *	 so we have to check for NULL.
		 *
		 *	 Processes being torn down should be interlocked
		 *	 with proc_token prior to the clearing of their
		 *	 p_pgrp.
		 */
		p = LIST_FIRST(&allproc);
again:
		for (; p != NULL; p = LIST_NEXT(p, p_list)) {
			while (p->p_pid == nextpid ||
			    (p->p_pgrp && p->p_pgrp->pg_id == nextpid) ||
			    (p->p_pgrp && p->p_session &&
			     p->p_session->s_sid == nextpid)) {
				nextpid++;
				if (nextpid >= pidchecked)
					goto retry;
			}
			if (p->p_pid > nextpid && pidchecked > p->p_pid)
				pidchecked = p->p_pid;
			if (p->p_pgrp &&
			    p->p_pgrp->pg_id > nextpid &&
			    pidchecked > p->p_pgrp->pg_id) {
				pidchecked = p->p_pgrp->pg_id;
			}
			if (p->p_pgrp && p->p_session &&
			    p->p_session->s_sid > nextpid &&
			    pidchecked > p->p_session->s_sid) {
				pidchecked = p->p_session->s_sid;
			}
		}
		if (!doingzomb) {
			doingzomb = 1;
			p = LIST_FIRST(&zombproc);
			goto again;
		}
	}
	return(nextpid);
}

/*
 * Called from exit1 to remove a process from the allproc
 * list and move it to the zombie list.
 *
 * Caller must hold p->p_token.  We are required to wait until p_lock
 * becomes zero before we can manipulate the list, allowing allproc
 * scans to guarantee consistency during a list scan.
 */
void
proc_move_allproc_zombie(struct proc *p)
{
	lwkt_gettoken(&proc_token);
	PSTALL(p, "reap1", 0);
	LIST_REMOVE(p, p_list);
	LIST_INSERT_HEAD(&zombproc, p, p_list);
	LIST_REMOVE(p, p_hash);
	p->p_stat = SZOMB;
	lwkt_reltoken(&proc_token);
	dsched_exit_proc(p);
}

/*
 * This routine is called from kern_wait() and will remove the process
 * from the zombie list and the sibling list.  This routine will block
 * if someone has a lock on the process (p_lock).
 *
 * Caller must hold p->p_token.  We are required to wait until p_lock
 * becomes zero before we can manipulate the list, allowing allproc
 * scans to guarantee consistency during a list scan.
 */
void
proc_remove_zombie(struct proc *p)
{
	lwkt_gettoken(&proc_token);
	PSTALL(p, "reap2", 0);
	LIST_REMOVE(p, p_list); /* off zombproc */
	LIST_REMOVE(p, p_sibling);
	lwkt_reltoken(&proc_token);
}

/*
 * Scan all processes on the allproc list.  The process is automatically
 * held for the callback.  A return value of -1 terminates the loop.
 *
 * The callback is made with the process held and proc_token held.
 *
 * We limit the scan to the number of processes as-of the start of
 * the scan so as not to get caught up in an endless loop if new processes
 * are created more quickly than we can scan the old ones.  Add a little
 * slop to try to catch edge cases since nprocs can race.
 *
 * No requirements.
 */
void
allproc_scan(int (*callback)(struct proc *, void *), void *data)
{
	struct proc *p;
	int r;
	int limit = nprocs + ncpus;

	/*
	 * proc_token protects the allproc list and PHOLD() prevents the
	 * process from being removed from the allproc list or the zombproc
	 * list.
	 */
	lwkt_gettoken(&proc_token);
	LIST_FOREACH(p, &allproc, p_list) {
		PHOLD(p);
		r = callback(p, data);
		PRELE(p);
		if (r < 0)
			break;
		if (--limit < 0)
			break;
	}
	lwkt_reltoken(&proc_token);
}
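
/*
 * Usage sketch (hypothetical callback, #if 0): the scan applies PHOLD()
 * before each callback and PRELE() after, so the callback may inspect the
 * process freely; returning -1 stops the scan early.
 */
#if 0
static int
example_count_cb(struct proc *p, void *data)
{
	int *countp = data;

	if (p->p_stat == SACTIVE)
		++*countp;
	return (0);		/* keep scanning; -1 would terminate */
}

static int
example_count_active(void)
{
	int count = 0;

	allproc_scan(example_count_cb, &count);
	return (count);
}
#endif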

/*
 * Scan all lwps of processes on the allproc list.  The lwp is automatically
 * held for the callback.  A return value of -1 terminates the loop.
 *
 * The callback is made with the process and lwp both held, and proc_token
 * held.
 *
 * No requirements.
 */
void
alllwp_scan(int (*callback)(struct lwp *, void *), void *data)
{
	struct proc *p;
	struct lwp *lp;
	int r = 0;

	/*
	 * proc_token protects the allproc list and PHOLD() prevents the
	 * process from being removed from the allproc list or the zombproc
	 * list.
	 */
	lwkt_gettoken(&proc_token);
	LIST_FOREACH(p, &allproc, p_list) {
		PHOLD(p);
		FOREACH_LWP_IN_PROC(lp, p) {
			LWPHOLD(lp);
			r = callback(lp, data);
			LWPRELE(lp);
		}
		PRELE(p);
		if (r < 0)
			break;
	}
	lwkt_reltoken(&proc_token);
}

/*
 * Scan all processes on the zombproc list.  The process is automatically
 * held for the callback.  A return value of -1 terminates the loop.
 *
 * No requirements.
 * The callback is made with the process held and proc_token held.
 */
void
zombproc_scan(int (*callback)(struct proc *, void *), void *data)
{
	struct proc *p;
	int r;

	lwkt_gettoken(&proc_token);
	LIST_FOREACH(p, &zombproc, p_list) {
		PHOLD(p);
		r = callback(p, data);
		PRELE(p);
		if (r < 0)
			break;
	}
	lwkt_reltoken(&proc_token);
}

#include "opt_ddb.h"
#ifdef DDB
#include <ddb/ddb.h>

/*
 * Debugging only
 */
DB_SHOW_COMMAND(pgrpdump, pgrpdump)
{
	struct pgrp *pgrp;
	struct proc *p;
	int i;

	for (i = 0; i <= pgrphash; i++) {
		if (!LIST_EMPTY(&pgrphashtbl[i])) {
			kprintf("\tindx %d\n", i);
			LIST_FOREACH(pgrp, &pgrphashtbl[i], pg_hash) {
				kprintf(
			"\tpgrp %p, pgid %ld, sess %p, sesscnt %d, mem %p\n",
				    (void *)pgrp, (long)pgrp->pg_id,
				    (void *)pgrp->pg_session,
				    pgrp->pg_session->s_count,
				    (void *)LIST_FIRST(&pgrp->pg_members));
				LIST_FOREACH(p, &pgrp->pg_members, p_pglist) {
					kprintf("\t\tpid %ld addr %p pgrp %p\n",
					    (long)p->p_pid, (void *)p,
					    (void *)p->p_pgrp);
				}
			}
		}
	}
}
#endif /* DDB */

/*
 * Locate a process on the zombie list.  Return a process or NULL.
 * The returned process will be referenced and the caller must release
 * it with PRELE().
 *
 * No other requirements.
 */
struct proc *
zpfind(pid_t pid)
{
	struct proc *p;

	lwkt_gettoken(&proc_token);
	LIST_FOREACH(p, &zombproc, p_list) {
		if (p->p_pid == pid) {
			PHOLD(p);
			lwkt_reltoken(&proc_token);
			return (p);
		}
	}
	lwkt_reltoken(&proc_token);
	return (NULL);
}

/*
 * The caller must hold proc_token.
 */
static int
sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags)
{
	struct kinfo_proc ki;
	struct lwp *lp;
	int skp = 0, had_output = 0;
	int error;

	bzero(&ki, sizeof(ki));
	lwkt_gettoken(&p->p_token);
	fill_kinfo_proc(p, &ki);
	if ((flags & KERN_PROC_FLAG_LWP) == 0)
		skp = 1;
	error = 0;
	FOREACH_LWP_IN_PROC(lp, p) {
		LWPHOLD(lp);
		fill_kinfo_lwp(lp, &ki.kp_lwp);
		had_output = 1;
		error = SYSCTL_OUT(req, &ki, sizeof(ki));
		LWPRELE(lp);
		if (error)
			break;
		if (skp)
			break;
	}
	lwkt_reltoken(&p->p_token);
	/* We need to output at least the proc, even if there is no lwp. */
	if (had_output == 0) {
		error = SYSCTL_OUT(req, &ki, sizeof(ki));
	}
	return (error);
}

/*
 * The caller must hold proc_token.
 */
static int
sysctl_out_proc_kthread(struct thread *td, struct sysctl_req *req, int flags)
{
	struct kinfo_proc ki;
	int error;

	fill_kinfo_proc_kthread(td, &ki);
	error = SYSCTL_OUT(req, &ki, sizeof(ki));
	return (error);
}

/*
 * No requirements.
 */
static int
sysctl_kern_proc(SYSCTL_HANDLER_ARGS)
{
	int *name = (int*) arg1;
	int oid = oidp->oid_number;
	u_int namelen = arg2;
	struct proc *p;
	struct proclist *plist;
	struct thread *td;
	struct thread *marker;
	int doingzomb, flags = 0;
	int error = 0;
	int n;
	int origcpu;
	struct ucred *cr1 = curproc->p_ucred;

	flags = oid & KERN_PROC_FLAGMASK;
	oid &= ~KERN_PROC_FLAGMASK;

	if ((oid == KERN_PROC_ALL && namelen != 0) ||
	    (oid != KERN_PROC_ALL && namelen != 1)) {
		return (EINVAL);
	}

	/*
	 * proc_token protects the allproc list and PHOLD() prevents the
	 * process from being removed from the allproc list or the zombproc
	 * list.
	 */
	lwkt_gettoken(&proc_token);
	if (oid == KERN_PROC_PID) {
		p = pfindn((pid_t)name[0]);
		if (p == NULL)
			goto post_threads;
		if (!PRISON_CHECK(cr1, p->p_ucred))
			goto post_threads;
		PHOLD(p);
		error = sysctl_out_proc(p, req, flags);
		PRELE(p);
		goto post_threads;
	}

	if (!req->oldptr) {
		/* overestimate by 5 procs */
		error = SYSCTL_OUT(req, 0, sizeof (struct kinfo_proc) * 5);
		if (error)
			goto post_threads;
	}
	for (doingzomb = 0; doingzomb <= 1; doingzomb++) {
		if (doingzomb)
			plist = &zombproc;
		else
			plist = &allproc;
		LIST_FOREACH(p, plist, p_list) {
			/*
			 * Show a user only their processes.
			 */
			if ((!ps_showallprocs) && p_trespass(cr1, p->p_ucred))
				continue;
			/*
			 * Skip embryonic processes.
			 */
			if (p->p_stat == SIDL)
				continue;
			/*
			 * TODO - make more efficient (see notes below).
			 * do by session.
			 */
			switch (oid) {
			case KERN_PROC_PGRP:
				/* could do this by traversing pgrp */
				if (p->p_pgrp == NULL ||
				    p->p_pgrp->pg_id != (pid_t)name[0])
					continue;
				break;

			case KERN_PROC_TTY:
				if ((p->p_flags & P_CONTROLT) == 0 ||
				    p->p_session == NULL ||
				    p->p_session->s_ttyp == NULL ||
				    dev2udev(p->p_session->s_ttyp->t_dev) !=
					(udev_t)name[0])
					continue;
				break;

			case KERN_PROC_UID:
				if (p->p_ucred == NULL ||
				    p->p_ucred->cr_uid != (uid_t)name[0])
					continue;
				break;

			case KERN_PROC_RUID:
				if (p->p_ucred == NULL ||
				    p->p_ucred->cr_ruid != (uid_t)name[0])
					continue;
				break;
			}

			if (!PRISON_CHECK(cr1, p->p_ucred))
				continue;
			PHOLD(p);
			error = sysctl_out_proc(p, req, flags);
			PRELE(p);
			if (error)
				goto post_threads;
		}
	}

	/*
	 * Iterate over all active cpus and scan their thread list.  Start
	 * with the next logical cpu and end with our original cpu.  We
	 * migrate our own thread to each target cpu in order to safely scan
	 * its thread list.  In the last loop we migrate back to our original
	 * cpu.
	 */
	origcpu = mycpu->gd_cpuid;
	if (!ps_showallthreads || jailed(cr1))
		goto post_threads;

	marker = kmalloc(sizeof(struct thread), M_TEMP, M_WAITOK|M_ZERO);
	marker->td_flags = TDF_MARKER;
	error = 0;

	for (n = 1; n <= ncpus; ++n) {
		globaldata_t rgd;
		int nid;

		nid = (origcpu + n) % ncpus;
		if ((smp_active_mask & CPUMASK(nid)) == 0)
			continue;
		rgd = globaldata_find(nid);
		lwkt_setcpu_self(rgd);

		crit_enter();
		TAILQ_INSERT_TAIL(&rgd->gd_tdallq, marker, td_allq);

		while ((td = TAILQ_PREV(marker, lwkt_queue, td_allq)) != NULL) {
			TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq);
			TAILQ_INSERT_BEFORE(td, marker, td_allq);
			if (td->td_flags & TDF_MARKER)
				continue;
			if (td->td_proc)
				continue;

			lwkt_hold(td);
			crit_exit();

			switch (oid) {
			case KERN_PROC_PGRP:
			case KERN_PROC_TTY:
			case KERN_PROC_UID:
			case KERN_PROC_RUID:
				break;
			default:
				error = sysctl_out_proc_kthread(td, req,
								flags);
				break;
			}
			lwkt_rele(td);
			crit_enter();
			if (error)
				break;
		}
		TAILQ_REMOVE(&rgd->gd_tdallq, marker, td_allq);
		crit_exit();

		if (error)
			break;
	}
	kfree(marker, M_TEMP);

post_threads:
	lwkt_reltoken(&proc_token);
	return (error);
}

/*
 * This sysctl allows a process to retrieve the argument list or process
 * title for another process without groping around in the address space
 * of the other process.  It also allows a process to set its own "process
 * title" to a string of its own choice.
 *
 * No requirements.
 */
static int
sysctl_kern_proc_args(SYSCTL_HANDLER_ARGS)
{
	int *name = (int*) arg1;
	u_int namelen = arg2;
	struct proc *p;
	struct pargs *opa;
	struct pargs *pa;
	int error = 0;
	struct ucred *cr1 = curproc->p_ucred;

	if (namelen != 1)
		return (EINVAL);

	p = pfind((pid_t)name[0]);
	if (p == NULL)
		goto done;
	lwkt_gettoken(&p->p_token);

	if ((!ps_argsopen) && p_trespass(cr1, p->p_ucred))
		goto done;

	if (req->newptr && curproc != p) {
		error = EPERM;
		goto done;
	}
	if (req->oldptr && (pa = p->p_args) != NULL) {
		refcount_acquire(&pa->ar_ref);
		error = SYSCTL_OUT(req, pa->ar_args, pa->ar_length);
		if (refcount_release(&pa->ar_ref))
			kfree(pa, M_PARGS);
	}
	if (req->newptr == NULL)
		goto done;

	if (req->newlen + sizeof(struct pargs) > ps_arg_cache_limit) {
		goto done;
	}

	pa = kmalloc(sizeof(struct pargs) + req->newlen, M_PARGS, M_WAITOK);
	refcount_init(&pa->ar_ref, 1);
	pa->ar_length = req->newlen;
	error = SYSCTL_IN(req, pa->ar_args, req->newlen);
	if (error) {
		kfree(pa, M_PARGS);
		goto done;
	}

	/*
	 * Replace p_args with the new pa.  p_args may have previously
	 * been NULL.
	 */
	opa = p->p_args;
	p->p_args = pa;

	if (opa) {
		KKASSERT(opa->ar_ref > 0);
		if (refcount_release(&opa->ar_ref)) {
			kfree(opa, M_PARGS);
			/* opa = NULL; */
		}
	}
done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}
	return (error);
}

static int
sysctl_kern_proc_cwd(SYSCTL_HANDLER_ARGS)
{
	int *name = (int*) arg1;
	u_int namelen = arg2;
	struct proc *p;
	int error = 0;
	char *fullpath, *freepath;
	struct ucred *cr1 = curproc->p_ucred;

	if (namelen != 1)
		return (EINVAL);

	p = pfind((pid_t)name[0]);
	if (p == NULL)
		goto done;
	lwkt_gettoken(&p->p_token);

	/*
	 * If we are not allowed to see other args, we certainly shouldn't
	 * get the cwd either. Also check the usual trespassing.
	 */
	if ((!ps_argsopen) && p_trespass(cr1, p->p_ucred))
		goto done;

	if (req->oldptr && p->p_fd != NULL && p->p_fd->fd_ncdir.ncp) {
		struct nchandle nch;

		cache_copy(&p->p_fd->fd_ncdir, &nch);
		error = cache_fullpath(p, &nch, &fullpath, &freepath, 0);
		cache_drop(&nch);
		if (error)
			goto done;
		error = SYSCTL_OUT(req, fullpath, strlen(fullpath) + 1);
		kfree(freepath, M_TEMP);
	}

done:
	if (p) {
		lwkt_reltoken(&p->p_token);
		PRELE(p);
	}
	return (error);
}

SYSCTL_NODE(_kern, KERN_PROC, proc, CTLFLAG_RD,  0, "Process table");

SYSCTL_PROC(_kern_proc, KERN_PROC_ALL, all, CTLFLAG_RD|CTLTYPE_STRUCT,
	0, 0, sysctl_kern_proc, "S,proc", "Return entire process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_PGRP, pgrp, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_TTY, tty, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_UID, uid, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_RUID, ruid, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_PID, pid, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_ALL | KERN_PROC_FLAG_LWP), all_lwp, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_PGRP | KERN_PROC_FLAG_LWP), pgrp_lwp, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_TTY | KERN_PROC_FLAG_LWP), tty_lwp, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_UID | KERN_PROC_FLAG_LWP), uid_lwp, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_RUID | KERN_PROC_FLAG_LWP), ruid_lwp, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, (KERN_PROC_PID | KERN_PROC_FLAG_LWP), pid_lwp, CTLFLAG_RD,
	sysctl_kern_proc, "Process table");

SYSCTL_NODE(_kern_proc, KERN_PROC_ARGS, args, CTLFLAG_RW | CTLFLAG_ANYBODY,
	sysctl_kern_proc_args, "Process argument list");

SYSCTL_NODE(_kern_proc, KERN_PROC_CWD, cwd, CTLFLAG_RD | CTLFLAG_ANYBODY,
	sysctl_kern_proc_cwd, "Process current working directory");
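
/*
 * Usage sketch (hypothetical userland program, #if 0): reading one
 * kinfo_proc back through the kern.proc.pid node declared above.  The
 * header names and kp_* field names follow the kinfo interface used by
 * fill_kinfo_proc() in this file.
 */
#if 0
#include <sys/types.h>
#include <sys/user.h>
#include <sys/sysctl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, (int)getpid() };
	struct kinfo_proc kp;
	size_t len = sizeof(kp);

	if (sysctl(mib, 4, &kp, &len, NULL, 0) < 0) {
		perror("sysctl");
		return (1);
	}
	printf("pid %d comm %s\n", (int)kp.kp_pid, kp.kp_comm);
	return (0);
}
#endif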
1309