xref: /illumos-gate/usr/src/uts/common/fs/proc/prcontrol.c (revision 179c3dac)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <sys/types.h>
28 #include <sys/uio.h>
29 #include <sys/param.h>
30 #include <sys/cmn_err.h>
31 #include <sys/cred.h>
32 #include <sys/policy.h>
33 #include <sys/debug.h>
34 #include <sys/errno.h>
35 #include <sys/file.h>
36 #include <sys/inline.h>
37 #include <sys/kmem.h>
38 #include <sys/proc.h>
39 #include <sys/regset.h>
40 #include <sys/sysmacros.h>
41 #include <sys/systm.h>
42 #include <sys/vfs.h>
43 #include <sys/vnode.h>
44 #include <sys/signal.h>
45 #include <sys/auxv.h>
46 #include <sys/user.h>
47 #include <sys/class.h>
48 #include <sys/fault.h>
49 #include <sys/syscall.h>
50 #include <sys/procfs.h>
51 #include <sys/zone.h>
52 #include <sys/copyops.h>
53 #include <sys/schedctl.h>
54 #include <vm/as.h>
55 #include <vm/seg.h>
56 #include <fs/proc/prdata.h>
57 #include <sys/contract/process_impl.h>
58 
/*
 * Forward declarations of file-local (static) helpers that are called by
 * the control-message dispatchers, pr_control() and pr_control32(), before
 * their definitions appear.  The SPARC-only helpers are declared only when
 * __sparc is defined.
 */
static	void	pr_settrace(proc_t *, sigset_t *);
static	int	pr_setfpregs(prnode_t *, prfpregset_t *);
#if defined(__sparc)
static	int	pr_setxregs(prnode_t *, prxregset_t *);
static	int	pr_setasrs(prnode_t *, asrset_t);
#endif
static	int	pr_setvaddr(prnode_t *, caddr_t);
static	int	pr_clearsig(prnode_t *);
static	int	pr_clearflt(prnode_t *);
static	int	pr_watch(prnode_t *, prwatch_t *, int *);
static	int	pr_agent(prnode_t *, prgregset_t, int *);
static	int	pr_rdwr(proc_t *, enum uio_rw, priovec_t *);
static	int	pr_scred(proc_t *, prcred_t *, cred_t *, boolean_t);
static	int	pr_spriv(proc_t *, prpriv_t *, cred_t *);
static	int	pr_szoneid(proc_t *, zoneid_t, cred_t *);
static	void	pauselwps(proc_t *);
static	void	unpauselwps(proc_t *);
76 
/*
 * Operand of a native control message.  prwritectl() overlays this union
 * on the words that follow the command word in its staging buffer, and
 * pr_control() reads the member that corresponds to the command (noted
 * beside each member).  Its size also feeds the computation of the
 * staging buffer size in prwritectl().
 */
typedef union {
	long		sig;		/* PCKILL, PCUNKILL */
	long		nice;		/* PCNICE */
	long		timeo;		/* PCTWSTOP */
	ulong_t		flags;		/* PCRUN, PCSET, PCUNSET */
	caddr_t		vaddr;		/* PCSVADDR */
	siginfo_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset_t	prfpregset;	/* PCSFPREG */
#if defined(__sparc)
	prxregset_t	prxregset;	/* PCSXREG */
	asrset_t	asrset;		/* PCSASRS */
#endif
	prwatch_t	prwatch;	/* PCWATCH */
	priovec_t	priovec;	/* PCREAD, PCWRITE */
	prcred_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	long		przoneid;	/* PCSZONE */
} arg_t;
99 
100 static	int	pr_control(long, arg_t *, prnode_t *, cred_t *);
101 
102 static size_t
103 ctlsize(long cmd, size_t resid, arg_t *argp)
104 {
105 	size_t size = sizeof (long);
106 	size_t rnd;
107 	int ngrp;
108 
109 	switch (cmd) {
110 	case PCNULL:
111 	case PCSTOP:
112 	case PCDSTOP:
113 	case PCWSTOP:
114 	case PCCSIG:
115 	case PCCFAULT:
116 		break;
117 	case PCSSIG:
118 		size += sizeof (siginfo_t);
119 		break;
120 	case PCTWSTOP:
121 		size += sizeof (long);
122 		break;
123 	case PCKILL:
124 	case PCUNKILL:
125 	case PCNICE:
126 		size += sizeof (long);
127 		break;
128 	case PCRUN:
129 	case PCSET:
130 	case PCUNSET:
131 		size += sizeof (ulong_t);
132 		break;
133 	case PCSVADDR:
134 		size += sizeof (caddr_t);
135 		break;
136 	case PCSTRACE:
137 	case PCSHOLD:
138 		size += sizeof (sigset_t);
139 		break;
140 	case PCSFAULT:
141 		size += sizeof (fltset_t);
142 		break;
143 	case PCSENTRY:
144 	case PCSEXIT:
145 		size += sizeof (sysset_t);
146 		break;
147 	case PCSREG:
148 	case PCAGENT:
149 		size += sizeof (prgregset_t);
150 		break;
151 	case PCSFPREG:
152 		size += sizeof (prfpregset_t);
153 		break;
154 #if defined(__sparc)
155 	case PCSXREG:
156 		size += sizeof (prxregset_t);
157 		break;
158 	case PCSASRS:
159 		size += sizeof (asrset_t);
160 		break;
161 #endif
162 	case PCWATCH:
163 		size += sizeof (prwatch_t);
164 		break;
165 	case PCREAD:
166 	case PCWRITE:
167 		size += sizeof (priovec_t);
168 		break;
169 	case PCSCRED:
170 		size += sizeof (prcred_t);
171 		break;
172 	case PCSCREDX:
173 		/*
174 		 * We cannot derefence the pr_ngroups fields if it
175 		 * we don't have enough data.
176 		 */
177 		if (resid < size + sizeof (prcred_t) - sizeof (gid_t))
178 			return (0);
179 		ngrp = argp->prcred.pr_ngroups;
180 		if (ngrp < 0 || ngrp > ngroups_max)
181 			return (0);
182 
183 		/* The result can be smaller than sizeof (prcred_t) */
184 		size += sizeof (prcred_t) - sizeof (gid_t);
185 		size += ngrp * sizeof (gid_t);
186 		break;
187 	case PCSPRIV:
188 		if (resid >= size + sizeof (prpriv_t))
189 			size += priv_prgetprivsize(&argp->prpriv);
190 		else
191 			return (0);
192 		break;
193 	case PCSZONE:
194 		size += sizeof (long);
195 		break;
196 	default:
197 		return (0);
198 	}
199 
200 	/* Round up to a multiple of long, unless exact amount written */
201 	if (size < resid) {
202 		rnd = size & (sizeof (long) - 1);
203 
204 		if (rnd != 0)
205 			size += sizeof (long) - rnd;
206 	}
207 
208 	if (size > resid)
209 		return (0);
210 	return (size);
211 }
212 
213 /*
214  * Control operations (lots).
215  */
216 int
217 prwritectl(vnode_t *vp, uio_t *uiop, cred_t *cr)
218 {
219 #define	MY_BUFFER_SIZE \
220 		100 > 1 + sizeof (arg_t) / sizeof (long) ? \
221 		100 : 1 + sizeof (arg_t) / sizeof (long)
222 	long buf[MY_BUFFER_SIZE];
223 	long *bufp;
224 	size_t resid = 0;
225 	size_t size;
226 	prnode_t *pnp = VTOP(vp);
227 	int error;
228 	int locked = 0;
229 
230 	while (uiop->uio_resid) {
231 		/*
232 		 * Read several commands in one gulp.
233 		 */
234 		bufp = buf;
235 		if (resid) {	/* move incomplete command to front of buffer */
236 			long *tail;
237 
238 			if (resid >= sizeof (buf))
239 				break;
240 			tail = (long *)((char *)buf + sizeof (buf) - resid);
241 			do {
242 				*bufp++ = *tail++;
243 			} while ((resid -= sizeof (long)) != 0);
244 		}
245 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
246 		if (resid > uiop->uio_resid)
247 			resid = uiop->uio_resid;
248 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
249 			return (error);
250 		resid += (char *)bufp - (char *)buf;
251 		bufp = buf;
252 
253 		do {		/* loop over commands in buffer */
254 			long cmd = bufp[0];
255 			arg_t *argp = (arg_t *)&bufp[1];
256 
257 			size = ctlsize(cmd, resid, argp);
258 			if (size == 0) {  /* incomplete or invalid command */
259 				if (locked) {
260 					prunlock(pnp);
261 					locked = 0;
262 				}
263 				return (resid? EINVAL : 0);
264 			}
265 			/*
266 			 * Perform the specified control operation.
267 			 */
268 			if (!locked) {
269 				if ((error = prlock(pnp, ZNO)) != 0)
270 					return (error);
271 				locked = 1;
272 			}
273 			if (error = pr_control(cmd, argp, pnp, cr)) {
274 				if (error == -1)	/* -1 is timeout */
275 					locked = 0;
276 				else
277 					return (error);
278 			}
279 			bufp = (long *)((char *)bufp + size);
280 		} while ((resid -= size) != 0);
281 
282 		if (locked) {
283 			prunlock(pnp);
284 			locked = 0;
285 		}
286 	}
287 	return (resid? EINVAL : 0);
288 }
289 
/*
 * Execute one native control operation.
 *
 *	cmd	the control command (PCxxx)
 *	argp	the command's operand; the union member read depends on cmd
 *	pnp	the /proc node the message was written to
 *	cr	credential handed to the operations that take one
 *
 * prwritectl() calls this after a successful prlock(pnp).  The return
 * value also tells the caller what happened to that lock:
 *
 *	0	the operation succeeded (PCNULL returns 0 immediately).
 *	-1	PCSSIG or PCKILL delivered SIGKILL: prunlock() was called
 *		here before pr_wait_die().  pr_wait_stop() can also return
 *		-1, which its callers describe as a timeout.
 *	other	an errno value.  On the common exit path at the bottom of
 *		this function prunlock(pnp) is called before returning.
 *		The early returns for pr_wait_stop(), pr_watch() and
 *		pr_agent() skip that prunlock(); presumably the callee has
 *		already dropped the lock in those cases.
 */
static int
pr_control(long cmd, arg_t *argp, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;	/* set by pr_watch()/pr_agent() through a pointer */
	int error = 0;

	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
		{
			time_t timeo;

			/*
			 * Can't apply to a system process.
			 */
			if ((p->p_flag & SSYS) || p->p_as == &kas) {
				error = EBUSY;
				break;
			}

			if (cmd == PCSTOP || cmd == PCDSTOP)
				pr_stop(pnp);

			if (cmd == PCDSTOP)
				break;

			/*
			 * If an lwp is waiting for itself or its process,
			 * don't wait. The stopped lwp would never see the
			 * fact that it is stopped.
			 */
			if ((pcp->prc_flags & PRC_LWP)?
			    (pcp->prc_thread == curthread) : (p == curproc)) {
				if (cmd == PCWSTOP || cmd == PCTWSTOP)
					error = EBUSY;
				break;
			}

			/* only PCTWSTOP carries a timeout operand */
			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
			if ((error = pr_wait_stop(pnp, timeo)) != 0)
				return (error);

			break;
		}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p,  &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		error = pr_setsig(pnp, &argp->siginfo);
		if (argp->siginfo.si_signo == SIGKILL && error == 0) {
			/* drop the lock, then wait for the target to die */
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		if (error == 0 && argp->sig == SIGKILL) {
			/* drop the lock, then wait for the target to die */
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, argp->flags);
		break;

	case PCSREG:	/* set general registers */
		{
			kthread_t *t = pr_thread(pnp);	/* returns locked t */

			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				thread_unlock(t);
				error = EBUSY;
			} else {
				thread_unlock(t);
				/* p->p_lock is dropped around prsetprregs() */
				mutex_exit(&p->p_lock);
				prsetprregs(ttolwp(t), argp->prgregset, 0);
				mutex_enter(&p->p_lock);
			}
			break;
		}

	case PCSFPREG:	/* set floating-point registers */
		error = pr_setfpregs(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
#if defined(__sparc)
		error = pr_setxregs(pnp, &argp->prxregset);
#else
		error = EINVAL;
#endif
		break;

#if defined(__sparc)
	case PCSASRS:	/* set ancillary state registers */
		error = pr_setasrs(pnp, argp->asrset);
		break;
#endif

	case PCSVADDR:	/* set virtual address at which to resume */
		error = pr_setvaddr(pnp, argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		error = pr_watch(pnp, &argp->prwatch, &unlocked);
		/* skip the prunlock() below when pr_watch() reports unlocked */
		if (error && unlocked)
			return (error);
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		error = pr_agent(pnp, argp->prgregset, &unlocked);
		/* skip the prunlock() below when pr_agent() reports unlocked */
		if (error && unlocked)
			return (error);
		break;

	case PCREAD:	/* read from the address space */
		error = pr_rdwr(p, UIO_READ, &argp->priovec);
		break;

	case PCWRITE:	/* write to the address space */
		error = pr_rdwr(p, UIO_WRITE, &argp->priovec);
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		error = pr_scred(p, &argp->prcred, cr, cmd == PCSCREDX);
		break;

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;
	case PCSZONE:	/* set the process's zoneid credentials */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* on failure the lock is dropped here, on behalf of the caller */
	if (error)
		prunlock(pnp);
	return (error);
}
489 
490 #ifdef _SYSCALL32_IMPL
491 
/*
 * Operand of a control message written by a 32-bit caller; the 32-bit
 * counterpart of arg_t.  prwritectl32() copies the operand bytes of each
 * message into an object of this type before calling pr_control32(),
 * which reads the member that corresponds to the command (noted beside
 * each member).
 */
typedef union {
	int32_t		sig;		/* PCKILL, PCUNKILL */
	int32_t		nice;		/* PCNICE */
	int32_t		timeo;		/* PCTWSTOP */
	uint32_t	flags;		/* PCRUN, PCSET, PCUNSET */
	caddr32_t	vaddr;		/* PCSVADDR */
	siginfo32_t	siginfo;	/* PCSSIG */
	sigset_t	sigset;		/* PCSTRACE, PCSHOLD */
	fltset_t	fltset;		/* PCSFAULT */
	sysset_t	sysset;		/* PCSENTRY, PCSEXIT */
	prgregset32_t	prgregset;	/* PCSREG, PCAGENT */
	prfpregset32_t	prfpregset;	/* PCSFPREG */
#if defined(__sparc)
	prxregset_t	prxregset;	/* PCSXREG */
#endif
	prwatch32_t	prwatch;	/* PCWATCH */
	priovec32_t	priovec;	/* PCREAD, PCWRITE */
	prcred32_t	prcred;		/* PCSCRED */
	prpriv_t	prpriv;		/* PCSPRIV */
	int32_t		przoneid;	/* PCSZONE */
} arg32_t;
513 
514 static	int	pr_control32(int32_t, arg32_t *, prnode_t *, cred_t *);
515 static	int	pr_setfpregs32(prnode_t *, prfpregset32_t *);
516 
517 /*
518  * Note that while ctlsize32() can use argp, it must do so only in a way
519  * that assumes 32-bit rather than 64-bit alignment as argp is a pointer
520  * to an array of 32-bit values and only 32-bit alignment is ensured.
521  */
522 static size_t
523 ctlsize32(int32_t cmd, size_t resid, arg32_t *argp)
524 {
525 	size_t size = sizeof (int32_t);
526 	size_t rnd;
527 	int ngrp;
528 
529 	switch (cmd) {
530 	case PCNULL:
531 	case PCSTOP:
532 	case PCDSTOP:
533 	case PCWSTOP:
534 	case PCCSIG:
535 	case PCCFAULT:
536 		break;
537 	case PCSSIG:
538 		size += sizeof (siginfo32_t);
539 		break;
540 	case PCTWSTOP:
541 		size += sizeof (int32_t);
542 		break;
543 	case PCKILL:
544 	case PCUNKILL:
545 	case PCNICE:
546 		size += sizeof (int32_t);
547 		break;
548 	case PCRUN:
549 	case PCSET:
550 	case PCUNSET:
551 		size += sizeof (uint32_t);
552 		break;
553 	case PCSVADDR:
554 		size += sizeof (caddr32_t);
555 		break;
556 	case PCSTRACE:
557 	case PCSHOLD:
558 		size += sizeof (sigset_t);
559 		break;
560 	case PCSFAULT:
561 		size += sizeof (fltset_t);
562 		break;
563 	case PCSENTRY:
564 	case PCSEXIT:
565 		size += sizeof (sysset_t);
566 		break;
567 	case PCSREG:
568 	case PCAGENT:
569 		size += sizeof (prgregset32_t);
570 		break;
571 	case PCSFPREG:
572 		size += sizeof (prfpregset32_t);
573 		break;
574 #if defined(__sparc)
575 	case PCSXREG:
576 		size += sizeof (prxregset_t);
577 		break;
578 #endif
579 	case PCWATCH:
580 		size += sizeof (prwatch32_t);
581 		break;
582 	case PCREAD:
583 	case PCWRITE:
584 		size += sizeof (priovec32_t);
585 		break;
586 	case PCSCRED:
587 		size += sizeof (prcred32_t);
588 		break;
589 	case PCSCREDX:
590 		/*
591 		 * We cannot derefence the pr_ngroups fields if it
592 		 * we don't have enough data.
593 		 */
594 		if (resid < size + sizeof (prcred32_t) - sizeof (gid32_t))
595 			return (0);
596 		ngrp = argp->prcred.pr_ngroups;
597 		if (ngrp < 0 || ngrp > ngroups_max)
598 			return (0);
599 
600 		/* The result can be smaller than sizeof (prcred32_t) */
601 		size += sizeof (prcred32_t) - sizeof (gid32_t);
602 		size += ngrp * sizeof (gid32_t);
603 		break;
604 	case PCSPRIV:
605 		if (resid >= size + sizeof (prpriv_t))
606 			size += priv_prgetprivsize(&argp->prpriv);
607 		else
608 			return (0);
609 		break;
610 	case PCSZONE:
611 		size += sizeof (int32_t);
612 		break;
613 	default:
614 		return (0);
615 	}
616 
617 	/* Round up to a multiple of int32_t */
618 	rnd = size & (sizeof (int32_t) - 1);
619 
620 	if (rnd != 0)
621 		size += sizeof (int32_t) - rnd;
622 
623 	if (size > resid)
624 		return (0);
625 	return (size);
626 }
627 
628 /*
629  * Control operations (lots).
630  */
631 int
632 prwritectl32(struct vnode *vp, struct uio *uiop, cred_t *cr)
633 {
634 #define	MY_BUFFER_SIZE32 \
635 		100 > 1 + sizeof (arg32_t) / sizeof (int32_t) ? \
636 		100 : 1 + sizeof (arg32_t) / sizeof (int32_t)
637 	int32_t buf[MY_BUFFER_SIZE32];
638 	int32_t *bufp;
639 	arg32_t arg;
640 	size_t resid = 0;
641 	size_t size;
642 	prnode_t *pnp = VTOP(vp);
643 	int error;
644 	int locked = 0;
645 
646 	while (uiop->uio_resid) {
647 		/*
648 		 * Read several commands in one gulp.
649 		 */
650 		bufp = buf;
651 		if (resid) {	/* move incomplete command to front of buffer */
652 			int32_t *tail;
653 
654 			if (resid >= sizeof (buf))
655 				break;
656 			tail = (int32_t *)((char *)buf + sizeof (buf) - resid);
657 			do {
658 				*bufp++ = *tail++;
659 			} while ((resid -= sizeof (int32_t)) != 0);
660 		}
661 		resid = sizeof (buf) - ((char *)bufp - (char *)buf);
662 		if (resid > uiop->uio_resid)
663 			resid = uiop->uio_resid;
664 		if (error = uiomove((caddr_t)bufp, resid, UIO_WRITE, uiop))
665 			return (error);
666 		resid += (char *)bufp - (char *)buf;
667 		bufp = buf;
668 
669 		do {		/* loop over commands in buffer */
670 			int32_t cmd = bufp[0];
671 			arg32_t *argp = (arg32_t *)&bufp[1];
672 
673 			size = ctlsize32(cmd, resid, argp);
674 			if (size == 0) {  /* incomplete or invalid command */
675 				if (locked) {
676 					prunlock(pnp);
677 					locked = 0;
678 				}
679 				return (resid? EINVAL : 0);
680 			}
681 			/*
682 			 * Perform the specified control operation.
683 			 */
684 			if (!locked) {
685 				if ((error = prlock(pnp, ZNO)) != 0)
686 					return (error);
687 				locked = 1;
688 			}
689 
690 			/*
691 			 * Since some members of the arg32_t union contain
692 			 * 64-bit values (which must be 64-bit aligned), we
693 			 * can't simply pass a pointer to the structure as
694 			 * it may be unaligned. Note that we do pass the
695 			 * potentially unaligned structure to ctlsize32()
696 			 * above, but that uses it a way that makes no
697 			 * assumptions about alignment.
698 			 */
699 			ASSERT(size - sizeof (cmd) <= sizeof (arg));
700 			bcopy(argp, &arg, size - sizeof (cmd));
701 
702 			if (error = pr_control32(cmd, &arg, pnp, cr)) {
703 				if (error == -1)	/* -1 is timeout */
704 					locked = 0;
705 				else
706 					return (error);
707 			}
708 			bufp = (int32_t *)((char *)bufp + size);
709 		} while ((resid -= size) != 0);
710 
711 		if (locked) {
712 			prunlock(pnp);
713 			locked = 0;
714 		}
715 	}
716 	return (resid? EINVAL : 0);
717 }
718 
/*
 * Execute one control operation written by a 32-bit caller.
 *
 *	cmd	the control command (PCxxx)
 *	argp	the command's operand in its 32-bit layout; the union
 *		member read depends on cmd
 *	pnp	the /proc node the message was written to
 *	cr	credential handed to the operations that take one
 *
 * Operands whose layout differs between 32-bit and native form are
 * converted to the native type before the shared helper is called.  The
 * commands that carry addresses or register state fail with EOVERFLOW
 * when PROCESS_NOT_32BIT(p) is true for the target.
 *
 * prwritectl32() calls this after a successful prlock(pnp).  The return
 * value also tells the caller what happened to that lock:
 *
 *	0	the operation succeeded (PCNULL returns 0 immediately).
 *	-1	PCSSIG or PCKILL delivered SIGKILL: prunlock() was called
 *		here before pr_wait_die().  pr_wait_stop() can also return
 *		-1, which its callers describe as a timeout.
 *	other	an errno value.  On the common exit path at the bottom of
 *		this function prunlock(pnp) is called before returning.
 *		The early returns for pr_wait_stop(), pr_watch() and
 *		pr_agent() skip that prunlock(); presumably the callee has
 *		already dropped the lock in those cases.
 */
static int
pr_control32(int32_t cmd, arg32_t *argp, prnode_t *pnp, cred_t *cr)
{
	prcommon_t *pcp;
	proc_t *p;
	int unlocked;	/* set by pr_watch()/pr_agent() through a pointer */
	int error = 0;

	if (cmd == PCNULL)
		return (0);

	pcp = pnp->pr_common;
	p = pcp->prc_proc;
	ASSERT(p != NULL);

	switch (cmd) {

	default:
		error = EINVAL;
		break;

	case PCSTOP:	/* direct process or lwp to stop and wait for stop */
	case PCDSTOP:	/* direct process or lwp to stop, don't wait */
	case PCWSTOP:	/* wait for process or lwp to stop */
	case PCTWSTOP:	/* wait for process or lwp to stop, with timeout */
		{
			time_t timeo;

			/*
			 * Can't apply to a system process.
			 */
			if ((p->p_flag & SSYS) || p->p_as == &kas) {
				error = EBUSY;
				break;
			}

			if (cmd == PCSTOP || cmd == PCDSTOP)
				pr_stop(pnp);

			if (cmd == PCDSTOP)
				break;

			/*
			 * If an lwp is waiting for itself or its process,
			 * don't wait. The stopped lwp would never see the
			 * fact that it is stopped.
			 */
			if ((pcp->prc_flags & PRC_LWP)?
			    (pcp->prc_thread == curthread) : (p == curproc)) {
				if (cmd == PCWSTOP || cmd == PCTWSTOP)
					error = EBUSY;
				break;
			}

			/* only PCTWSTOP carries a timeout operand */
			timeo = (cmd == PCTWSTOP)? (time_t)argp->timeo : 0;
			if ((error = pr_wait_stop(pnp, timeo)) != 0)
				return (error);

			break;
		}

	case PCRUN:	/* make lwp or process runnable */
		error = pr_setrun(pnp, (ulong_t)argp->flags);
		break;

	case PCSTRACE:	/* set signal trace mask */
		pr_settrace(p,  &argp->sigset);
		break;

	case PCSSIG:	/* set current signal */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			int sig = (int)argp->siginfo.si_signo;
			siginfo_t siginfo;

			/* convert the 32-bit siginfo to native form */
			bzero(&siginfo, sizeof (siginfo));
			siginfo_32tok(&argp->siginfo, (k_siginfo_t *)&siginfo);
			error = pr_setsig(pnp, &siginfo);
			if (sig == SIGKILL && error == 0) {
				/* drop the lock, then wait for the target */
				prunlock(pnp);
				pr_wait_die(pnp);
				return (-1);
			}
		}
		break;

	case PCKILL:	/* send signal */
		error = pr_kill(pnp, (int)argp->sig, cr);
		if (error == 0 && argp->sig == SIGKILL) {
			/* drop the lock, then wait for the target to die */
			prunlock(pnp);
			pr_wait_die(pnp);
			return (-1);
		}
		break;

	case PCUNKILL:	/* delete a pending signal */
		error = pr_unkill(pnp, (int)argp->sig);
		break;

	case PCNICE:	/* set nice priority */
		error = pr_nice(p, (int)argp->nice, cr);
		break;

	case PCSENTRY:	/* set syscall entry bit mask */
	case PCSEXIT:	/* set syscall exit bit mask */
		pr_setentryexit(p, &argp->sysset, cmd == PCSENTRY);
		break;

	case PCSET:	/* set process flags */
		error = pr_set(p, (long)argp->flags);
		break;

	case PCUNSET:	/* unset process flags */
		error = pr_unset(p, (long)argp->flags);
		break;

	case PCSREG:	/* set general registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			kthread_t *t = pr_thread(pnp);	/* returns locked t */

			if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
				thread_unlock(t);
				error = EBUSY;
			} else {
				prgregset_t prgregset;
				klwp_t *lwp = ttolwp(t);

				thread_unlock(t);
				/* p->p_lock is dropped around register access */
				mutex_exit(&p->p_lock);
				prgregset_32ton(lwp, argp->prgregset,
				    prgregset);
				prsetprregs(lwp, prgregset, 0);
				mutex_enter(&p->p_lock);
			}
		}
		break;

	case PCSFPREG:	/* set floating-point registers */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setfpregs32(pnp, &argp->prfpregset);
		break;

	case PCSXREG:	/* set extra registers */
#if defined(__sparc)
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setxregs(pnp, &argp->prxregset);
#else
		error = EINVAL;
#endif
		break;

	case PCSVADDR:	/* set virtual address at which to resume */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else
			error = pr_setvaddr(pnp,
			    (caddr_t)(uintptr_t)argp->vaddr);
		break;

	case PCSHOLD:	/* set signal-hold mask */
		pr_sethold(pnp, &argp->sigset);
		break;

	case PCSFAULT:	/* set mask of traced faults */
		pr_setfault(p, &argp->fltset);
		break;

	case PCCSIG:	/* clear current signal */
		error = pr_clearsig(pnp);
		break;

	case PCCFAULT:	/* clear current fault */
		error = pr_clearflt(pnp);
		break;

	case PCWATCH:	/* set or clear watched areas */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prwatch_t prwatch;

			/* widen the 32-bit watch request field by field */
			prwatch.pr_vaddr = argp->prwatch.pr_vaddr;
			prwatch.pr_size = argp->prwatch.pr_size;
			prwatch.pr_wflags = argp->prwatch.pr_wflags;
			prwatch.pr_pad = argp->prwatch.pr_pad;
			error = pr_watch(pnp, &prwatch, &unlocked);
			/* skip the prunlock() below when reported unlocked */
			if (error && unlocked)
				return (error);
		}
		break;

	case PCAGENT:	/* create the /proc agent lwp in the target process */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			prgregset_t prgregset;
			kthread_t *t = pr_thread(pnp);	/* returns locked t */
			klwp_t *lwp = ttolwp(t);
			thread_unlock(t);
			/* p->p_lock is dropped around the conversion */
			mutex_exit(&p->p_lock);
			prgregset_32ton(lwp, argp->prgregset, prgregset);
			mutex_enter(&p->p_lock);
			error = pr_agent(pnp, prgregset, &unlocked);
			/* skip the prunlock() below when reported unlocked */
			if (error && unlocked)
				return (error);
		}
		break;

	case PCREAD:	/* read from the address space */
	case PCWRITE:	/* write to the address space */
		if (PROCESS_NOT_32BIT(p))
			error = EOVERFLOW;
		else {
			enum uio_rw rw = (cmd == PCREAD)? UIO_READ : UIO_WRITE;
			priovec_t priovec;

			/* widen the 32-bit I/O vector to native form */
			priovec.pio_base =
			    (void *)(uintptr_t)argp->priovec.pio_base;
			priovec.pio_len = (size_t)argp->priovec.pio_len;
			priovec.pio_offset = (off_t)
			    (uint32_t)argp->priovec.pio_offset;
			error = pr_rdwr(p, rw, &priovec);
		}
		break;

	case PCSCRED:	/* set the process credentials */
	case PCSCREDX:
		{
			/*
			 * All the fields in these structures are exactly the
			 * same and so the structures are compatible.  In case
			 * this ever changes, we catch this with the ASSERT
			 * below.
			 */
			prcred_t *prcred = (prcred_t *)&argp->prcred;

#ifndef __lint
			ASSERT(sizeof (prcred_t) == sizeof (prcred32_t));
#endif

			error = pr_scred(p, prcred, cr, cmd == PCSCREDX);
			break;
		}

	case PCSPRIV:	/* set the process privileges */
		error = pr_spriv(p, &argp->prpriv, cr);
		break;

	case PCSZONE:	/* set the process's zoneid */
		error = pr_szoneid(p, (zoneid_t)argp->przoneid, cr);
		break;
	}

	/* on failure the lock is dropped here, on behalf of the caller */
	if (error)
		prunlock(pnp);
	return (error);
}
983 
984 #endif	/* _SYSCALL32_IMPL */
985 
986 /*
987  * Return the specific or chosen thread/lwp for a control operation.
988  * Returns with the thread locked via thread_lock(t).
989  */
990 kthread_t *
991 pr_thread(prnode_t *pnp)
992 {
993 	prcommon_t *pcp = pnp->pr_common;
994 	kthread_t *t;
995 
996 	if (pcp->prc_flags & PRC_LWP) {
997 		t = pcp->prc_thread;
998 		ASSERT(t != NULL);
999 		thread_lock(t);
1000 	} else {
1001 		proc_t *p = pcp->prc_proc;
1002 		t = prchoose(p);	/* returns locked thread */
1003 		ASSERT(t != NULL);
1004 	}
1005 
1006 	return (t);
1007 }
1008 
1009 /*
1010  * Direct the process or lwp to stop.
1011  */
1012 void
1013 pr_stop(prnode_t *pnp)
1014 {
1015 	prcommon_t *pcp = pnp->pr_common;
1016 	proc_t *p = pcp->prc_proc;
1017 	kthread_t *t;
1018 	vnode_t *vp;
1019 
1020 	/*
1021 	 * If already stopped, do nothing; otherwise flag
1022 	 * it to be stopped the next time it tries to run.
1023 	 * If sleeping at interruptible priority, set it
1024 	 * running so it will stop within cv_wait_sig().
1025 	 *
1026 	 * Take care to cooperate with jobcontrol: if an lwp
1027 	 * is stopped due to the default action of a jobcontrol
1028 	 * stop signal, flag it to be stopped the next time it
1029 	 * starts due to a SIGCONT signal.
1030 	 */
1031 	if (pcp->prc_flags & PRC_LWP)
1032 		t = pcp->prc_thread;
1033 	else
1034 		t = p->p_tlist;
1035 	ASSERT(t != NULL);
1036 
1037 	do {
1038 		int notify;
1039 
1040 		notify = 0;
1041 		thread_lock(t);
1042 		if (!ISTOPPED(t)) {
1043 			t->t_proc_flag |= TP_PRSTOP;
1044 			t->t_sig_check = 1;	/* do ISSIG */
1045 		}
1046 
1047 		/* Move the thread from wait queue to run queue */
1048 		if (ISWAITING(t))
1049 			setrun_locked(t);
1050 
1051 		if (ISWAKEABLE(t)) {
1052 			if (t->t_wchan0 == NULL)
1053 				setrun_locked(t);
1054 			else if (!VSTOPPED(t)) {
1055 				/*
1056 				 * Mark it virtually stopped.
1057 				 */
1058 				t->t_proc_flag |= TP_PRVSTOP;
1059 				notify = 1;
1060 			}
1061 		}
1062 		/*
1063 		 * force the thread into the kernel
1064 		 * if it is not already there.
1065 		 */
1066 		prpokethread(t);
1067 		thread_unlock(t);
1068 		if (notify &&
1069 		    (vp = p->p_lwpdir[t->t_dslot].ld_entry->le_trace) != NULL)
1070 			prnotify(vp);
1071 		if (pcp->prc_flags & PRC_LWP)
1072 			break;
1073 	} while ((t = t->t_forw) != p->p_tlist);
1074 
1075 	/*
1076 	 * We do this just in case the thread we asked
1077 	 * to stop is in holdlwps() (called from cfork()).
1078 	 */
1079 	cv_broadcast(&p->p_holdlwps);
1080 }
1081 
/*
 * Sleep until the lwp stops, but cooperate with
 * jobcontrol:  Don't wake up if the lwp is stopped
 * due to the default action of a jobcontrol stop signal.
 * If this is the process file descriptor, sleep
 * until all of the process's lwps stop.
 *
 *	pnp	the /proc node being waited on
 *	timeo	timeout in milliseconds; a value <= 0 means no timeout
 *		is passed to pr_wait()
 *
 * Returns 0 once the stop condition tested by the loops below holds.
 * A nonzero error from pr_wait() (-1 is timeout) or from prlock() is
 * returned as is; on those paths prunlock(pnp) has been called and the
 * lock was not successfully re-taken by this function.
 */
int
pr_wait_stop(prnode_t *pnp, time_t timeo)
{
	prcommon_t *pcp = pnp->pr_common;
	proc_t *p = pcp->prc_proc;
	timestruc_t rqtime;
	timestruc_t *rqtp = NULL;	/* NULL unless a timeout was given */
	int timecheck = 0;
	kthread_t *t;
	int error;

	if (timeo > 0) {	/* millisecond timeout */
		/*
		 * Determine the precise future time of the requested timeout.
		 */
		timestruc_t now;

		timecheck = timechanged;
		gethrestime(&now);
		rqtp = &rqtime;
		rqtp->tv_sec = timeo / MILLISEC;
		rqtp->tv_nsec = (timeo % MILLISEC) * MICROSEC;
		timespecadd(rqtp, &now);
	}

	if (pcp->prc_flags & PRC_LWP) {	/* lwp file descriptor */
		t = pcp->prc_thread;
		ASSERT(t != NULL);
		thread_lock(t);
		while (!ISTOPPED(t) && !VSTOPPED(t)) {
			thread_unlock(t);
			/*
			 * Take prc_mutex before dropping the /proc lock,
			 * then wait; the lock is re-taken afterwards.
			 */
			mutex_enter(&pcp->prc_mutex);
			prunlock(pnp);
			error = pr_wait(pcp, rqtp, timecheck);
			if (error)	/* -1 is timeout */
				return (error);
			if ((error = prlock(pnp, ZNO)) != 0)
				return (error);
			ASSERT(p == pcp->prc_proc);
			ASSERT(t == pcp->prc_thread);
			thread_lock(t);
		}
		thread_unlock(t);
	} else {			/* process file descriptor */
		t = prchoose(p);	/* returns locked thread */
		ASSERT(t != NULL);
		ASSERT(MUTEX_HELD(&p->p_lock));
		while ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t)) ||
		    (p->p_flag & SEXITLWPS)) {
			thread_unlock(t);
			/*
			 * Take prc_mutex before dropping the /proc lock,
			 * then wait; the lock is re-taken afterwards.
			 */
			mutex_enter(&pcp->prc_mutex);
			prunlock(pnp);
			error = pr_wait(pcp, rqtp, timecheck);
			if (error)	/* -1 is timeout */
				return (error);
			if ((error = prlock(pnp, ZNO)) != 0)
				return (error);
			ASSERT(p == pcp->prc_proc);
			/* choose the representative thread afresh */
			t = prchoose(p);	/* returns locked t */
			ASSERT(t != NULL);
		}
		thread_unlock(t);
	}

	ASSERT(!(pcp->prc_flags & PRC_DESTROY) && p->p_stat != SZOMB &&
	    t != NULL && t->t_state != TS_ZOMB);

	return (0);
}
1158 
1159 int
1160 pr_setrun(prnode_t *pnp, ulong_t flags)
1161 {
1162 	prcommon_t *pcp = pnp->pr_common;
1163 	proc_t *p = pcp->prc_proc;
1164 	kthread_t *t;
1165 	klwp_t *lwp;
1166 
1167 	/*
1168 	 * Cannot set an lwp running if it is not stopped.
1169 	 * Also, no lwp other than the /proc agent lwp can
1170 	 * be set running so long as the /proc agent lwp exists.
1171 	 */
1172 	t = pr_thread(pnp);	/* returns locked thread */
1173 	if ((!ISTOPPED(t) && !VSTOPPED(t) &&
1174 	    !(t->t_proc_flag & TP_PRSTOP)) ||
1175 	    (p->p_agenttp != NULL &&
1176 	    (t != p->p_agenttp || !(pcp->prc_flags & PRC_LWP)))) {
1177 		thread_unlock(t);
1178 		return (EBUSY);
1179 	}
1180 	thread_unlock(t);
1181 	if (flags & ~(PRCSIG|PRCFAULT|PRSTEP|PRSTOP|PRSABORT))
1182 		return (EINVAL);
1183 	lwp = ttolwp(t);
1184 	if ((flags & PRCSIG) && lwp->lwp_cursig != SIGKILL) {
1185 		/*
1186 		 * Discard current siginfo_t, if any.
1187 		 */
1188 		lwp->lwp_cursig = 0;
1189 		lwp->lwp_extsig = 0;
1190 		if (lwp->lwp_curinfo) {
1191 			siginfofree(lwp->lwp_curinfo);
1192 			lwp->lwp_curinfo = NULL;
1193 		}
1194 	}
1195 	if (flags & PRCFAULT)
1196 		lwp->lwp_curflt = 0;
1197 	/*
1198 	 * We can't hold p->p_lock when we touch the lwp's registers.
1199 	 * It may be swapped out and we will get a page fault.
1200 	 */
1201 	if (flags & PRSTEP) {
1202 		mutex_exit(&p->p_lock);
1203 		prstep(lwp, 0);
1204 		mutex_enter(&p->p_lock);
1205 	}
1206 	if (flags & PRSTOP) {
1207 		t->t_proc_flag |= TP_PRSTOP;
1208 		t->t_sig_check = 1;	/* do ISSIG */
1209 	}
1210 	if (flags & PRSABORT)
1211 		lwp->lwp_sysabort = 1;
1212 	thread_lock(t);
1213 	if ((pcp->prc_flags & PRC_LWP) || (flags & (PRSTEP|PRSTOP))) {
1214 		/*
1215 		 * Here, we are dealing with a single lwp.
1216 		 */
1217 		if (ISTOPPED(t)) {
1218 			t->t_schedflag |= TS_PSTART;
1219 			t->t_dtrace_stop = 0;
1220 			setrun_locked(t);
1221 		} else if (flags & PRSABORT) {
1222 			t->t_proc_flag &=
1223 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1224 			setrun_locked(t);
1225 		} else if (!(flags & PRSTOP)) {
1226 			t->t_proc_flag &=
1227 			    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1228 		}
1229 		thread_unlock(t);
1230 	} else {
1231 		/*
1232 		 * Here, we are dealing with the whole process.
1233 		 */
1234 		if (ISTOPPED(t)) {
1235 			/*
1236 			 * The representative lwp is stopped on an event
1237 			 * of interest.  We demote it to PR_REQUESTED and
1238 			 * choose another representative lwp.  If the new
1239 			 * representative lwp is not stopped on an event of
1240 			 * interest (other than PR_REQUESTED), we set the
1241 			 * whole process running, else we leave the process
1242 			 * stopped showing the next event of interest.
1243 			 */
1244 			kthread_t *tx = NULL;
1245 
1246 			if (!(flags & PRSABORT) &&
1247 			    t->t_whystop == PR_SYSENTRY &&
1248 			    t->t_whatstop == SYS_lwp_exit)
1249 				tx = t;		/* remember the exiting lwp */
1250 			t->t_whystop = PR_REQUESTED;
1251 			t->t_whatstop = 0;
1252 			thread_unlock(t);
1253 			t = prchoose(p);	/* returns locked t */
1254 			ASSERT(ISTOPPED(t) || VSTOPPED(t));
1255 			if (VSTOPPED(t) ||
1256 			    t->t_whystop == PR_REQUESTED) {
1257 				thread_unlock(t);
1258 				allsetrun(p);
1259 			} else {
1260 				thread_unlock(t);
1261 				/*
1262 				 * As a special case, if the old representative
1263 				 * lwp was stopped on entry to _lwp_exit()
1264 				 * (and we are not aborting the system call),
1265 				 * we set the old representative lwp running.
1266 				 * We do this so that the next process stop
1267 				 * will find the exiting lwp gone.
1268 				 */
1269 				if (tx != NULL) {
1270 					thread_lock(tx);
1271 					tx->t_schedflag |= TS_PSTART;
1272 					t->t_dtrace_stop = 0;
1273 					setrun_locked(tx);
1274 					thread_unlock(tx);
1275 				}
1276 			}
1277 		} else {
1278 			/*
1279 			 * No event of interest; set all of the lwps running.
1280 			 */
1281 			if (flags & PRSABORT) {
1282 				t->t_proc_flag &=
1283 				    ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1284 				setrun_locked(t);
1285 			}
1286 			thread_unlock(t);
1287 			allsetrun(p);
1288 		}
1289 	}
1290 	return (0);
1291 }
1292 
1293 /*
1294  * Wait until process/lwp stops or until timer expires.
1295  * Return EINTR for an interruption, -1 for timeout, else 0.
1296  */
1297 int
1298 pr_wait(prcommon_t *pcp,	/* prcommon referring to process/lwp */
1299 	timestruc_t *ts,	/* absolute time of timeout, if any */
1300 	int timecheck)
1301 {
1302 	int rval;
1303 
1304 	ASSERT(MUTEX_HELD(&pcp->prc_mutex));
1305 	rval = cv_waituntil_sig(&pcp->prc_wait, &pcp->prc_mutex, ts, timecheck);
1306 	mutex_exit(&pcp->prc_mutex);
1307 	switch (rval) {
1308 	case 0:
1309 		return (EINTR);
1310 	case -1:
1311 		return (-1);
1312 	default:
1313 		return (0);
1314 	}
1315 }
1316 
1317 /*
1318  * Make all threads in the process runnable.
1319  */
1320 void
1321 allsetrun(proc_t *p)
1322 {
1323 	kthread_t *t;
1324 
1325 	ASSERT(MUTEX_HELD(&p->p_lock));
1326 
1327 	if ((t = p->p_tlist) != NULL) {
1328 		do {
1329 			thread_lock(t);
1330 			ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1331 			t->t_proc_flag &= ~(TP_PRSTOP|TP_PRVSTOP|TP_STOPPING);
1332 			if (ISTOPPED(t)) {
1333 				t->t_schedflag |= TS_PSTART;
1334 				t->t_dtrace_stop = 0;
1335 				setrun_locked(t);
1336 			}
1337 			thread_unlock(t);
1338 		} while ((t = t->t_forw) != p->p_tlist);
1339 	}
1340 }
1341 
1342 /*
1343  * Wait for the process to die.
1344  * We do this after sending SIGKILL because we know it will
1345  * die soon and we want subsequent operations to return ENOENT.
1346  */
1347 void
1348 pr_wait_die(prnode_t *pnp)
1349 {
1350 	proc_t *p;
1351 
1352 	mutex_enter(&pidlock);
1353 	while ((p = pnp->pr_common->prc_proc) != NULL && p->p_stat != SZOMB) {
1354 		if (!cv_wait_sig(&p->p_srwchan_cv, &pidlock))
1355 			break;
1356 	}
1357 	mutex_exit(&pidlock);
1358 }
1359 
1360 static void
1361 pr_settrace(proc_t *p, sigset_t *sp)
1362 {
1363 	prdelset(sp, SIGKILL);
1364 	prassignset(&p->p_sigmask, sp);
1365 	if (!sigisempty(&p->p_sigmask))
1366 		p->p_proc_flag |= P_PR_TRACE;
1367 	else if (prisempty(&p->p_fltmask)) {
1368 		user_t *up = PTOU(p);
1369 		if (up->u_systrap == 0)
1370 			p->p_proc_flag &= ~P_PR_TRACE;
1371 	}
1372 }
1373 
1374 int
1375 pr_setsig(prnode_t *pnp, siginfo_t *sip)
1376 {
1377 	int sig = sip->si_signo;
1378 	prcommon_t *pcp = pnp->pr_common;
1379 	proc_t *p = pcp->prc_proc;
1380 	kthread_t *t;
1381 	klwp_t *lwp;
1382 	int error = 0;
1383 
1384 	t = pr_thread(pnp);	/* returns locked thread */
1385 	thread_unlock(t);
1386 	lwp = ttolwp(t);
1387 	if (sig < 0 || sig >= NSIG)
1388 		/* Zero allowed here */
1389 		error = EINVAL;
1390 	else if (lwp->lwp_cursig == SIGKILL)
1391 		/* "can't happen", but just in case */
1392 		error = EBUSY;
1393 	else if ((lwp->lwp_cursig = (uchar_t)sig) == 0) {
1394 		lwp->lwp_extsig = 0;
1395 		/*
1396 		 * Discard current siginfo_t, if any.
1397 		 */
1398 		if (lwp->lwp_curinfo) {
1399 			siginfofree(lwp->lwp_curinfo);
1400 			lwp->lwp_curinfo = NULL;
1401 		}
1402 	} else {
1403 		kthread_t *tx;
1404 		sigqueue_t *sqp;
1405 
1406 		/* drop p_lock to do kmem_alloc(KM_SLEEP) */
1407 		mutex_exit(&p->p_lock);
1408 		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_SLEEP);
1409 		mutex_enter(&p->p_lock);
1410 
1411 		if (lwp->lwp_curinfo == NULL)
1412 			lwp->lwp_curinfo = sqp;
1413 		else
1414 			kmem_free(sqp, sizeof (sigqueue_t));
1415 		/*
1416 		 * Copy contents of info to current siginfo_t.
1417 		 */
1418 		bcopy(sip, &lwp->lwp_curinfo->sq_info,
1419 		    sizeof (lwp->lwp_curinfo->sq_info));
1420 		/*
1421 		 * Prevent contents published by si_zoneid-unaware /proc
1422 		 * consumers from being incorrectly filtered.  Because
1423 		 * an uninitialized si_zoneid is the same as
1424 		 * GLOBAL_ZONEID, this means that you can't pr_setsig a
1425 		 * process in a non-global zone with a siginfo which
1426 		 * appears to come from the global zone.
1427 		 */
1428 		if (SI_FROMUSER(sip) && sip->si_zoneid == 0)
1429 			lwp->lwp_curinfo->sq_info.si_zoneid =
1430 			    p->p_zone->zone_id;
1431 		/*
1432 		 * Side-effects for SIGKILL and jobcontrol signals.
1433 		 */
1434 		if (sig == SIGKILL) {
1435 			p->p_flag |= SKILLED;
1436 			p->p_flag &= ~SEXTKILLED;
1437 		} else if (sig == SIGCONT) {
1438 			p->p_flag |= SSCONT;
1439 			sigdelq(p, NULL, SIGSTOP);
1440 			sigdelq(p, NULL, SIGTSTP);
1441 			sigdelq(p, NULL, SIGTTOU);
1442 			sigdelq(p, NULL, SIGTTIN);
1443 			sigdiffset(&p->p_sig, &stopdefault);
1444 			sigdiffset(&p->p_extsig, &stopdefault);
1445 			if ((tx = p->p_tlist) != NULL) {
1446 				do {
1447 					sigdelq(p, tx, SIGSTOP);
1448 					sigdelq(p, tx, SIGTSTP);
1449 					sigdelq(p, tx, SIGTTOU);
1450 					sigdelq(p, tx, SIGTTIN);
1451 					sigdiffset(&tx->t_sig, &stopdefault);
1452 					sigdiffset(&tx->t_extsig, &stopdefault);
1453 				} while ((tx = tx->t_forw) != p->p_tlist);
1454 			}
1455 		} else if (sigismember(&stopdefault, sig)) {
1456 			if (PTOU(p)->u_signal[sig-1] == SIG_DFL &&
1457 			    (sig == SIGSTOP || !p->p_pgidp->pid_pgorphaned))
1458 				p->p_flag &= ~SSCONT;
1459 			sigdelq(p, NULL, SIGCONT);
1460 			sigdelset(&p->p_sig, SIGCONT);
1461 			sigdelset(&p->p_extsig, SIGCONT);
1462 			if ((tx = p->p_tlist) != NULL) {
1463 				do {
1464 					sigdelq(p, tx, SIGCONT);
1465 					sigdelset(&tx->t_sig, SIGCONT);
1466 					sigdelset(&tx->t_extsig, SIGCONT);
1467 				} while ((tx = tx->t_forw) != p->p_tlist);
1468 			}
1469 		}
1470 		thread_lock(t);
1471 		if (ISWAKEABLE(t) || ISWAITING(t)) {
1472 			/* Set signaled sleeping/waiting lwp running */
1473 			setrun_locked(t);
1474 		} else if (t->t_state == TS_STOPPED && sig == SIGKILL) {
1475 			/* If SIGKILL, set stopped lwp running */
1476 			p->p_stopsig = 0;
1477 			t->t_schedflag |= TS_XSTART | TS_PSTART;
1478 			t->t_dtrace_stop = 0;
1479 			setrun_locked(t);
1480 		}
1481 		t->t_sig_check = 1;	/* so ISSIG will be done */
1482 		thread_unlock(t);
1483 		/*
1484 		 * More jobcontrol side-effects.
1485 		 */
1486 		if (sig == SIGCONT && (tx = p->p_tlist) != NULL) {
1487 			p->p_stopsig = 0;
1488 			do {
1489 				thread_lock(tx);
1490 				if (tx->t_state == TS_STOPPED &&
1491 				    tx->t_whystop == PR_JOBCONTROL) {
1492 					tx->t_schedflag |= TS_XSTART;
1493 					setrun_locked(tx);
1494 				}
1495 				thread_unlock(tx);
1496 			} while ((tx = tx->t_forw) != p->p_tlist);
1497 		}
1498 	}
1499 	return (error);
1500 }
1501 
1502 int
1503 pr_kill(prnode_t *pnp, int sig, cred_t *cr)
1504 {
1505 	prcommon_t *pcp = pnp->pr_common;
1506 	proc_t *p = pcp->prc_proc;
1507 	k_siginfo_t info;
1508 
1509 	if (sig <= 0 || sig >= NSIG)
1510 		return (EINVAL);
1511 
1512 	bzero(&info, sizeof (info));
1513 	info.si_signo = sig;
1514 	info.si_code = SI_USER;
1515 	info.si_pid = curproc->p_pid;
1516 	info.si_ctid = PRCTID(curproc);
1517 	info.si_zoneid = getzoneid();
1518 	info.si_uid = crgetruid(cr);
1519 	sigaddq(p, (pcp->prc_flags & PRC_LWP)?
1520 	    pcp->prc_thread : NULL, &info, KM_NOSLEEP);
1521 
1522 	return (0);
1523 }
1524 
1525 int
1526 pr_unkill(prnode_t *pnp, int sig)
1527 {
1528 	prcommon_t *pcp = pnp->pr_common;
1529 	proc_t *p = pcp->prc_proc;
1530 	sigqueue_t *infop = NULL;
1531 
1532 	if (sig <= 0 || sig >= NSIG || sig == SIGKILL)
1533 		return (EINVAL);
1534 
1535 	if (pcp->prc_flags & PRC_LWP)
1536 		sigdeq(p, pcp->prc_thread, sig, &infop);
1537 	else
1538 		sigdeq(p, NULL, sig, &infop);
1539 
1540 	if (infop)
1541 		siginfofree(infop);
1542 
1543 	return (0);
1544 }
1545 
1546 int
1547 pr_nice(proc_t *p, int nice, cred_t *cr)
1548 {
1549 	kthread_t *t;
1550 	int err;
1551 	int error = 0;
1552 
1553 	t = p->p_tlist;
1554 	do {
1555 		ASSERT(!(t->t_proc_flag & TP_LWPEXIT));
1556 		err = CL_DONICE(t, cr, nice, (int *)NULL);
1557 		schedctl_set_cidpri(t);
1558 		if (error == 0)
1559 			error = err;
1560 	} while ((t = t->t_forw) != p->p_tlist);
1561 
1562 	return (error);
1563 }
1564 
1565 void
1566 pr_setentryexit(proc_t *p, sysset_t *sysset, int entry)
1567 {
1568 	user_t *up = PTOU(p);
1569 
1570 	if (entry) {
1571 		prassignset(&up->u_entrymask, sysset);
1572 	} else {
1573 		prassignset(&up->u_exitmask, sysset);
1574 	}
1575 	if (!prisempty(&up->u_entrymask) ||
1576 	    !prisempty(&up->u_exitmask)) {
1577 		up->u_systrap = 1;
1578 		p->p_proc_flag |= P_PR_TRACE;
1579 		set_proc_sys(p);	/* set pre and post-sys flags */
1580 	} else {
1581 		up->u_systrap = 0;
1582 		if (sigisempty(&p->p_sigmask) &&
1583 		    prisempty(&p->p_fltmask))
1584 			p->p_proc_flag &= ~P_PR_TRACE;
1585 	}
1586 }
1587 
/* Every mode flag accepted by pr_set() and pr_unset(). */
#define	ALLFLAGS	\
	(PR_FORK | PR_RLC | PR_KLC | PR_ASYNC | \
	PR_BPTADJ | PR_MSACCT | PR_MSFORK | PR_PTRACE)
1590 
1591 int
1592 pr_set(proc_t *p, long flags)
1593 {
1594 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1595 		return (EBUSY);
1596 
1597 	if (flags & ~ALLFLAGS)
1598 		return (EINVAL);
1599 
1600 	if (flags & PR_FORK)
1601 		p->p_proc_flag |= P_PR_FORK;
1602 	if (flags & PR_RLC)
1603 		p->p_proc_flag |= P_PR_RUNLCL;
1604 	if (flags & PR_KLC)
1605 		p->p_proc_flag |= P_PR_KILLCL;
1606 	if (flags & PR_ASYNC)
1607 		p->p_proc_flag |= P_PR_ASYNC;
1608 	if (flags & PR_BPTADJ)
1609 		p->p_proc_flag |= P_PR_BPTADJ;
1610 	if (flags & PR_MSACCT)
1611 		if ((p->p_flag & SMSACCT) == 0)
1612 			estimate_msacct(p->p_tlist, gethrtime());
1613 	if (flags & PR_MSFORK)
1614 		p->p_flag |= SMSFORK;
1615 	if (flags & PR_PTRACE) {
1616 		p->p_proc_flag |= P_PR_PTRACE;
1617 		/* ptraced process must die if parent dead */
1618 		if (p->p_ppid == 1)
1619 			sigtoproc(p, NULL, SIGKILL);
1620 	}
1621 
1622 	return (0);
1623 }
1624 
1625 int
1626 pr_unset(proc_t *p, long flags)
1627 {
1628 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1629 		return (EBUSY);
1630 
1631 	if (flags & ~ALLFLAGS)
1632 		return (EINVAL);
1633 
1634 	if (flags & PR_FORK)
1635 		p->p_proc_flag &= ~P_PR_FORK;
1636 	if (flags & PR_RLC)
1637 		p->p_proc_flag &= ~P_PR_RUNLCL;
1638 	if (flags & PR_KLC)
1639 		p->p_proc_flag &= ~P_PR_KILLCL;
1640 	if (flags & PR_ASYNC)
1641 		p->p_proc_flag &= ~P_PR_ASYNC;
1642 	if (flags & PR_BPTADJ)
1643 		p->p_proc_flag &= ~P_PR_BPTADJ;
1644 	if (flags & PR_MSACCT)
1645 		disable_msacct(p);
1646 	if (flags & PR_MSFORK)
1647 		p->p_flag &= ~SMSFORK;
1648 	if (flags & PR_PTRACE)
1649 		p->p_proc_flag &= ~P_PR_PTRACE;
1650 
1651 	return (0);
1652 }
1653 
1654 static int
1655 pr_setfpregs(prnode_t *pnp, prfpregset_t *prfpregset)
1656 {
1657 	proc_t *p = pnp->pr_common->prc_proc;
1658 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1659 
1660 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1661 		thread_unlock(t);
1662 		return (EBUSY);
1663 	}
1664 	if (!prhasfp()) {
1665 		thread_unlock(t);
1666 		return (EINVAL);	/* No FP support */
1667 	}
1668 
1669 	/* drop p_lock while touching the lwp's stack */
1670 	thread_unlock(t);
1671 	mutex_exit(&p->p_lock);
1672 	prsetprfpregs(ttolwp(t), prfpregset);
1673 	mutex_enter(&p->p_lock);
1674 
1675 	return (0);
1676 }
1677 
1678 #ifdef	_SYSCALL32_IMPL
1679 static int
1680 pr_setfpregs32(prnode_t *pnp, prfpregset32_t *prfpregset)
1681 {
1682 	proc_t *p = pnp->pr_common->prc_proc;
1683 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1684 
1685 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1686 		thread_unlock(t);
1687 		return (EBUSY);
1688 	}
1689 	if (!prhasfp()) {
1690 		thread_unlock(t);
1691 		return (EINVAL);	/* No FP support */
1692 	}
1693 
1694 	/* drop p_lock while touching the lwp's stack */
1695 	thread_unlock(t);
1696 	mutex_exit(&p->p_lock);
1697 	prsetprfpregs32(ttolwp(t), prfpregset);
1698 	mutex_enter(&p->p_lock);
1699 
1700 	return (0);
1701 }
1702 #endif	/* _SYSCALL32_IMPL */
1703 
1704 #if defined(__sparc)
1705 /* ARGSUSED */
1706 static int
1707 pr_setxregs(prnode_t *pnp, prxregset_t *prxregset)
1708 {
1709 	proc_t *p = pnp->pr_common->prc_proc;
1710 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1711 
1712 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1713 		thread_unlock(t);
1714 		return (EBUSY);
1715 	}
1716 	thread_unlock(t);
1717 
1718 	if (!prhasx(p))
1719 		return (EINVAL);	/* No extra register support */
1720 
1721 	/* drop p_lock while touching the lwp's stack */
1722 	mutex_exit(&p->p_lock);
1723 	prsetprxregs(ttolwp(t), (caddr_t)prxregset);
1724 	mutex_enter(&p->p_lock);
1725 
1726 	return (0);
1727 }
1728 
1729 static int
1730 pr_setasrs(prnode_t *pnp, asrset_t asrset)
1731 {
1732 	proc_t *p = pnp->pr_common->prc_proc;
1733 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1734 
1735 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1736 		thread_unlock(t);
1737 		return (EBUSY);
1738 	}
1739 	thread_unlock(t);
1740 
1741 	/* drop p_lock while touching the lwp's stack */
1742 	mutex_exit(&p->p_lock);
1743 	prsetasregs(ttolwp(t), asrset);
1744 	mutex_enter(&p->p_lock);
1745 
1746 	return (0);
1747 }
1748 #endif
1749 
1750 static int
1751 pr_setvaddr(prnode_t *pnp, caddr_t vaddr)
1752 {
1753 	proc_t *p = pnp->pr_common->prc_proc;
1754 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1755 
1756 	if (!ISTOPPED(t) && !VSTOPPED(t) && !DSTOPPED(t)) {
1757 		thread_unlock(t);
1758 		return (EBUSY);
1759 	}
1760 
1761 	/* drop p_lock while touching the lwp's stack */
1762 	thread_unlock(t);
1763 	mutex_exit(&p->p_lock);
1764 	prsvaddr(ttolwp(t), vaddr);
1765 	mutex_enter(&p->p_lock);
1766 
1767 	return (0);
1768 }
1769 
1770 void
1771 pr_sethold(prnode_t *pnp, sigset_t *sp)
1772 {
1773 	proc_t *p = pnp->pr_common->prc_proc;
1774 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1775 
1776 	schedctl_finish_sigblock(t);
1777 	sigutok(sp, &t->t_hold);
1778 	if (ISWAKEABLE(t) &&
1779 	    (fsig(&p->p_sig, t) || fsig(&t->t_sig, t)))
1780 		setrun_locked(t);
1781 	t->t_sig_check = 1;	/* so thread will see new holdmask */
1782 	thread_unlock(t);
1783 }
1784 
1785 void
1786 pr_setfault(proc_t *p, fltset_t *fltp)
1787 {
1788 	prassignset(&p->p_fltmask, fltp);
1789 	if (!prisempty(&p->p_fltmask))
1790 		p->p_proc_flag |= P_PR_TRACE;
1791 	else if (sigisempty(&p->p_sigmask)) {
1792 		user_t *up = PTOU(p);
1793 		if (up->u_systrap == 0)
1794 			p->p_proc_flag &= ~P_PR_TRACE;
1795 	}
1796 }
1797 
1798 static int
1799 pr_clearsig(prnode_t *pnp)
1800 {
1801 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1802 	klwp_t *lwp = ttolwp(t);
1803 
1804 	thread_unlock(t);
1805 	if (lwp->lwp_cursig == SIGKILL)
1806 		return (EBUSY);
1807 
1808 	/*
1809 	 * Discard current siginfo_t, if any.
1810 	 */
1811 	lwp->lwp_cursig = 0;
1812 	lwp->lwp_extsig = 0;
1813 	if (lwp->lwp_curinfo) {
1814 		siginfofree(lwp->lwp_curinfo);
1815 		lwp->lwp_curinfo = NULL;
1816 	}
1817 
1818 	return (0);
1819 }
1820 
1821 static int
1822 pr_clearflt(prnode_t *pnp)
1823 {
1824 	kthread_t *t = pr_thread(pnp);	/* returns locked thread */
1825 
1826 	thread_unlock(t);
1827 	ttolwp(t)->lwp_curflt = 0;
1828 
1829 	return (0);
1830 }
1831 
1832 static int
1833 pr_watch(prnode_t *pnp, prwatch_t *pwp, int *unlocked)
1834 {
1835 	proc_t *p = pnp->pr_common->prc_proc;
1836 	struct as *as = p->p_as;
1837 	uintptr_t vaddr = pwp->pr_vaddr;
1838 	size_t size = pwp->pr_size;
1839 	int wflags = pwp->pr_wflags;
1840 	ulong_t newpage = 0;
1841 	struct watched_area *pwa;
1842 	int error;
1843 
1844 	*unlocked = 0;
1845 
1846 	/*
1847 	 * Can't apply to a system process.
1848 	 */
1849 	if ((p->p_flag & SSYS) || p->p_as == &kas)
1850 		return (EBUSY);
1851 
1852 	/*
1853 	 * Verify that the address range does not wrap
1854 	 * and that only the proper flags were specified.
1855 	 */
1856 	if ((wflags & ~WA_TRAPAFTER) == 0)
1857 		size = 0;
1858 	if (vaddr + size < vaddr ||
1859 	    (wflags & ~(WA_READ|WA_WRITE|WA_EXEC|WA_TRAPAFTER)) != 0 ||
1860 	    ((wflags & ~WA_TRAPAFTER) != 0 && size == 0))
1861 		return (EINVAL);
1862 
1863 	/*
1864 	 * Don't let the address range go above as->a_userlimit.
1865 	 * There is no error here, just a limitation.
1866 	 */
1867 	if (vaddr >= (uintptr_t)as->a_userlimit)
1868 		return (0);
1869 	if (vaddr + size > (uintptr_t)as->a_userlimit)
1870 		size = (uintptr_t)as->a_userlimit - vaddr;
1871 
1872 	/*
1873 	 * Compute maximum number of pages this will add.
1874 	 */
1875 	if ((wflags & ~WA_TRAPAFTER) != 0) {
1876 		ulong_t pagespan = (vaddr + size) - (vaddr & PAGEMASK);
1877 		newpage = btopr(pagespan);
1878 		if (newpage > 2 * prnwatch)
1879 			return (E2BIG);
1880 	}
1881 
1882 	/*
1883 	 * Force the process to be fully stopped.
1884 	 */
1885 	if (p == curproc) {
1886 		prunlock(pnp);
1887 		while (holdwatch() != 0)
1888 			continue;
1889 		if ((error = prlock(pnp, ZNO)) != 0) {
1890 			continuelwps(p);
1891 			*unlocked = 1;
1892 			return (error);
1893 		}
1894 	} else {
1895 		pauselwps(p);
1896 		while (pr_allstopped(p, 0) > 0) {
1897 			/*
1898 			 * This cv/mutex pair is persistent even
1899 			 * if the process disappears after we
1900 			 * unmark it and drop p->p_lock.
1901 			 */
1902 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
1903 			kmutex_t *mp = &p->p_lock;
1904 
1905 			prunmark(p);
1906 			(void) cv_wait(cv, mp);
1907 			mutex_exit(mp);
1908 			if ((error = prlock(pnp, ZNO)) != 0) {
1909 				/*
1910 				 * Unpause the process if it exists.
1911 				 */
1912 				p = pr_p_lock(pnp);
1913 				mutex_exit(&pr_pidlock);
1914 				if (p != NULL) {
1915 					unpauselwps(p);
1916 					prunlock(pnp);
1917 				}
1918 				*unlocked = 1;
1919 				return (error);
1920 			}
1921 		}
1922 	}
1923 
1924 	/*
1925 	 * Drop p->p_lock in order to perform the rest of this.
1926 	 * The process is still locked with the P_PR_LOCK flag.
1927 	 */
1928 	mutex_exit(&p->p_lock);
1929 
1930 	pwa = kmem_alloc(sizeof (struct watched_area), KM_SLEEP);
1931 	pwa->wa_vaddr = (caddr_t)vaddr;
1932 	pwa->wa_eaddr = (caddr_t)vaddr + size;
1933 	pwa->wa_flags = (ulong_t)wflags;
1934 
1935 	error = ((pwa->wa_flags & ~WA_TRAPAFTER) == 0)?
1936 	    clear_watched_area(p, pwa) : set_watched_area(p, pwa);
1937 
1938 	if (p == curproc) {
1939 		setallwatch();
1940 		mutex_enter(&p->p_lock);
1941 		continuelwps(p);
1942 	} else {
1943 		mutex_enter(&p->p_lock);
1944 		unpauselwps(p);
1945 	}
1946 
1947 	return (error);
1948 }
1949 
/*
 * True if the thread is stopped for jobcontrol while a /proc directed
 * stop (TP_PRSTOP) is also in effect.
 */
#define	JDSTOPPED(t)	\
	((t)->t_state == TS_STOPPED &&		\
	(t)->t_whystop == PR_JOBCONTROL &&	\
	((t)->t_proc_flag & TP_PRSTOP))
1955 
1956 /*
1957  * pr_agent() creates the agent lwp. If the process is exiting while
1958  * we are creating an agent lwp, then exitlwps() waits until the
1959  * agent has been created using prbarrier().
1960  */
1961 static int
1962 pr_agent(prnode_t *pnp, prgregset_t prgregset, int *unlocked)
1963 {
1964 	proc_t *p = pnp->pr_common->prc_proc;
1965 	prcommon_t *pcp;
1966 	kthread_t *t;
1967 	kthread_t *ct;
1968 	klwp_t *clwp;
1969 	k_sigset_t smask;
1970 	int cid;
1971 	void *bufp = NULL;
1972 	int error;
1973 
1974 	*unlocked = 0;
1975 
1976 	/*
1977 	 * Cannot create the /proc agent lwp if :-
1978 	 * - the process is not fully stopped or directed to stop.
1979 	 * - there is an agent lwp already.
1980 	 * - the process has been killed.
1981 	 * - the process is exiting.
1982 	 * - it's a vfork(2) parent.
1983 	 */
1984 	t = prchoose(p);	/* returns locked thread */
1985 	ASSERT(t != NULL);
1986 
1987 	if ((!ISTOPPED(t) && !VSTOPPED(t) && !SUSPENDED(t) && !JDSTOPPED(t)) ||
1988 	    p->p_agenttp != NULL ||
1989 	    (p->p_flag & (SKILLED | SEXITING | SVFWAIT))) {
1990 		thread_unlock(t);
1991 		return (EBUSY);
1992 	}
1993 
1994 	thread_unlock(t);
1995 	mutex_exit(&p->p_lock);
1996 
1997 	sigfillset(&smask);
1998 	sigdiffset(&smask, &cantmask);
1999 	clwp = lwp_create(lwp_rtt, NULL, 0, p, TS_STOPPED,
2000 	    t->t_pri, &smask, NOCLASS, 0);
2001 	if (clwp == NULL) {
2002 		mutex_enter(&p->p_lock);
2003 		return (ENOMEM);
2004 	}
2005 	prsetprregs(clwp, prgregset, 1);
2006 retry:
2007 	cid = t->t_cid;
2008 	(void) CL_ALLOC(&bufp, cid, KM_SLEEP);
2009 	mutex_enter(&p->p_lock);
2010 	if (cid != t->t_cid) {
2011 		/*
2012 		 * Someone just changed this thread's scheduling class,
2013 		 * so try pre-allocating the buffer again.  Hopefully we
2014 		 * don't hit this often.
2015 		 */
2016 		mutex_exit(&p->p_lock);
2017 		CL_FREE(cid, bufp);
2018 		goto retry;
2019 	}
2020 
2021 	clwp->lwp_ap = clwp->lwp_arg;
2022 	clwp->lwp_eosys = NORMALRETURN;
2023 	ct = lwptot(clwp);
2024 	ct->t_clfuncs = t->t_clfuncs;
2025 	CL_FORK(t, ct, bufp);
2026 	ct->t_cid = t->t_cid;
2027 	ct->t_proc_flag |= TP_PRSTOP;
2028 	/*
2029 	 * Setting t_sysnum to zero causes post_syscall()
2030 	 * to bypass all syscall checks and go directly to
2031 	 *	if (issig()) psig();
2032 	 * so that the agent lwp will stop in issig_forreal()
2033 	 * showing PR_REQUESTED.
2034 	 */
2035 	ct->t_sysnum = 0;
2036 	ct->t_post_sys = 1;
2037 	ct->t_sig_check = 1;
2038 	p->p_agenttp = ct;
2039 	ct->t_proc_flag &= ~TP_HOLDLWP;
2040 
2041 	pcp = pnp->pr_pcommon;
2042 	mutex_enter(&pcp->prc_mutex);
2043 
2044 	lwp_create_done(ct);
2045 
2046 	/*
2047 	 * Don't return until the agent is stopped on PR_REQUESTED.
2048 	 */
2049 
2050 	for (;;) {
2051 		prunlock(pnp);
2052 		*unlocked = 1;
2053 
2054 		/*
2055 		 * Wait for the agent to stop and notify us.
2056 		 * If we've been interrupted, return that information.
2057 		 */
2058 		error = pr_wait(pcp, NULL, 0);
2059 		if (error == EINTR) {
2060 			error = 0;
2061 			break;
2062 		}
2063 
2064 		/*
2065 		 * Confirm that the agent LWP has stopped.
2066 		 */
2067 
2068 		if ((error = prlock(pnp, ZNO)) != 0)
2069 			break;
2070 		*unlocked = 0;
2071 
2072 		/*
2073 		 * Since we dropped the lock on the process, the agent
2074 		 * may have disappeared or changed. Grab the current
2075 		 * agent and check fail if it has disappeared.
2076 		 */
2077 		if ((ct = p->p_agenttp) == NULL) {
2078 			error = ENOENT;
2079 			break;
2080 		}
2081 
2082 		mutex_enter(&pcp->prc_mutex);
2083 		thread_lock(ct);
2084 
2085 		if (ISTOPPED(ct)) {
2086 			thread_unlock(ct);
2087 			mutex_exit(&pcp->prc_mutex);
2088 			break;
2089 		}
2090 
2091 		thread_unlock(ct);
2092 	}
2093 
2094 	return (error ? error : -1);
2095 }
2096 
2097 static int
2098 pr_rdwr(proc_t *p, enum uio_rw rw, priovec_t *pio)
2099 {
2100 	caddr_t base = (caddr_t)pio->pio_base;
2101 	size_t cnt = pio->pio_len;
2102 	uintptr_t offset = (uintptr_t)pio->pio_offset;
2103 	struct uio auio;
2104 	struct iovec aiov;
2105 	int error = 0;
2106 
2107 	if ((p->p_flag & SSYS) || p->p_as == &kas)
2108 		error = EIO;
2109 	else if ((base + cnt) < base || (offset + cnt) < offset)
2110 		error = EINVAL;
2111 	else if (cnt != 0) {
2112 		aiov.iov_base = base;
2113 		aiov.iov_len = cnt;
2114 
2115 		auio.uio_loffset = offset;
2116 		auio.uio_iov = &aiov;
2117 		auio.uio_iovcnt = 1;
2118 		auio.uio_resid = cnt;
2119 		auio.uio_segflg = UIO_USERSPACE;
2120 		auio.uio_llimit = (longlong_t)MAXOFFSET_T;
2121 		auio.uio_fmode = FREAD|FWRITE;
2122 		auio.uio_extflg = UIO_COPY_DEFAULT;
2123 
2124 		mutex_exit(&p->p_lock);
2125 		error = prusrio(p, rw, &auio, 0);
2126 		mutex_enter(&p->p_lock);
2127 
2128 		/*
2129 		 * We have no way to return the i/o count,
2130 		 * like read() or write() would do, so we
2131 		 * return an error if the i/o was truncated.
2132 		 */
2133 		if (auio.uio_resid != 0 && error == 0)
2134 			error = EIO;
2135 	}
2136 
2137 	return (error);
2138 }
2139 
2140 static int
2141 pr_scred(proc_t *p, prcred_t *prcred, cred_t *cr, boolean_t dogrps)
2142 {
2143 	kthread_t *t;
2144 	cred_t *oldcred;
2145 	cred_t *newcred;
2146 	uid_t oldruid;
2147 	int error;
2148 	zone_t *zone = crgetzone(cr);
2149 
2150 	if (!VALID_UID(prcred->pr_euid, zone) ||
2151 	    !VALID_UID(prcred->pr_ruid, zone) ||
2152 	    !VALID_UID(prcred->pr_suid, zone) ||
2153 	    !VALID_GID(prcred->pr_egid, zone) ||
2154 	    !VALID_GID(prcred->pr_rgid, zone) ||
2155 	    !VALID_GID(prcred->pr_sgid, zone))
2156 		return (EINVAL);
2157 
2158 	if (dogrps) {
2159 		int ngrp = prcred->pr_ngroups;
2160 		int i;
2161 
2162 		if (ngrp < 0 || ngrp > ngroups_max)
2163 			return (EINVAL);
2164 
2165 		for (i = 0; i < ngrp; i++) {
2166 			if (!VALID_GID(prcred->pr_groups[i], zone))
2167 				return (EINVAL);
2168 		}
2169 	}
2170 
2171 	error = secpolicy_allow_setid(cr, prcred->pr_euid, B_FALSE);
2172 
2173 	if (error == 0 && prcred->pr_ruid != prcred->pr_euid)
2174 		error = secpolicy_allow_setid(cr, prcred->pr_ruid, B_FALSE);
2175 
2176 	if (error == 0 && prcred->pr_suid != prcred->pr_euid &&
2177 	    prcred->pr_suid != prcred->pr_ruid)
2178 		error = secpolicy_allow_setid(cr, prcred->pr_suid, B_FALSE);
2179 
2180 	if (error)
2181 		return (error);
2182 
2183 	mutex_exit(&p->p_lock);
2184 
2185 	/* hold old cred so it doesn't disappear while we dup it */
2186 	mutex_enter(&p->p_crlock);
2187 	crhold(oldcred = p->p_cred);
2188 	mutex_exit(&p->p_crlock);
2189 	newcred = crdup(oldcred);
2190 	oldruid = crgetruid(oldcred);
2191 	crfree(oldcred);
2192 
2193 	/* Error checking done above */
2194 	(void) crsetresuid(newcred, prcred->pr_ruid, prcred->pr_euid,
2195 	    prcred->pr_suid);
2196 	(void) crsetresgid(newcred, prcred->pr_rgid, prcred->pr_egid,
2197 	    prcred->pr_sgid);
2198 
2199 	if (dogrps) {
2200 		(void) crsetgroups(newcred, prcred->pr_ngroups,
2201 		    prcred->pr_groups);
2202 
2203 	}
2204 
2205 	mutex_enter(&p->p_crlock);
2206 	oldcred = p->p_cred;
2207 	p->p_cred = newcred;
2208 	mutex_exit(&p->p_crlock);
2209 	crfree(oldcred);
2210 
2211 	/*
2212 	 * Keep count of processes per uid consistent.
2213 	 */
2214 	if (oldruid != prcred->pr_ruid) {
2215 		zoneid_t zoneid = crgetzoneid(newcred);
2216 
2217 		mutex_enter(&pidlock);
2218 		upcount_dec(oldruid, zoneid);
2219 		upcount_inc(prcred->pr_ruid, zoneid);
2220 		mutex_exit(&pidlock);
2221 	}
2222 
2223 	/*
2224 	 * Broadcast the cred change to the threads.
2225 	 */
2226 	mutex_enter(&p->p_lock);
2227 	t = p->p_tlist;
2228 	do {
2229 		t->t_pre_sys = 1; /* so syscall will get new cred */
2230 	} while ((t = t->t_forw) != p->p_tlist);
2231 
2232 	return (0);
2233 }
2234 
2235 /*
2236  * Change process credentials to specified zone.  Used to temporarily
2237  * set a process to run in the global zone; only transitions between
2238  * the process's actual zone and the global zone are allowed.
2239  */
2240 static int
2241 pr_szoneid(proc_t *p, zoneid_t zoneid, cred_t *cr)
2242 {
2243 	kthread_t *t;
2244 	cred_t *oldcred;
2245 	cred_t *newcred;
2246 	zone_t *zptr;
2247 	zoneid_t oldzoneid;
2248 
2249 	if (secpolicy_zone_config(cr) != 0)
2250 		return (EPERM);
2251 	if (zoneid != GLOBAL_ZONEID && zoneid != p->p_zone->zone_id)
2252 		return (EINVAL);
2253 	if ((zptr = zone_find_by_id(zoneid)) == NULL)
2254 		return (EINVAL);
2255 	mutex_exit(&p->p_lock);
2256 	mutex_enter(&p->p_crlock);
2257 	oldcred = p->p_cred;
2258 	crhold(oldcred);
2259 	mutex_exit(&p->p_crlock);
2260 	newcred = crdup(oldcred);
2261 	oldzoneid = crgetzoneid(oldcred);
2262 	crfree(oldcred);
2263 
2264 	crsetzone(newcred, zptr);
2265 	zone_rele(zptr);
2266 
2267 	mutex_enter(&p->p_crlock);
2268 	oldcred = p->p_cred;
2269 	p->p_cred = newcred;
2270 	mutex_exit(&p->p_crlock);
2271 	crfree(oldcred);
2272 
2273 	/*
2274 	 * The target process is changing zones (according to its cred), so
2275 	 * update the per-zone upcounts, which are based on process creds.
2276 	 */
2277 	if (oldzoneid != zoneid) {
2278 		uid_t ruid = crgetruid(newcred);
2279 
2280 		mutex_enter(&pidlock);
2281 		upcount_dec(ruid, oldzoneid);
2282 		upcount_inc(ruid, zoneid);
2283 		mutex_exit(&pidlock);
2284 	}
2285 	/*
2286 	 * Broadcast the cred change to the threads.
2287 	 */
2288 	mutex_enter(&p->p_lock);
2289 	t = p->p_tlist;
2290 	do {
2291 		t->t_pre_sys = 1;	/* so syscall will get new cred */
2292 	} while ((t = t->t_forw) != p->p_tlist);
2293 
2294 	return (0);
2295 }
2296 
2297 static int
2298 pr_spriv(proc_t *p, prpriv_t *prpriv, cred_t *cr)
2299 {
2300 	kthread_t *t;
2301 	int err;
2302 
2303 	ASSERT(MUTEX_HELD(&p->p_lock));
2304 
2305 	if ((err = priv_pr_spriv(p, prpriv, cr)) == 0) {
2306 		/*
2307 		 * Broadcast the cred change to the threads.
2308 		 */
2309 		t = p->p_tlist;
2310 		do {
2311 			t->t_pre_sys = 1; /* so syscall will get new cred */
2312 		} while ((t = t->t_forw) != p->p_tlist);
2313 	}
2314 
2315 	return (err);
2316 }
2317 
2318 /*
2319  * Return -1 if the process is the parent of a vfork(1) whose child has yet to
2320  * terminate or perform an exec(2).
2321  *
2322  * Returns 0 if the process is fully stopped except for the current thread (if
2323  * we are operating on our own process), 1 otherwise.
2324  *
2325  * If the watchstop flag is set, then we ignore threads with TP_WATCHSTOP set.
2326  * See holdwatch() for details.
2327  */
2328 int
2329 pr_allstopped(proc_t *p, int watchstop)
2330 {
2331 	kthread_t *t;
2332 	int rv = 0;
2333 
2334 	ASSERT(MUTEX_HELD(&p->p_lock));
2335 
2336 	if (p->p_flag & SVFWAIT)	/* waiting for vfork'd child to exec */
2337 		return (-1);
2338 
2339 	if ((t = p->p_tlist) != NULL) {
2340 		do {
2341 			if (t == curthread || VSTOPPED(t) ||
2342 			    (watchstop && (t->t_proc_flag & TP_WATCHSTOP)))
2343 				continue;
2344 			thread_lock(t);
2345 			switch (t->t_state) {
2346 			case TS_ZOMB:
2347 			case TS_STOPPED:
2348 				break;
2349 			case TS_SLEEP:
2350 				if (!(t->t_flag & T_WAKEABLE) ||
2351 				    t->t_wchan0 == NULL)
2352 					rv = 1;
2353 				break;
2354 			default:
2355 				rv = 1;
2356 				break;
2357 			}
2358 			thread_unlock(t);
2359 		} while (rv == 0 && (t = t->t_forw) != p->p_tlist);
2360 	}
2361 
2362 	return (rv);
2363 }
2364 
2365 /*
2366  * Cause all lwps in the process to pause (for watchpoint operations).
2367  */
2368 static void
2369 pauselwps(proc_t *p)
2370 {
2371 	kthread_t *t;
2372 
2373 	ASSERT(MUTEX_HELD(&p->p_lock));
2374 	ASSERT(p != curproc);
2375 
2376 	if ((t = p->p_tlist) != NULL) {
2377 		do {
2378 			thread_lock(t);
2379 			t->t_proc_flag |= TP_PAUSE;
2380 			aston(t);
2381 			if ((ISWAKEABLE(t) && (t->t_wchan0 == NULL)) ||
2382 			    ISWAITING(t)) {
2383 				setrun_locked(t);
2384 			}
2385 			prpokethread(t);
2386 			thread_unlock(t);
2387 		} while ((t = t->t_forw) != p->p_tlist);
2388 	}
2389 }
2390 
2391 /*
2392  * undo the effects of pauselwps()
2393  */
2394 static void
2395 unpauselwps(proc_t *p)
2396 {
2397 	kthread_t *t;
2398 
2399 	ASSERT(MUTEX_HELD(&p->p_lock));
2400 	ASSERT(p != curproc);
2401 
2402 	if ((t = p->p_tlist) != NULL) {
2403 		do {
2404 			thread_lock(t);
2405 			t->t_proc_flag &= ~TP_PAUSE;
2406 			if (t->t_state == TS_STOPPED) {
2407 				t->t_schedflag |= TS_UNPAUSE;
2408 				t->t_dtrace_stop = 0;
2409 				setrun_locked(t);
2410 			}
2411 			thread_unlock(t);
2412 		} while ((t = t->t_forw) != p->p_tlist);
2413 	}
2414 }
2415 
2416 /*
2417  * Cancel all watched areas.  Called from prclose().
2418  */
2419 proc_t *
2420 pr_cancel_watch(prnode_t *pnp)
2421 {
2422 	proc_t *p = pnp->pr_pcommon->prc_proc;
2423 	struct as *as;
2424 	kthread_t *t;
2425 
2426 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2427 
2428 	if (!pr_watch_active(p))
2429 		return (p);
2430 
2431 	/*
2432 	 * Pause the process before dealing with the watchpoints.
2433 	 */
2434 	if (p == curproc) {
2435 		prunlock(pnp);
2436 		while (holdwatch() != 0)
2437 			continue;
2438 		p = pr_p_lock(pnp);
2439 		mutex_exit(&pr_pidlock);
2440 		ASSERT(p == curproc);
2441 	} else {
2442 		pauselwps(p);
2443 		while (p != NULL && pr_allstopped(p, 0) > 0) {
2444 			/*
2445 			 * This cv/mutex pair is persistent even
2446 			 * if the process disappears after we
2447 			 * unmark it and drop p->p_lock.
2448 			 */
2449 			kcondvar_t *cv = &pr_pid_cv[p->p_slot];
2450 			kmutex_t *mp = &p->p_lock;
2451 
2452 			prunmark(p);
2453 			(void) cv_wait(cv, mp);
2454 			mutex_exit(mp);
2455 			p = pr_p_lock(pnp);  /* NULL if process disappeared */
2456 			mutex_exit(&pr_pidlock);
2457 		}
2458 	}
2459 
2460 	if (p == NULL)		/* the process disappeared */
2461 		return (NULL);
2462 
2463 	ASSERT(p == pnp->pr_pcommon->prc_proc);
2464 	ASSERT(MUTEX_HELD(&p->p_lock) && (p->p_proc_flag & P_PR_LOCK));
2465 
2466 	if (pr_watch_active(p)) {
2467 		pr_free_watchpoints(p);
2468 		if ((t = p->p_tlist) != NULL) {
2469 			do {
2470 				watch_disable(t);
2471 
2472 			} while ((t = t->t_forw) != p->p_tlist);
2473 		}
2474 	}
2475 
2476 	if ((as = p->p_as) != NULL) {
2477 		avl_tree_t *tree;
2478 		struct watched_page *pwp;
2479 
2480 		/*
2481 		 * If this is the parent of a vfork, the watched page
2482 		 * list has been moved temporarily to p->p_wpage.
2483 		 */
2484 		if (avl_numnodes(&p->p_wpage) != 0)
2485 			tree = &p->p_wpage;
2486 		else
2487 			tree = &as->a_wpage;
2488 
2489 		mutex_exit(&p->p_lock);
2490 		AS_LOCK_ENTER(as, &as->a_lock, RW_WRITER);
2491 
2492 		for (pwp = avl_first(tree); pwp != NULL;
2493 		    pwp = AVL_NEXT(tree, pwp)) {
2494 			pwp->wp_read = 0;
2495 			pwp->wp_write = 0;
2496 			pwp->wp_exec = 0;
2497 			if ((pwp->wp_flags & WP_SETPROT) == 0) {
2498 				pwp->wp_flags |= WP_SETPROT;
2499 				pwp->wp_prot = pwp->wp_oprot;
2500 				pwp->wp_list = p->p_wprot;
2501 				p->p_wprot = pwp;
2502 			}
2503 		}
2504 
2505 		AS_LOCK_EXIT(as, &as->a_lock);
2506 		mutex_enter(&p->p_lock);
2507 	}
2508 
2509 	/*
2510 	 * Unpause the process now.
2511 	 */
2512 	if (p == curproc)
2513 		continuelwps(p);
2514 	else
2515 		unpauselwps(p);
2516 
2517 	return (p);
2518 }
2519