xref: /illumos-gate/usr/src/uts/common/disp/priocntl.c (revision dd4eeefd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
27 /*	  All Rights Reserved  	*/
28 
29 
30 #pragma ident	"%Z%%M%	%I%	%E% SMI"
31 
32 #include <sys/types.h>
33 #include <sys/param.h>
34 #include <sys/sysmacros.h>
35 #include <sys/signal.h>
36 #include <sys/pcb.h>
37 #include <sys/user.h>
38 #include <sys/systm.h>
39 #include <sys/sysinfo.h>
40 #include <sys/var.h>
41 #include <sys/errno.h>
42 #include <sys/cred.h>
43 #include <sys/proc.h>
44 #include <sys/procset.h>
45 #include <sys/debug.h>
46 #include <sys/inline.h>
47 #include <sys/priocntl.h>
48 #include <sys/disp.h>
49 #include <sys/class.h>
50 #include <sys/modctl.h>
51 #include <sys/t_lock.h>
52 #include <sys/uadmin.h>
53 #include <sys/cmn_err.h>
54 #include <sys/policy.h>
55 
56 /*
57  * Structure used to pass arguments to the proccmp() function.
58  * The arguments must be passed in a structure because proccmp()
59  * is called indirectly through the dotoprocs() function which
60  * will only pass through a single one word argument.
61  */
62 struct pcmpargs {
63 	id_t	*pcmp_cidp;
64 	int	*pcmp_cntp;
65 	kthread_id_t	*pcmp_retthreadp;
66 };
67 
/*
 * Argument bundle for setparms(), which dotoprocs() calls once per
 * process in the set.
 */
struct stprmargs {
	struct pcparms	*stp_parmsp;	/* parameters to apply */
	int		stp_error;	/* EPERM is reported here by */
					/* setparms() instead of being */
					/* returned */
};
76 
77 #if defined(_SYSCALL32_IMPL) && _LONG_LONG_ALIGNMENT_32 == 4
78 /*
79  * A vaparm_t is an int followed by a long long -- this packs differently
80  * between the 64-bit kernel ABI and the 32-bit user ABI.
81  */
82 static int
83 copyin_vaparms32(caddr_t arg, pc_vaparms_t *vap, uio_seg_t seg)
84 {
85 	pc_vaparms32_t vaparms32;
86 	pc_vaparm32_t *src;
87 	pc_vaparm_t *dst;
88 	uint_t cnt;
89 
90 	ASSERT(get_udatamodel() == DATAMODEL_ILP32);
91 
92 	if ((seg == UIO_USERSPACE ? copyin : kcopy)(arg, &vaparms32,
93 	    sizeof (vaparms32)))
94 		return (EFAULT);
95 
96 	vap->pc_vaparmscnt = vaparms32.pc_vaparmscnt;
97 	if ((cnt = vaparms32.pc_vaparmscnt) > PC_VAPARMCNT)
98 		cnt = PC_VAPARMCNT;
99 	for (src = vaparms32.pc_parms, dst = vap->pc_parms;
100 	    cnt--; src++, dst++) {
101 		dst->pc_key = src->pc_key;
102 		dst->pc_parm = src->pc_parm;
103 	}
104 	return (0);
105 }
106 
107 #define	COPYIN_VAPARMS(arg, vap, size, seg)	\
108 	(get_udatamodel() == DATAMODEL_NATIVE ?	\
109 	(*copyinfn)(arg, vap, size) : copyin_vaparms32(arg, vap, seg))
110 
111 #else
112 
113 #define	COPYIN_VAPARMS(arg, vap, size, seg)	(*copyinfn)(arg, vap, size)
114 
115 #endif
116 
117 static int donice(procset_t *, pcnice_t *);
118 static int proccmp(proc_t *, struct pcmpargs *);
119 static int setparms(proc_t *, struct stprmargs *);
120 extern int threadcmp(struct pcmpargs *, kthread_id_t);
121 
122 /*
123  * The priocntl system call.
124  */
125 long
126 priocntl_common(int pc_version, procset_t *psp, int cmd, caddr_t arg,
127     caddr_t arg2, uio_seg_t seg)
128 {
129 	pcinfo_t		pcinfo;
130 	pcparms_t		pcparms;
131 	pcnice_t		pcnice;
132 	pcadmin_t		pcadmin;
133 	pcpri_t			pcpri;
134 	procset_t		procset;
135 	struct stprmargs	stprmargs;
136 	struct pcmpargs		pcmpargs;
137 	pc_vaparms_t		vaparms;
138 	char			clname[PC_CLNMSZ];
139 	char			*outstr;
140 	int			count;
141 	kthread_id_t		retthreadp;
142 	proc_t			*initpp;
143 	int			clnullflag;
144 	int			error = 0;
145 	int			error1 = 0;
146 	int			rv = 0;
147 	pid_t			saved_pid;
148 	id_t			classid;
149 	int			size;
150 	int (*copyinfn)(const void *, void *, size_t);
151 	int (*copyoutfn)(const void *, void *, size_t);
152 
153 	/*
154 	 * First just check the version number. Right now there is only
155 	 * one version we know about and support.  If we get some other
156 	 * version number from the application it may be that the
157 	 * application was built with some future version and is trying
158 	 * to run on an old release of the system (that's us).  In any
159 	 * case if we don't recognize the version number all we can do is
160 	 * return error.
161 	 */
162 	if (pc_version != PC_VERSION)
163 		return (set_errno(EINVAL));
164 
165 	if (seg == UIO_USERSPACE) {
166 		copyinfn = copyin;
167 		copyoutfn = copyout;
168 	} else {
169 		copyinfn = kcopy;
170 		copyoutfn = kcopy;
171 	}
172 
173 	switch (cmd) {
174 	case PC_GETCID:
175 		/*
176 		 * If the arg pointer is NULL, the user just wants to
177 		 * know the number of classes. If non-NULL, the pointer
178 		 * should point to a valid user pcinfo buffer.  In the
179 		 * dynamic world we need to return the number of loaded
180 		 * classes, not the max number of available classes that
181 		 * can be loaded.
182 		 */
183 		if (arg == NULL) {
184 			rv = loaded_classes;
185 			break;
186 		} else {
187 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
188 				return (set_errno(EFAULT));
189 		}
190 
191 		pcinfo.pc_clname[PC_CLNMSZ-1] = '\0';
192 
193 		/*
194 		 * Get the class ID corresponding to user supplied name.
195 		 */
196 		error = getcid(pcinfo.pc_clname, &pcinfo.pc_cid);
197 		if (error)
198 			return (set_errno(error));
199 
200 		/*
201 		 * Can't get info about the sys class.
202 		 */
203 		if (pcinfo.pc_cid == 0)
204 			return (set_errno(EINVAL));
205 
206 		/*
207 		 * Get the class specific information.
208 		 * we MUST make sure that the class has not already
209 		 * been unloaded before we try the CL_GETCLINFO.
210 		 * If it has then we need to load it.
211 		 */
212 		error =
213 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
214 		if (error)
215 			return (set_errno(error));
216 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
217 		if (error)
218 			return (set_errno(error));
219 
220 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
221 			return (set_errno(EFAULT));
222 
223 		rv = loaded_classes;
224 
225 		break;
226 
227 	case PC_GETCLINFO:
228 		/*
229 		 * If the arg pointer is NULL, the user just wants to know
230 		 * the number of classes. If non-NULL, the pointer should
231 		 * point to a valid user pcinfo buffer.
232 		 */
233 		if (arg == NULL) {
234 			rv = loaded_classes;
235 			break;
236 		} else {
237 			if ((*copyinfn)(arg, &pcinfo, sizeof (pcinfo)))
238 				return (set_errno(EFAULT));
239 		}
240 
241 		if (pcinfo.pc_cid >= loaded_classes || pcinfo.pc_cid < 1)
242 			return (set_errno(EINVAL));
243 
244 		(void) strncpy(pcinfo.pc_clname, sclass[pcinfo.pc_cid].cl_name,
245 		    PC_CLNMSZ);
246 
247 		/*
248 		 * Get the class specific information.  we MUST make sure
249 		 * that the class has not already been unloaded before we
250 		 * try the CL_GETCLINFO.  If it has then we need to load
251 		 * it.
252 		 */
253 		error =
254 		    scheduler_load(pcinfo.pc_clname, &sclass[pcinfo.pc_cid]);
255 		if (error)
256 			return (set_errno(error));
257 		error = CL_GETCLINFO(&sclass[pcinfo.pc_cid], pcinfo.pc_clinfo);
258 		if (error)
259 			return (set_errno(error));
260 
261 		if ((*copyoutfn)(&pcinfo, arg, sizeof (pcinfo)))
262 			return (set_errno(EFAULT));
263 
264 		rv = loaded_classes;
265 		break;
266 
267 	case PC_SETPARMS:
268 	case PC_SETXPARMS:
269 		/*
270 		 * First check the validity of the parameters we got from
271 		 * the user.  We don't do any permissions checking here
272 		 * because it's done on a per thread basis by parmsset().
273 		 */
274 		if (cmd == PC_SETPARMS) {
275 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
276 				return (set_errno(EFAULT));
277 
278 			error = parmsin(&pcparms, NULL);
279 		} else {
280 			if ((*copyinfn)(arg, clname, PC_CLNMSZ) ||
281 			    COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
282 			    seg))
283 				return (set_errno(EFAULT));
284 			clname[PC_CLNMSZ-1] = '\0';
285 
286 			if (getcid(clname, &pcparms.pc_cid))
287 				return (set_errno(EINVAL));
288 
289 			error = parmsin(&pcparms, &vaparms);
290 		}
291 
292 		if (error)
293 			return (set_errno(error));
294 
295 		/*
296 		 * Get the procset from the user.
297 		 */
298 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
299 			return (set_errno(EFAULT));
300 
301 		/*
302 		 * For performance we do a quick check here to catch
303 		 * common cases where the current thread is the only one
304 		 * in the set.  In such cases we can call parmsset()
305 		 * directly, avoiding the relatively lengthy path through
306 		 * dotoprocs().  The underlying classes expect pidlock to
307 		 * be held.
308 		 */
309 		if (cur_inset_only(&procset) == B_TRUE) {
310 			/* do a single LWP */
311 			if ((procset.p_lidtype == P_LWPID) ||
312 			    (procset.p_ridtype == P_LWPID)) {
313 				mutex_enter(&pidlock);
314 				mutex_enter(&curproc->p_lock);
315 				error = parmsset(&pcparms, curthread);
316 				mutex_exit(&curproc->p_lock);
317 				mutex_exit(&pidlock);
318 			} else {
319 				/* do the entire process otherwise */
320 				stprmargs.stp_parmsp = &pcparms;
321 				stprmargs.stp_error = 0;
322 				mutex_enter(&pidlock);
323 				error = setparms(curproc, &stprmargs);
324 				mutex_exit(&pidlock);
325 				if (error == 0 && stprmargs.stp_error != 0)
326 					error = stprmargs.stp_error;
327 			}
328 			if (error)
329 				return (set_errno(error));
330 		} else {
331 			stprmargs.stp_parmsp = &pcparms;
332 			stprmargs.stp_error = 0;
333 
334 			error1 = error = ESRCH;
335 
336 			/*
337 			 * The dotoprocs() call below will cause
338 			 * setparms() to be called for each thread in the
339 			 * specified procset. setparms() will in turn
340 			 * call parmsset() (which does the real work).
341 			 */
342 			if ((procset.p_lidtype != P_LWPID) ||
343 				(procset.p_ridtype != P_LWPID)) {
344 				error1 = dotoprocs(&procset, setparms,
345 				    (char *)&stprmargs);
346 			}
347 
348 			/*
349 			 * take care of the case when any of the
350 			 * operands happen to be LWP's
351 			 */
352 
353 			if ((procset.p_lidtype == P_LWPID) ||
354 			    (procset.p_ridtype == P_LWPID)) {
355 				error = dotolwp(&procset, parmsset,
356 				    (char *)&pcparms);
357 				/*
358 				 * Dotolwp() returns with p_lock held.
359 				 * This is required for the GETPARMS case
360 				 * below. So, here we just release the
361 				 * p_lock.
362 				 */
363 				if (MUTEX_HELD(&curproc->p_lock))
364 					mutex_exit(&curproc->p_lock);
365 			}
366 
367 			/*
368 			 * If setparms() encounters a permissions error
369 			 * for one or more of the threads it returns
370 			 * EPERM in stp_error so dotoprocs() will
371 			 * continue through the thread set.  If
372 			 * dotoprocs() returned an error above, it was
373 			 * more serious than permissions and dotoprocs
374 			 * quit when the error was encountered.  We
375 			 * return the more serious error if there was
376 			 * one, otherwise we return EPERM if we got that
377 			 * back.
378 			 */
379 			if (error1 != ESRCH)
380 				error = error1;
381 			if (error == 0 && stprmargs.stp_error != 0)
382 				error = stprmargs.stp_error;
383 		}
384 		break;
385 
386 	case PC_GETPARMS:
387 	case PC_GETXPARMS:
388 		if (cmd == PC_GETPARMS) {
389 			if ((*copyinfn)(arg, &pcparms, sizeof (pcparms)))
390 				return (set_errno(EFAULT));
391 		} else {
392 			if (arg != NULL) {
393 				if ((*copyinfn)(arg, clname, PC_CLNMSZ))
394 					return (set_errno(EFAULT));
395 
396 				clname[PC_CLNMSZ-1] = '\0';
397 
398 				if (getcid(clname, &pcparms.pc_cid))
399 					return (set_errno(EINVAL));
400 			} else
401 				pcparms.pc_cid = PC_CLNULL;
402 
403 			if (COPYIN_VAPARMS(arg2, &vaparms, sizeof (vaparms),
404 			    seg))
405 				return (set_errno(EFAULT));
406 		}
407 
408 		if (pcparms.pc_cid >= loaded_classes ||
409 		    (pcparms.pc_cid < 1 && pcparms.pc_cid != PC_CLNULL))
410 			return (set_errno(EINVAL));
411 
412 		if ((*copyinfn)(psp, &procset, sizeof (procset)))
413 			return (set_errno(EFAULT));
414 
415 		/*
416 		 * Check to see if the current thread is the only one
417 		 * in the set. If not we must go through the whole set
418 		 * to select a thread.
419 		 */
420 		if (cur_inset_only(&procset) == B_TRUE) {
421 			/* do a single LWP */
422 			if ((procset.p_lidtype == P_LWPID) ||
423 			    (procset.p_ridtype == P_LWPID)) {
424 				if (pcparms.pc_cid != PC_CLNULL &&
425 				    pcparms.pc_cid != curthread->t_cid) {
426 					/*
427 					 * Specified thread not in
428 					 * specified class.
429 					 */
430 					return (set_errno(ESRCH));
431 				} else {
432 					mutex_enter(&curproc->p_lock);
433 					retthreadp = curthread;
434 				}
435 			} else {
436 				count = 0;
437 				retthreadp = NULL;
438 				pcmpargs.pcmp_cidp = &pcparms.pc_cid;
439 				pcmpargs.pcmp_cntp = &count;
440 				pcmpargs.pcmp_retthreadp = &retthreadp;
441 				/*
442 				 * Specified thread not in specified class.
443 				 */
444 				if (pcparms.pc_cid != PC_CLNULL &&
445 				    pcparms.pc_cid != curthread->t_cid)
446 					return (set_errno(ESRCH));
447 				error = proccmp(curproc, &pcmpargs);
448 				if (error) {
449 					if (retthreadp != NULL)
450 						mutex_exit(&(curproc->p_lock));
451 					return (set_errno(error));
452 				}
453 			}
454 		} else {
455 			/*
456 			 * get initpp early to avoid lock ordering problems
457 			 * (we cannot get pidlock while holding any p_lock).
458 			 */
459 			mutex_enter(&pidlock);
460 			initpp = prfind(P_INITPID);
461 			mutex_exit(&pidlock);
462 
463 			/*
464 			 * Select the thread (from the set) whose
465 			 * parameters we are going to return.  First we
466 			 * set up some locations for return values, then
467 			 * we call proccmp() indirectly through
468 			 * dotoprocs().  proccmp() will call a class
469 			 * specific routine which actually does the
470 			 * selection.  To understand how this works take
471 			 * a careful look at the code below, the
472 			 * dotoprocs() function, the proccmp() function,
473 			 * and the class specific cl_proccmp() functions.
474 			 */
475 			if (pcparms.pc_cid == PC_CLNULL)
476 				clnullflag = 1;
477 			else
478 				clnullflag = 0;
479 			count = 0;
480 			retthreadp = NULL;
481 			pcmpargs.pcmp_cidp = &pcparms.pc_cid;
482 			pcmpargs.pcmp_cntp = &count;
483 			pcmpargs.pcmp_retthreadp = &retthreadp;
484 			error1 = error = ESRCH;
485 
486 			if ((procset.p_lidtype != P_LWPID) ||
487 			    (procset.p_ridtype != P_LWPID)) {
488 				error1 = dotoprocs(&procset, proccmp,
489 				    (char *)&pcmpargs);
490 			}
491 
492 			/*
493 			 * take care of combination of LWP and process
494 			 * set case in a procset
495 			 */
496 			if ((procset.p_lidtype == P_LWPID) ||
497 			    (procset.p_ridtype == P_LWPID)) {
498 				error = dotolwp(&procset, threadcmp,
499 				    (char *)&pcmpargs);
500 			}
501 
502 			/*
503 			 * Both proccmp() and threadcmp() return with the
504 			 * p_lock held for the ttoproc(retthreadp). This
505 			 * is required to make sure that the process we
506 			 * chose as the winner doesn't go away
507 			 * i.e. retthreadp has to be a valid pointer.
508 			 *
509 			 * The case below can only happen if the thread
510 			 * with the highest priority was not in your
511 			 * process.  In that case, dotolwp will return
512 			 * holding p_lock for both your process as well
513 			 * as the process in which retthreadp is a
514 			 * thread.
515 			 */
516 			if ((retthreadp != NULL) &&
517 			    (ttoproc(retthreadp) != curproc) &&
518 			    MUTEX_HELD(&(curproc)->p_lock))
519 				mutex_exit(&(curproc)->p_lock);
520 
521 			ASSERT(retthreadp == NULL ||
522 			    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
523 			if (error1 != ESRCH)
524 				error = error1;
525 			if (error) {
526 				if (retthreadp != NULL)
527 				    mutex_exit(&(ttoproc(retthreadp)->p_lock));
528 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
529 				return (set_errno(error));
530 			}
531 			/*
532 			 * dotoprocs() ignores the init process if it is
533 			 * in the set, unless it was the only process found.
534 			 * Since we are getting parameters here rather than
535 			 * setting them, we want to make sure init is not
536 			 * excluded if it is in the set.
537 			 */
538 			if (initpp != NULL &&
539 			    procinset(initpp, &procset) &&
540 			    (retthreadp != NULL) &&
541 			    ttoproc(retthreadp) != initpp)
542 				(void) proccmp(initpp, &pcmpargs);
543 
544 			/*
545 			 * If dotoprocs returned success it found at least
546 			 * one thread in the set.  If proccmp() failed to
547 			 * select a thread it is because the user specified
548 			 * a class and none of the threads in the set
549 			 * belonged to that class, or because the process
550 			 * specified was in the middle of exiting and had
551 			 * cleared its thread list.
552 			 */
553 			if (retthreadp == NULL) {
554 				/*
555 				 * Might be here and still holding p_lock
556 				 * if we did a dotolwp on an lwp that
557 				 * existed but was in the wrong class.
558 				 */
559 				if (MUTEX_HELD(&(curproc)->p_lock))
560 					mutex_exit(&(curproc)->p_lock);
561 				return (set_errno(ESRCH));
562 			}
563 
564 			/*
565 			 * User can only use PC_CLNULL with one thread in set.
566 			 */
567 			if (clnullflag && count > 1) {
568 				if (retthreadp != NULL)
569 					mutex_exit(
570 					    &(ttoproc(retthreadp)->p_lock));
571 				ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
572 				return (set_errno(EINVAL));
573 			}
574 		}
575 
576 		ASSERT(retthreadp == NULL ||
577 		    MUTEX_HELD(&(ttoproc(retthreadp)->p_lock)));
578 		/*
579 		 * It is possible to have retthreadp == NULL. Proccmp()
580 		 * in the rare case (p_tlist == NULL) could return without
581 		 * setting a value for retthreadp.
582 		 */
583 		if (retthreadp == NULL) {
584 			ASSERT(MUTEX_NOT_HELD(&(curproc)->p_lock));
585 			return (set_errno(ESRCH));
586 		}
587 		/*
588 		 * We've selected a thread so now get the parameters.
589 		 */
590 		parmsget(retthreadp, &pcparms);
591 
592 		/*
593 		 * Prepare to return parameters to the user
594 		 */
595 		error = parmsout(&pcparms,
596 		    (cmd == PC_GETPARMS ? NULL : &vaparms));
597 
598 		/*
599 		 * Save pid of selected thread before dropping p_lock.
600 		 */
601 		saved_pid = ttoproc(retthreadp)->p_pid;
602 		mutex_exit(&(ttoproc(retthreadp)->p_lock));
603 		ASSERT(MUTEX_NOT_HELD(&curproc->p_lock));
604 
605 		if (error)
606 			return (set_errno(error));
607 
608 		if (cmd == PC_GETPARMS) {
609 			if ((*copyoutfn)(&pcparms, arg, sizeof (pcparms)))
610 				return (set_errno(EFAULT));
611 		} else if ((error = vaparmsout(arg, &pcparms, &vaparms,
612 		    seg)) != 0)
613 			return (set_errno(error));
614 
615 		/*
616 		 * And finally, return the pid of the selected thread.
617 		 */
618 		rv = saved_pid;
619 		break;
620 
621 	case PC_ADMIN:
622 		if (get_udatamodel() == DATAMODEL_NATIVE) {
623 			if ((*copyinfn)(arg, &pcadmin, sizeof (pcadmin_t)))
624 				return (set_errno(EFAULT));
625 #ifdef _SYSCALL32_IMPL
626 		} else {
627 			/* pcadmin struct from ILP32 callers */
628 			pcadmin32_t pcadmin32;
629 
630 			if ((*copyinfn)(arg, &pcadmin32, sizeof (pcadmin32_t)))
631 				return (set_errno(EFAULT));
632 			pcadmin.pc_cid = pcadmin32.pc_cid;
633 			pcadmin.pc_cladmin = (caddr_t)(uintptr_t)
634 			    pcadmin32.pc_cladmin;
635 #endif /* _SYSCALL32_IMPL */
636 		}
637 
638 		if (pcadmin.pc_cid >= loaded_classes ||
639 		    pcadmin.pc_cid < 1)
640 			return (set_errno(EINVAL));
641 
642 		/*
643 		 * Have the class do whatever the user is requesting.
644 		 */
645 		mutex_enter(&ualock);
646 		error = CL_ADMIN(&sclass[pcadmin.pc_cid], pcadmin.pc_cladmin,
647 				CRED());
648 		mutex_exit(&ualock);
649 		break;
650 
651 	case PC_GETPRIRANGE:
652 		if ((*copyinfn)(arg, &pcpri, sizeof (pcpri_t)))
653 			return (set_errno(EFAULT));
654 
655 		if (pcpri.pc_cid >= loaded_classes || pcpri.pc_cid < 0)
656 			return (set_errno(EINVAL));
657 
658 		error = CL_GETCLPRI(&sclass[pcpri.pc_cid], &pcpri);
659 		if (!error) {
660 			if ((*copyoutfn)(&pcpri, arg, sizeof (pcpri)))
661 				return (set_errno(EFAULT));
662 		}
663 		break;
664 
665 	case PC_DONICE:
666 		/*
667 		 * Get pcnice and procset structures from the user.
668 		 */
669 		if ((*copyinfn)(arg, &pcnice, sizeof (pcnice)) ||
670 		    (*copyinfn)(psp, &procset, sizeof (procset)))
671 			return (set_errno(EFAULT));
672 
673 		error = donice(&procset, &pcnice);
674 
675 		if (!error && (pcnice.pc_op == PC_GETNICE)) {
676 			if ((*copyoutfn)(&pcnice, arg, sizeof (pcnice)))
677 				return (set_errno(EFAULT));
678 		}
679 		break;
680 
681 	case PC_SETDFLCL:
682 		if (secpolicy_dispadm(CRED()) != 0)
683 			return (set_errno(EPERM));
684 
685 		if (copyin(arg, (caddr_t)clname, PC_CLNMSZ) != 0)
686 			return (set_errno(EFAULT));
687 		clname[PC_CLNMSZ-1] = '\0';
688 
689 		if (getcid(clname, &classid) != 0)
690 			return (set_errno(EINVAL));
691 		if (classid == syscid)
692 			return (set_errno(EINVAL));
693 		defaultcid = classid;
694 		ASSERT(defaultcid > 0 && defaultcid < loaded_classes);
695 		break;
696 
697 	case PC_GETDFLCL:
698 		mutex_enter(&class_lock);
699 
700 		if (defaultcid >= loaded_classes)
701 			outstr = "";
702 		else
703 			outstr = sclass[defaultcid].cl_name;
704 		size = strlen(outstr) + 1;
705 		if (arg != NULL)
706 			if ((*copyoutfn)(outstr, arg, size) != 0)
707 				error = EFAULT;
708 
709 		mutex_exit(&class_lock);
710 		break;
711 
712 	default:
713 		error = EINVAL;
714 		break;
715 	}
716 	return (error ? (set_errno(error)) : rv);
717 }
718 
719 long
720 priocntlsys(int pc_version, procset_t *psp, int cmd, caddr_t arg, caddr_t arg2)
721 {
722 	return (priocntl_common(pc_version, psp, cmd, arg, arg2,
723 	    UIO_USERSPACE));
724 }
725 
726 /*
727  * The proccmp() function is part of the implementation of the
728  * PC_GETPARMS command of the priocntl system call.  This function works
729  * with the system call code and with the class specific cl_globpri()
730  * function to select one thread from a specified procset based on class
731  * specific criteria. proccmp() is called indirectly from the priocntl
732  * code through the dotoprocs function.  Basic strategy is dotoprocs()
733  * calls us once for each thread in the set.  We in turn call the class
734  * specific function to compare the current thread from dotoprocs to the
735  * "best" (according to the class criteria) found so far.  We keep the
736  * "best" thread in *pcmp_retthreadp.
737  */
738 static int
739 proccmp(proc_t *pp, struct pcmpargs *argp)
740 {
741 	kthread_id_t	tx, ty;
742 	int		last_pri = -1;
743 	int		tx_pri;
744 	int		found = 0;
745 
746 	mutex_enter(&pp->p_lock);
747 
748 	if (pp->p_tlist == NULL) {
749 		mutex_exit(&pp->p_lock);
750 		return (0);
751 	}
752 	(*argp->pcmp_cntp)++;	/* Increment count of procs in the set */
753 
754 	if (*argp->pcmp_cidp == PC_CLNULL) {
755 		/*
756 		 * If no cid is specified, then lets just pick the first one.
757 		 * It doesn't matter because if the number of processes in the
758 		 * set are more than 1, then we return EINVAL in priocntlsys.
759 		 */
760 		*argp->pcmp_cidp = pp->p_tlist->t_cid;
761 	}
762 	ty = tx = pp->p_tlist;
763 	do {
764 		if (tx->t_cid == *argp->pcmp_cidp) {
765 			/*
766 			 * We found one which matches the required cid.
767 			 */
768 			found = 1;
769 			if ((tx_pri = CL_GLOBPRI(tx)) > last_pri) {
770 				last_pri = tx_pri;
771 				ty = tx;
772 			}
773 		}
774 	} while ((tx = tx->t_forw) != pp->p_tlist);
775 	if (found) {
776 		if (*argp->pcmp_retthreadp == NULL) {
777 			/*
778 			 * First time through for this set.
779 			 * keep the mutex held. He might be the one!
780 			 */
781 			*argp->pcmp_retthreadp = ty;
782 		} else {
783 			tx = *argp->pcmp_retthreadp;
784 			if (CL_GLOBPRI(ty) <= CL_GLOBPRI(tx)) {
785 				mutex_exit(&pp->p_lock);
786 			} else {
787 				mutex_exit(&(ttoproc(tx)->p_lock));
788 				*argp->pcmp_retthreadp = ty;
789 			}
790 		}
791 	} else {
792 		/*
793 		 * We actually didn't find anything of the same cid in
794 		 * this process.
795 		 */
796 		mutex_exit(&pp->p_lock);
797 	}
798 	return (0);
799 }
800 
801 
802 int
803 threadcmp(struct pcmpargs *argp, kthread_id_t tp)
804 {
805 	kthread_id_t	tx;
806 	proc_t		*pp;
807 
808 	ASSERT(MUTEX_HELD(&(ttoproc(tp))->p_lock));
809 
810 	(*argp->pcmp_cntp)++;   /* Increment count of procs in the set */
811 	if (*argp->pcmp_cidp == PC_CLNULL) {
812 		/*
813 		 * If no cid is specified, then lets just pick the first one.
814 		 * It doesn't matter because if the number of threads in the
815 		 * set are more than 1, then we return EINVAL in priocntlsys.
816 		 */
817 		*argp->pcmp_cidp = tp->t_cid;
818 	}
819 	if (tp->t_cid == *argp->pcmp_cidp) {
820 		if (*argp->pcmp_retthreadp == NULL) {
821 			/*
822 			 * First time through for this set.
823 			 */
824 			*argp->pcmp_retthreadp = tp;
825 		} else {
826 			tx = *argp->pcmp_retthreadp;
827 			if (CL_GLOBPRI(tp) > CL_GLOBPRI(tx)) {
828 				/*
829 				 * Unlike proccmp(), we don't release the
830 				 * p_lock of the ttoproc(tp) if tp's global
831 				 * priority is less than tx's. We need to go
832 				 * through the entire list before we can do
833 				 * that. The p_lock is released by the caller
834 				 * of dotolwp().
835 				 */
836 				pp = ttoproc(tx);
837 				ASSERT(MUTEX_HELD(&pp->p_lock));
838 				if (pp != curproc) {
839 					mutex_exit(&pp->p_lock);
840 				}
841 				*argp->pcmp_retthreadp = tp;
842 			}
843 		}
844 	}
845 	return (0);
846 }
847 
848 
849 /*
850  * The setparms() function is called indirectly by priocntlsys()
851  * through the dotoprocs() function.  setparms() acts as an
852  * intermediary between dotoprocs() and the parmsset() function,
853  * calling parmsset() for each thread in the set and handling
854  * the error returns on their way back up to dotoprocs().
855  */
856 static int
857 setparms(proc_t *targpp, struct stprmargs *stprmp)
858 {
859 	int error = 0;
860 	kthread_id_t t;
861 	int err;
862 
863 	mutex_enter(&targpp->p_lock);
864 	if ((t = targpp->p_tlist) == NULL) {
865 		mutex_exit(&targpp->p_lock);
866 		return (0);
867 	}
868 	do {
869 		err = parmsset(stprmp->stp_parmsp, t);
870 		if (error == 0)
871 			error = err;
872 	} while ((t = t->t_forw) != targpp->p_tlist);
873 	mutex_exit(&targpp->p_lock);
874 	if (error) {
875 		if (error == EPERM) {
876 			stprmp->stp_error = EPERM;
877 			return (0);
878 		} else {
879 			return (error);
880 		}
881 	} else
882 		return (0);
883 }
884 
885 int
886 setthreadnice(pcnice_t *pcnice, kthread_t *tp)
887 {
888 	int error = 0;
889 	int nice;
890 	int inc;
891 	id_t rtcid;
892 
893 	ASSERT(MUTEX_HELD(&pidlock));
894 	ASSERT(MUTEX_HELD(&(ttoproc(tp)->p_lock)));
895 
896 	/*
897 	 * The XPG5 standard requires that any realtime process or thread
898 	 * must be unaffected by a call to setpriority().
899 	 */
900 	error = getcidbyname("RT", &rtcid);
901 	if ((error == 0) && (tp->t_cid == rtcid)) {
902 		if (pcnice->pc_op == PC_SETNICE)
903 			return (error);
904 	}
905 
906 	if ((error = CL_DONICE(tp, CRED(), 0, &nice)) != 0)
907 		return (error);
908 
909 	if (pcnice->pc_op == PC_GETNICE) {
910 		/*
911 		 * If there is no change to priority, we should return the
912 		 * highest priority (lowest numerical value) pertaining to
913 		 * any of the specified threads.
914 		 */
915 		if (nice < pcnice->pc_val)
916 			pcnice->pc_val = nice;
917 	} else {
918 		ASSERT(pcnice->pc_op == PC_SETNICE);
919 		/*
920 		 * Try to change the nice value of the thread.
921 		 */
922 		inc = pcnice->pc_val - nice;
923 
924 		error = CL_DONICE(tp, CRED(), inc, &inc);
925 	}
926 
927 	return (error);
928 }
929 
930 int
931 setprocnice(proc_t *pp, pcnice_t *pcnice)
932 {
933 	kthread_t *tp;
934 	int retval = 0;
935 	int error = 0;
936 
937 	ASSERT(MUTEX_HELD(&pidlock));
938 	mutex_enter(&pp->p_lock);
939 
940 	if ((tp = pp->p_tlist) == NULL) {
941 		mutex_exit(&pp->p_lock);
942 		return (ESRCH);
943 	}
944 
945 	/*
946 	 * Check permissions before changing the nice value.
947 	 */
948 	if (pcnice->pc_op == PC_SETNICE) {
949 		if (!prochasprocperm(pp, curproc, CRED())) {
950 			mutex_exit(&pp->p_lock);
951 			return (EPERM);
952 		}
953 	}
954 
955 	do {
956 		error = setthreadnice(pcnice, tp);
957 		if (error)
958 			retval = error;
959 	} while ((tp = tp->t_forw) != pp->p_tlist);
960 
961 	mutex_exit(&pp->p_lock);
962 	return (retval);
963 }
964 
965 /*
966  * Update the nice value of the specified LWP or set of processes.
967  */
968 static int
969 donice(procset_t *procset, pcnice_t *pcnice)
970 {
971 	int err_proc = 0;
972 	int err_thread = 0;
973 	int err = 0;
974 
975 	/*
976 	 * Sanity check.
977 	 */
978 	if (pcnice->pc_op != PC_GETNICE && pcnice->pc_op != PC_SETNICE)
979 		return (EINVAL);
980 
981 	/*
982 	 * If it is PC_GETNICE operation then set pc_val to the largest
983 	 * possible nice value to help us find the lowest nice value
984 	 * pertaining to any of the specified processes.
985 	 */
986 	if (pcnice->pc_op == PC_GETNICE)
987 		pcnice->pc_val = NZERO;
988 
989 	if (procset->p_lidtype != P_LWPID ||
990 	    procset->p_ridtype != P_LWPID)
991 		err_proc = dotoprocs(procset, setprocnice, (char *)pcnice);
992 
993 	if (procset->p_lidtype == P_LWPID || procset->p_ridtype == P_LWPID) {
994 		err_thread = dotolwp(procset, setthreadnice, (char *)pcnice);
995 		/*
996 		 * dotolwp() can return with p_lock held.  This is required
997 		 * for the priocntl GETPARMS case.  So, here we just release
998 		 * the p_lock.
999 		 */
1000 		if (MUTEX_HELD(&curproc->p_lock))
1001 			mutex_exit(&curproc->p_lock);
1002 
1003 		/*
1004 		 * If we were called for a single LWP, then ignore ESRCH
1005 		 * returned by the previous dotoprocs() call.
1006 		 */
1007 		if (err_proc == ESRCH)
1008 			err_proc = 0;
1009 	}
1010 
1011 	/*
1012 	 * dotoprocs() ignores the init process if it is in the set, unless
1013 	 * it was the only process found. We want to make sure init is not
1014 	 * excluded if we're going PC_GETNICE operation.
1015 	 */
1016 	if (pcnice->pc_op == PC_GETNICE) {
1017 		proc_t *initpp;
1018 
1019 		mutex_enter(&pidlock);
1020 		initpp = prfind(P_INITPID);
1021 		if (initpp != NULL && procinset(initpp, procset))
1022 			err = setprocnice(initpp, pcnice);
1023 		mutex_exit(&pidlock);
1024 	}
1025 
1026 	/*
1027 	 * We're returning the latest error here that we've got back from
1028 	 * the setthreadnice() or setprocnice(). That is, err_thread and/or
1029 	 * err_proc can be replaced by err.
1030 	 */
1031 	if (!err)
1032 		err = err_thread ? err_thread : err_proc;
1033 
1034 	return (err);
1035 }
1036