xref: /netbsd/sys/kern/sys_sched.c (revision a8552a3a)
1 /*	$NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $	*/
2 
3 /*
4  * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  */
28 
29 /*
30  * System calls relating to the scheduler.
31  *
32  * TODO:
33  *  - Handle pthread_setschedprio() as defined by POSIX;
34  *  - Handle sched_yield() case for SCHED_FIFO as defined by POSIX;
35  */
36 
37 #include <sys/cdefs.h>
38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $");
39 
40 #include <sys/param.h>
41 
42 #include <sys/cpu.h>
43 #include <sys/kauth.h>
44 #include <sys/kmem.h>
45 #include <sys/lwp.h>
46 #include <sys/mutex.h>
47 #include <sys/proc.h>
48 #include <sys/pset.h>
49 #include <sys/sa.h>
50 #include <sys/savar.h>
51 #include <sys/sched.h>
52 #include <sys/syscallargs.h>
53 #include <sys/sysctl.h>
54 #include <sys/systm.h>
55 #include <sys/types.h>
56 #include <sys/unistd.h>
57 
58 #include "opt_sa.h"
59 
60 /*
61  * Convert user priority or the in-kernel priority or convert the current
62  * priority to the appropriate range according to the policy change.
63  */
64 static pri_t
65 convert_pri(lwp_t *l, int policy, pri_t pri)
66 {
67 
68 	/* Convert user priority to the in-kernel */
69 	if (pri != PRI_NONE) {
70 		/* Only for real-time threads */
71 		KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX);
72 		KASSERT(policy != SCHED_OTHER);
73 		return PRI_USER_RT + pri;
74 	}
75 
76 	/* Neither policy, nor priority change */
77 	if (l->l_class == policy)
78 		return l->l_priority;
79 
80 	/* Time-sharing -> real-time */
81 	if (l->l_class == SCHED_OTHER) {
82 		KASSERT(policy == SCHED_FIFO || policy == SCHED_RR);
83 		return PRI_USER_RT;
84 	}
85 
86 	/* Real-time -> time-sharing */
87 	if (policy == SCHED_OTHER) {
88 		KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR);
89 		return l->l_priority - PRI_USER_RT;
90 	}
91 
92 	/* Real-time -> real-time */
93 	return l->l_priority;
94 }
95 
96 int
97 do_sched_setparam(pid_t pid, lwpid_t lid, int policy,
98     const struct sched_param *params)
99 {
100 	struct proc *p;
101 	struct lwp *t;
102 	pri_t pri;
103 	u_int lcnt;
104 	int error;
105 
106 	error = 0;
107 
108 	pri = params->sched_priority;
109 
110 	/* If no parameters specified, just return (this should not happen) */
111 	if (pri == PRI_NONE && policy == SCHED_NONE)
112 		return 0;
113 
114 	/* Validate scheduling class */
115 	if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR))
116 		return EINVAL;
117 
118 	/* Validate priority */
119 	if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX))
120 		return EINVAL;
121 
122 	if (pid != 0) {
123 		/* Find the process */
124 		mutex_enter(proc_lock);
125 		p = p_find(pid, PFIND_LOCKED);
126 		if (p == NULL) {
127 			mutex_exit(proc_lock);
128 			return ESRCH;
129 		}
130 		mutex_enter(p->p_lock);
131 		mutex_exit(proc_lock);
132 		/* Disallow modification of system processes */
133 		if ((p->p_flag & PK_SYSTEM) != 0) {
134 			mutex_exit(p->p_lock);
135 			return EPERM;
136 		}
137 	} else {
138 		/* Use the calling process */
139 		p = curlwp->l_proc;
140 		mutex_enter(p->p_lock);
141 	}
142 
143 	/* Find the LWP(s) */
144 	lcnt = 0;
145 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
146 		pri_t kpri;
147 		int lpolicy;
148 
149 		if (lid && lid != t->l_lid)
150 			continue;
151 
152 		lcnt++;
153 		lwp_lock(t);
154 		lpolicy = (policy == SCHED_NONE) ? t->l_class : policy;
155 
156 		/* Disallow setting of priority for SCHED_OTHER threads */
157 		if (lpolicy == SCHED_OTHER && pri != PRI_NONE) {
158 			lwp_unlock(t);
159 			error = EINVAL;
160 			break;
161 		}
162 
163 		/* Convert priority, if needed */
164 		kpri = convert_pri(t, lpolicy, pri);
165 
166 		/* Check the permission */
167 		error = kauth_authorize_process(kauth_cred_get(),
168 		    KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy),
169 		    KAUTH_ARG(kpri));
170 		if (error) {
171 			lwp_unlock(t);
172 			break;
173 		}
174 
175 		/* Set the scheduling class, change the priority */
176 		t->l_class = lpolicy;
177 		lwp_changepri(t, kpri);
178 		lwp_unlock(t);
179 	}
180 	mutex_exit(p->p_lock);
181 	return (lcnt == 0) ? ESRCH : error;
182 }
183 
184 /*
185  * Set scheduling parameters.
186  */
187 int
188 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap,
189     register_t *retval)
190 {
191 	/* {
192 		syscallarg(pid_t) pid;
193 		syscallarg(lwpid_t) lid;
194 		syscallarg(int) policy;
195 		syscallarg(const struct sched_param *) params;
196 	} */
197 	struct sched_param params;
198 	int error;
199 
200 	/* Get the parameters from the user-space */
201 	error = copyin(SCARG(uap, params), &params, sizeof(params));
202 	if (error)
203 		goto out;
204 
205 	error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid),
206 	    SCARG(uap, policy), &params);
207 
208  out:
209 	return (error);
210 }
211 
212 int
213 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy,
214     struct sched_param *params)
215 {
216 	struct sched_param lparams;
217 	struct lwp *t;
218 	int error, lpolicy;
219 
220 	/* Locks the LWP */
221 	t = lwp_find2(pid, lid);
222 	if (t == NULL)
223 		return ESRCH;
224 
225 	/* Check the permission */
226 	error = kauth_authorize_process(kauth_cred_get(),
227 	    KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL);
228 	if (error != 0) {
229 		mutex_exit(t->l_proc->p_lock);
230 		return error;
231 	}
232 
233 	lwp_lock(t);
234 	lparams.sched_priority = t->l_priority;
235 	lpolicy = t->l_class;
236 
237 	switch (lpolicy) {
238 	case SCHED_OTHER:
239 		lparams.sched_priority -= PRI_USER;
240 		break;
241 	case SCHED_RR:
242 	case SCHED_FIFO:
243 		lparams.sched_priority -= PRI_USER_RT;
244 		break;
245 	}
246 
247 	if (policy != NULL)
248 		*policy = lpolicy;
249 
250 	if (params != NULL)
251 		*params = lparams;
252 
253 	lwp_unlock(t);
254 	mutex_exit(t->l_proc->p_lock);
255 	return error;
256 }
257 
258 /*
259  * Get scheduling parameters.
260  */
261 int
262 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap,
263     register_t *retval)
264 {
265 	/* {
266 		syscallarg(pid_t) pid;
267 		syscallarg(lwpid_t) lid;
268 		syscallarg(int *) policy;
269 		syscallarg(struct sched_param *) params;
270 	} */
271 	struct sched_param params;
272 	int error, policy;
273 
274 	error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy,
275 	    &params);
276 	if (error)
277 		goto out;
278 
279 	error = copyout(&params, SCARG(uap, params), sizeof(params));
280 	if (error == 0 && SCARG(uap, policy) != NULL)
281 		error = copyout(&policy, SCARG(uap, policy), sizeof(int));
282 
283  out:
284 	return (error);
285 }
286 
287 /* Allocate the CPU set, and get it from userspace */
288 static int
289 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size)
290 {
291 	int error;
292 
293 	*dset = kcpuset_create();
294 	error = kcpuset_copyin(sset, *dset, size);
295 	if (error != 0)
296 		kcpuset_unuse(*dset, NULL);
297 	return error;
298 }
299 
300 /*
301  * Set affinity.
302  */
303 int
304 sys__sched_setaffinity(struct lwp *l,
305     const struct sys__sched_setaffinity_args *uap, register_t *retval)
306 {
307 	/* {
308 		syscallarg(pid_t) pid;
309 		syscallarg(lwpid_t) lid;
310 		syscallarg(size_t) size;
311 		syscallarg(const cpuset_t *) cpuset;
312 	} */
313 	kcpuset_t *cpuset, *cpulst = NULL;
314 	struct cpu_info *ci = NULL;
315 	struct proc *p;
316 	struct lwp *t;
317 	CPU_INFO_ITERATOR cii;
318 	lwpid_t lid;
319 	u_int lcnt;
320 	int error;
321 
322 	if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
323 		return error;
324 
325 	/* Look for a CPU in the set */
326 	for (CPU_INFO_FOREACH(cii, ci)) {
327 		error = kcpuset_isset(cpu_index(ci), cpuset);
328 		if (error) {
329 			if (error == -1) {
330 				error = E2BIG;
331 				goto out;
332 			}
333 			break;
334 		}
335 	}
336 	if (ci == NULL) {
337 		/* Empty set */
338 		kcpuset_unuse(cpuset, NULL);
339 		cpuset = NULL;
340 	}
341 
342 	if (SCARG(uap, pid) != 0) {
343 		/* Find the process */
344 		mutex_enter(proc_lock);
345 		p = p_find(SCARG(uap, pid), PFIND_LOCKED);
346 		if (p == NULL) {
347 			mutex_exit(proc_lock);
348 			error = ESRCH;
349 			goto out;
350 		}
351 		mutex_enter(p->p_lock);
352 		mutex_exit(proc_lock);
353 		/* Disallow modification of system processes. */
354 		if ((p->p_flag & PK_SYSTEM) != 0) {
355 			mutex_exit(p->p_lock);
356 			error = EPERM;
357 			goto out;
358 		}
359 	} else {
360 		/* Use the calling process */
361 		p = l->l_proc;
362 		mutex_enter(p->p_lock);
363 	}
364 
365 	/*
366 	 * Check the permission.
367 	 */
368 	error = kauth_authorize_process(l->l_cred,
369 	    KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL);
370 	if (error != 0) {
371 		mutex_exit(p->p_lock);
372 		goto out;
373 	}
374 
375 #ifdef KERN_SA
376 	/*
377 	 * Don't permit changing the affinity of an SA process. The only
378 	 * thing that would make sense wold be to set the affinity of
379 	 * a VP and all threads running on it. But we don't support that
380 	 * now, so just don't permit it.
381 	 *
382 	 * Test is here so that caller gets auth errors before SA
383 	 * errors.
384 	 */
385 	if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) {
386 		mutex_exit(p->p_lock);
387 		error = EINVAL;
388 		goto out;
389 	}
390 #endif
391 
392 	/* Find the LWP(s) */
393 	lcnt = 0;
394 	lid = SCARG(uap, lid);
395 	LIST_FOREACH(t, &p->p_lwps, l_sibling) {
396 		if (lid && lid != t->l_lid)
397 			continue;
398 		lwp_lock(t);
399 		/* It is not allowed to set the affinity for zombie LWPs */
400 		if (t->l_stat == LSZOMB) {
401 			lwp_unlock(t);
402 			continue;
403 		}
404 		if (cpuset) {
405 			/* Set the affinity flag and new CPU set */
406 			t->l_flag |= LW_AFFINITY;
407 			kcpuset_use(cpuset);
408 			if (t->l_affinity != NULL)
409 				kcpuset_unuse(t->l_affinity, &cpulst);
410 			t->l_affinity = cpuset;
411 			/* Migrate to another CPU, unlocks LWP */
412 			lwp_migrate(t, ci);
413 		} else {
414 			/* Unset the affinity flag */
415 			t->l_flag &= ~LW_AFFINITY;
416 			if (t->l_affinity != NULL)
417 				kcpuset_unuse(t->l_affinity, &cpulst);
418 			t->l_affinity = NULL;
419 			lwp_unlock(t);
420 		}
421 		lcnt++;
422 	}
423 	mutex_exit(p->p_lock);
424 	if (lcnt == 0)
425 		error = ESRCH;
426 out:
427 	if (cpuset != NULL)
428 		kcpuset_unuse(cpuset, &cpulst);
429 	kcpuset_destroy(cpulst);
430 	return error;
431 }
432 
433 /*
434  * Get affinity.
435  */
436 int
437 sys__sched_getaffinity(struct lwp *l,
438     const struct sys__sched_getaffinity_args *uap, register_t *retval)
439 {
440 	/* {
441 		syscallarg(pid_t) pid;
442 		syscallarg(lwpid_t) lid;
443 		syscallarg(size_t) size;
444 		syscallarg(cpuset_t *) cpuset;
445 	} */
446 	struct lwp *t;
447 	kcpuset_t *cpuset;
448 	int error;
449 
450 	if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size))))
451 		return error;
452 
453 	/* Locks the LWP */
454 	t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid));
455 	if (t == NULL) {
456 		error = ESRCH;
457 		goto out;
458 	}
459 	/* Check the permission */
460 	if (kauth_authorize_process(l->l_cred,
461 	    KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) {
462 		mutex_exit(t->l_proc->p_lock);
463 		error = EPERM;
464 		goto out;
465 	}
466 	lwp_lock(t);
467 	if (t->l_flag & LW_AFFINITY) {
468 		KASSERT(t->l_affinity != NULL);
469 		kcpuset_copy(cpuset, t->l_affinity);
470 	} else
471 		kcpuset_zero(cpuset);
472 	lwp_unlock(t);
473 	mutex_exit(t->l_proc->p_lock);
474 
475 	error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size));
476 out:
477 	kcpuset_unuse(cpuset, NULL);
478 	return error;
479 }
480 
481 /*
482  * Yield.
483  */
484 int
485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval)
486 {
487 
488 	yield();
489 #ifdef KERN_SA
490 	if (l->l_flag & LW_SA) {
491 		sa_preempt(l);
492 	}
493 #endif
494 	return 0;
495 }
496 
497 /*
498  * Sysctl nodes and initialization.
499  */
500 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup")
501 {
502 	const struct sysctlnode *node = NULL;
503 
504 	sysctl_createv(clog, 0, NULL, NULL,
505 		CTLFLAG_PERMANENT,
506 		CTLTYPE_NODE, "kern", NULL,
507 		NULL, 0, NULL, 0,
508 		CTL_KERN, CTL_EOL);
509 	sysctl_createv(clog, 0, NULL, NULL,
510 		CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
511 		CTLTYPE_INT, "posix_sched",
512 		SYSCTL_DESCR("Version of IEEE Std 1003.1 and its "
513 			     "Process Scheduling option to which the "
514 			     "system attempts to conform"),
515 		NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0,
516 		CTL_KERN, CTL_CREATE, CTL_EOL);
517 	sysctl_createv(clog, 0, NULL, &node,
518 		CTLFLAG_PERMANENT,
519 		CTLTYPE_NODE, "sched",
520 		SYSCTL_DESCR("Scheduler options"),
521 		NULL, 0, NULL, 0,
522 		CTL_KERN, CTL_CREATE, CTL_EOL);
523 
524 	if (node == NULL)
525 		return;
526 
527 	sysctl_createv(clog, 0, &node, NULL,
528 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
529 		CTLTYPE_INT, "pri_min",
530 		SYSCTL_DESCR("Minimal POSIX real-time priority"),
531 		NULL, SCHED_PRI_MIN, NULL, 0,
532 		CTL_CREATE, CTL_EOL);
533 	sysctl_createv(clog, 0, &node, NULL,
534 		CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE,
535 		CTLTYPE_INT, "pri_max",
536 		SYSCTL_DESCR("Maximal POSIX real-time priority"),
537 		NULL, SCHED_PRI_MAX, NULL, 0,
538 		CTL_CREATE, CTL_EOL);
539 }
540