1 /* $NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $ */ 2 3 /* 4 * Copyright (c) 2008, Mindaugas Rasiukevicius <rmind at NetBSD org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 /* 30 * System calls relating to the scheduler. 31 * 32 * TODO: 33 * - Handle pthread_setschedprio() as defined by POSIX; 34 * - Handle sched_yield() case for SCHED_FIFO as defined by POSIX; 35 */ 36 37 #include <sys/cdefs.h> 38 __KERNEL_RCSID(0, "$NetBSD: sys_sched.c,v 1.30 2008/10/18 19:24:04 rmind Exp $"); 39 40 #include <sys/param.h> 41 42 #include <sys/cpu.h> 43 #include <sys/kauth.h> 44 #include <sys/kmem.h> 45 #include <sys/lwp.h> 46 #include <sys/mutex.h> 47 #include <sys/proc.h> 48 #include <sys/pset.h> 49 #include <sys/sa.h> 50 #include <sys/savar.h> 51 #include <sys/sched.h> 52 #include <sys/syscallargs.h> 53 #include <sys/sysctl.h> 54 #include <sys/systm.h> 55 #include <sys/types.h> 56 #include <sys/unistd.h> 57 58 #include "opt_sa.h" 59 60 /* 61 * Convert user priority or the in-kernel priority or convert the current 62 * priority to the appropriate range according to the policy change. 63 */ 64 static pri_t 65 convert_pri(lwp_t *l, int policy, pri_t pri) 66 { 67 68 /* Convert user priority to the in-kernel */ 69 if (pri != PRI_NONE) { 70 /* Only for real-time threads */ 71 KASSERT(pri >= SCHED_PRI_MIN && pri <= SCHED_PRI_MAX); 72 KASSERT(policy != SCHED_OTHER); 73 return PRI_USER_RT + pri; 74 } 75 76 /* Neither policy, nor priority change */ 77 if (l->l_class == policy) 78 return l->l_priority; 79 80 /* Time-sharing -> real-time */ 81 if (l->l_class == SCHED_OTHER) { 82 KASSERT(policy == SCHED_FIFO || policy == SCHED_RR); 83 return PRI_USER_RT; 84 } 85 86 /* Real-time -> time-sharing */ 87 if (policy == SCHED_OTHER) { 88 KASSERT(l->l_class == SCHED_FIFO || l->l_class == SCHED_RR); 89 return l->l_priority - PRI_USER_RT; 90 } 91 92 /* Real-time -> real-time */ 93 return l->l_priority; 94 } 95 96 int 97 do_sched_setparam(pid_t pid, lwpid_t lid, int policy, 98 const struct sched_param *params) 99 { 100 struct proc *p; 101 struct lwp *t; 102 pri_t pri; 103 u_int lcnt; 104 int error; 105 106 error = 0; 107 108 pri = params->sched_priority; 109 110 /* If no parameters specified, just return (this should not happen) */ 111 if (pri == PRI_NONE && policy == SCHED_NONE) 112 return 0; 113 114 /* Validate scheduling class */ 115 if (policy != SCHED_NONE && (policy < SCHED_OTHER || policy > SCHED_RR)) 116 return EINVAL; 117 118 /* Validate priority */ 119 if (pri != PRI_NONE && (pri < SCHED_PRI_MIN || pri > SCHED_PRI_MAX)) 120 return EINVAL; 121 122 if (pid != 0) { 123 /* Find the process */ 124 mutex_enter(proc_lock); 125 p = p_find(pid, PFIND_LOCKED); 126 if (p == NULL) { 127 mutex_exit(proc_lock); 128 return ESRCH; 129 } 130 mutex_enter(p->p_lock); 131 mutex_exit(proc_lock); 132 /* Disallow modification of system processes */ 133 if ((p->p_flag & PK_SYSTEM) != 0) { 134 mutex_exit(p->p_lock); 135 return EPERM; 136 } 137 } else { 138 /* Use the calling process */ 139 p = curlwp->l_proc; 140 mutex_enter(p->p_lock); 141 } 142 143 /* Find the LWP(s) */ 144 lcnt = 0; 145 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 146 pri_t kpri; 147 int lpolicy; 148 149 if (lid && lid != t->l_lid) 150 continue; 151 152 lcnt++; 153 lwp_lock(t); 154 lpolicy = (policy == SCHED_NONE) ? t->l_class : policy; 155 156 /* Disallow setting of priority for SCHED_OTHER threads */ 157 if (lpolicy == SCHED_OTHER && pri != PRI_NONE) { 158 lwp_unlock(t); 159 error = EINVAL; 160 break; 161 } 162 163 /* Convert priority, if needed */ 164 kpri = convert_pri(t, lpolicy, pri); 165 166 /* Check the permission */ 167 error = kauth_authorize_process(kauth_cred_get(), 168 KAUTH_PROCESS_SCHEDULER_SETPARAM, p, t, KAUTH_ARG(lpolicy), 169 KAUTH_ARG(kpri)); 170 if (error) { 171 lwp_unlock(t); 172 break; 173 } 174 175 /* Set the scheduling class, change the priority */ 176 t->l_class = lpolicy; 177 lwp_changepri(t, kpri); 178 lwp_unlock(t); 179 } 180 mutex_exit(p->p_lock); 181 return (lcnt == 0) ? ESRCH : error; 182 } 183 184 /* 185 * Set scheduling parameters. 186 */ 187 int 188 sys__sched_setparam(struct lwp *l, const struct sys__sched_setparam_args *uap, 189 register_t *retval) 190 { 191 /* { 192 syscallarg(pid_t) pid; 193 syscallarg(lwpid_t) lid; 194 syscallarg(int) policy; 195 syscallarg(const struct sched_param *) params; 196 } */ 197 struct sched_param params; 198 int error; 199 200 /* Get the parameters from the user-space */ 201 error = copyin(SCARG(uap, params), ¶ms, sizeof(params)); 202 if (error) 203 goto out; 204 205 error = do_sched_setparam(SCARG(uap, pid), SCARG(uap, lid), 206 SCARG(uap, policy), ¶ms); 207 208 out: 209 return (error); 210 } 211 212 int 213 do_sched_getparam(pid_t pid, lwpid_t lid, int *policy, 214 struct sched_param *params) 215 { 216 struct sched_param lparams; 217 struct lwp *t; 218 int error, lpolicy; 219 220 /* Locks the LWP */ 221 t = lwp_find2(pid, lid); 222 if (t == NULL) 223 return ESRCH; 224 225 /* Check the permission */ 226 error = kauth_authorize_process(kauth_cred_get(), 227 KAUTH_PROCESS_SCHEDULER_GETPARAM, t->l_proc, NULL, NULL, NULL); 228 if (error != 0) { 229 mutex_exit(t->l_proc->p_lock); 230 return error; 231 } 232 233 lwp_lock(t); 234 lparams.sched_priority = t->l_priority; 235 lpolicy = t->l_class; 236 237 switch (lpolicy) { 238 case SCHED_OTHER: 239 lparams.sched_priority -= PRI_USER; 240 break; 241 case SCHED_RR: 242 case SCHED_FIFO: 243 lparams.sched_priority -= PRI_USER_RT; 244 break; 245 } 246 247 if (policy != NULL) 248 *policy = lpolicy; 249 250 if (params != NULL) 251 *params = lparams; 252 253 lwp_unlock(t); 254 mutex_exit(t->l_proc->p_lock); 255 return error; 256 } 257 258 /* 259 * Get scheduling parameters. 260 */ 261 int 262 sys__sched_getparam(struct lwp *l, const struct sys__sched_getparam_args *uap, 263 register_t *retval) 264 { 265 /* { 266 syscallarg(pid_t) pid; 267 syscallarg(lwpid_t) lid; 268 syscallarg(int *) policy; 269 syscallarg(struct sched_param *) params; 270 } */ 271 struct sched_param params; 272 int error, policy; 273 274 error = do_sched_getparam(SCARG(uap, pid), SCARG(uap, lid), &policy, 275 ¶ms); 276 if (error) 277 goto out; 278 279 error = copyout(¶ms, SCARG(uap, params), sizeof(params)); 280 if (error == 0 && SCARG(uap, policy) != NULL) 281 error = copyout(&policy, SCARG(uap, policy), sizeof(int)); 282 283 out: 284 return (error); 285 } 286 287 /* Allocate the CPU set, and get it from userspace */ 288 static int 289 genkcpuset(kcpuset_t **dset, const cpuset_t *sset, size_t size) 290 { 291 int error; 292 293 *dset = kcpuset_create(); 294 error = kcpuset_copyin(sset, *dset, size); 295 if (error != 0) 296 kcpuset_unuse(*dset, NULL); 297 return error; 298 } 299 300 /* 301 * Set affinity. 302 */ 303 int 304 sys__sched_setaffinity(struct lwp *l, 305 const struct sys__sched_setaffinity_args *uap, register_t *retval) 306 { 307 /* { 308 syscallarg(pid_t) pid; 309 syscallarg(lwpid_t) lid; 310 syscallarg(size_t) size; 311 syscallarg(const cpuset_t *) cpuset; 312 } */ 313 kcpuset_t *cpuset, *cpulst = NULL; 314 struct cpu_info *ci = NULL; 315 struct proc *p; 316 struct lwp *t; 317 CPU_INFO_ITERATOR cii; 318 lwpid_t lid; 319 u_int lcnt; 320 int error; 321 322 if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)))) 323 return error; 324 325 /* Look for a CPU in the set */ 326 for (CPU_INFO_FOREACH(cii, ci)) { 327 error = kcpuset_isset(cpu_index(ci), cpuset); 328 if (error) { 329 if (error == -1) { 330 error = E2BIG; 331 goto out; 332 } 333 break; 334 } 335 } 336 if (ci == NULL) { 337 /* Empty set */ 338 kcpuset_unuse(cpuset, NULL); 339 cpuset = NULL; 340 } 341 342 if (SCARG(uap, pid) != 0) { 343 /* Find the process */ 344 mutex_enter(proc_lock); 345 p = p_find(SCARG(uap, pid), PFIND_LOCKED); 346 if (p == NULL) { 347 mutex_exit(proc_lock); 348 error = ESRCH; 349 goto out; 350 } 351 mutex_enter(p->p_lock); 352 mutex_exit(proc_lock); 353 /* Disallow modification of system processes. */ 354 if ((p->p_flag & PK_SYSTEM) != 0) { 355 mutex_exit(p->p_lock); 356 error = EPERM; 357 goto out; 358 } 359 } else { 360 /* Use the calling process */ 361 p = l->l_proc; 362 mutex_enter(p->p_lock); 363 } 364 365 /* 366 * Check the permission. 367 */ 368 error = kauth_authorize_process(l->l_cred, 369 KAUTH_PROCESS_SCHEDULER_SETAFFINITY, p, NULL, NULL, NULL); 370 if (error != 0) { 371 mutex_exit(p->p_lock); 372 goto out; 373 } 374 375 #ifdef KERN_SA 376 /* 377 * Don't permit changing the affinity of an SA process. The only 378 * thing that would make sense wold be to set the affinity of 379 * a VP and all threads running on it. But we don't support that 380 * now, so just don't permit it. 381 * 382 * Test is here so that caller gets auth errors before SA 383 * errors. 384 */ 385 if ((p->p_sflag & (PS_SA | PS_WEXIT)) != 0 || p->p_sa != NULL) { 386 mutex_exit(p->p_lock); 387 error = EINVAL; 388 goto out; 389 } 390 #endif 391 392 /* Find the LWP(s) */ 393 lcnt = 0; 394 lid = SCARG(uap, lid); 395 LIST_FOREACH(t, &p->p_lwps, l_sibling) { 396 if (lid && lid != t->l_lid) 397 continue; 398 lwp_lock(t); 399 /* It is not allowed to set the affinity for zombie LWPs */ 400 if (t->l_stat == LSZOMB) { 401 lwp_unlock(t); 402 continue; 403 } 404 if (cpuset) { 405 /* Set the affinity flag and new CPU set */ 406 t->l_flag |= LW_AFFINITY; 407 kcpuset_use(cpuset); 408 if (t->l_affinity != NULL) 409 kcpuset_unuse(t->l_affinity, &cpulst); 410 t->l_affinity = cpuset; 411 /* Migrate to another CPU, unlocks LWP */ 412 lwp_migrate(t, ci); 413 } else { 414 /* Unset the affinity flag */ 415 t->l_flag &= ~LW_AFFINITY; 416 if (t->l_affinity != NULL) 417 kcpuset_unuse(t->l_affinity, &cpulst); 418 t->l_affinity = NULL; 419 lwp_unlock(t); 420 } 421 lcnt++; 422 } 423 mutex_exit(p->p_lock); 424 if (lcnt == 0) 425 error = ESRCH; 426 out: 427 if (cpuset != NULL) 428 kcpuset_unuse(cpuset, &cpulst); 429 kcpuset_destroy(cpulst); 430 return error; 431 } 432 433 /* 434 * Get affinity. 435 */ 436 int 437 sys__sched_getaffinity(struct lwp *l, 438 const struct sys__sched_getaffinity_args *uap, register_t *retval) 439 { 440 /* { 441 syscallarg(pid_t) pid; 442 syscallarg(lwpid_t) lid; 443 syscallarg(size_t) size; 444 syscallarg(cpuset_t *) cpuset; 445 } */ 446 struct lwp *t; 447 kcpuset_t *cpuset; 448 int error; 449 450 if ((error = genkcpuset(&cpuset, SCARG(uap, cpuset), SCARG(uap, size)))) 451 return error; 452 453 /* Locks the LWP */ 454 t = lwp_find2(SCARG(uap, pid), SCARG(uap, lid)); 455 if (t == NULL) { 456 error = ESRCH; 457 goto out; 458 } 459 /* Check the permission */ 460 if (kauth_authorize_process(l->l_cred, 461 KAUTH_PROCESS_SCHEDULER_GETAFFINITY, t->l_proc, NULL, NULL, NULL)) { 462 mutex_exit(t->l_proc->p_lock); 463 error = EPERM; 464 goto out; 465 } 466 lwp_lock(t); 467 if (t->l_flag & LW_AFFINITY) { 468 KASSERT(t->l_affinity != NULL); 469 kcpuset_copy(cpuset, t->l_affinity); 470 } else 471 kcpuset_zero(cpuset); 472 lwp_unlock(t); 473 mutex_exit(t->l_proc->p_lock); 474 475 error = kcpuset_copyout(cpuset, SCARG(uap, cpuset), SCARG(uap, size)); 476 out: 477 kcpuset_unuse(cpuset, NULL); 478 return error; 479 } 480 481 /* 482 * Yield. 483 */ 484 int 485 sys_sched_yield(struct lwp *l, const void *v, register_t *retval) 486 { 487 488 yield(); 489 #ifdef KERN_SA 490 if (l->l_flag & LW_SA) { 491 sa_preempt(l); 492 } 493 #endif 494 return 0; 495 } 496 497 /* 498 * Sysctl nodes and initialization. 499 */ 500 SYSCTL_SETUP(sysctl_sched_setup, "sysctl sched setup") 501 { 502 const struct sysctlnode *node = NULL; 503 504 sysctl_createv(clog, 0, NULL, NULL, 505 CTLFLAG_PERMANENT, 506 CTLTYPE_NODE, "kern", NULL, 507 NULL, 0, NULL, 0, 508 CTL_KERN, CTL_EOL); 509 sysctl_createv(clog, 0, NULL, NULL, 510 CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE, 511 CTLTYPE_INT, "posix_sched", 512 SYSCTL_DESCR("Version of IEEE Std 1003.1 and its " 513 "Process Scheduling option to which the " 514 "system attempts to conform"), 515 NULL, _POSIX_PRIORITY_SCHEDULING, NULL, 0, 516 CTL_KERN, CTL_CREATE, CTL_EOL); 517 sysctl_createv(clog, 0, NULL, &node, 518 CTLFLAG_PERMANENT, 519 CTLTYPE_NODE, "sched", 520 SYSCTL_DESCR("Scheduler options"), 521 NULL, 0, NULL, 0, 522 CTL_KERN, CTL_CREATE, CTL_EOL); 523 524 if (node == NULL) 525 return; 526 527 sysctl_createv(clog, 0, &node, NULL, 528 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 529 CTLTYPE_INT, "pri_min", 530 SYSCTL_DESCR("Minimal POSIX real-time priority"), 531 NULL, SCHED_PRI_MIN, NULL, 0, 532 CTL_CREATE, CTL_EOL); 533 sysctl_createv(clog, 0, &node, NULL, 534 CTLFLAG_PERMANENT | CTLFLAG_IMMEDIATE, 535 CTLTYPE_INT, "pri_max", 536 SYSCTL_DESCR("Maximal POSIX real-time priority"), 537 NULL, SCHED_PRI_MAX, NULL, 0, 538 CTL_CREATE, CTL_EOL); 539 } 540