/*
 * Copyright (c) 2005 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Sergey Glushchenko <deen@smz.com.ua>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cpumask.h>
#include <sys/errno.h>
#include <sys/globaldata.h>		/* curthread */
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/sysmsg.h>			/* struct usched_set_args */
#include <sys/systm.h>			/* strcmp() */
#include <sys/usched.h>

#include <machine/smp.h>

static TAILQ_HEAD(, usched) usched_list = TAILQ_HEAD_INITIALIZER(usched_list);

cpumask_t usched_mastermask = CPUMASK_INITIALIZER_ALLONES;

static int setaffinity_lp(struct lwp *lp, cpumask_t *mask);

/*
 * Called from very low level boot code, sys/kern/init_main.c:mi_proc0init().
 * We cannot do anything fancy here: no allocations, nothing other than
 * static initialization.
 */
struct usched *
usched_init(void)
{
        const char *defsched;

        defsched = kgetenv("kern.user_scheduler");

        /*
         * Add the various userland schedulers to the system.
         */
        usched_ctl(&usched_bsd4, USCH_ADD);
        usched_ctl(&usched_dfly, USCH_ADD);
        usched_ctl(&usched_dummy, USCH_ADD);
        if (defsched == NULL)
                return (&usched_dfly);
        if (strcmp(defsched, "bsd4") == 0)
                return (&usched_bsd4);
        if (strcmp(defsched, "dfly") == 0)
                return (&usched_dfly);
        kprintf("WARNING: Running dummy userland scheduler\n");
        return (&usched_dummy);
}
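/*
 * Boot-time selection sketch (illustrative only): kgetenv() reads the
 * kernel environment, so the default userland scheduler can typically be
 * chosen from /boot/loader.conf:
 *
 *	kern.user_scheduler="bsd4"	(or "dfly", the default)
 *
 * Any other value falls through to the dummy scheduler with the warning
 * above.
 */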
/*
 * USCHED_CTL
 *
 * SYNOPSIS:
 *	Add a usched to, or remove one from, the list of available
 *	schedulers.
 *
 * ARGUMENTS:
 *	usched - pointer to target scheduler
 *	action - USCH_ADD or USCH_REM
 *
 * RETURN VALUES:
 *	0      - success
 *	EINVAL - error
 */
int
usched_ctl(struct usched *usched, int action)
{
        struct usched *item;	/* temporary iterator for TAILQ processing */
        int error = 0;

        switch (action) {
        case USCH_ADD:
                /*
                 * Make sure it isn't already on the list.
                 */
#ifdef INVARIANTS
                TAILQ_FOREACH(item, &usched_list, entry) {
                        KKASSERT(item != usched);
                }
#endif
                /*
                 * Optional callback to the scheduler before we officially
                 * add it to the list.
                 */
                if (usched->usched_register)
                        usched->usched_register();
                TAILQ_INSERT_TAIL(&usched_list, usched, entry);
                break;
        case USCH_REM:
                /*
                 * Do not allow the default scheduler to be removed.
                 */
                if (strcmp(usched->name, "bsd4") == 0) {
                        error = EINVAL;
                        break;
                }
                TAILQ_FOREACH(item, &usched_list, entry) {
                        if (item == usched)
                                break;
                }
                if (item) {
                        if (item->usched_unregister)
                                item->usched_unregister();
                        TAILQ_REMOVE(&usched_list, item, entry);
                } else {
                        error = EINVAL;
                }
                break;
        default:
                error = EINVAL;
                break;
        }
        return (error);
}
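/*
 * Registration sketch (illustrative only, not built here): a scheduler
 * implementation fills in a struct usched with its name and the callbacks
 * referenced in this file (schedulerclock, release_curproc,
 * heuristic_exiting, changedcpu, plus the optional usched_register/
 * usched_unregister hooks used above), then adds itself to the list:
 *
 *	static struct usched usched_example = { ... };
 *
 *	if (usched_ctl(&usched_example, USCH_ADD) != 0)
 *		kprintf("usched_example: registration failed\n");
 */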
/*
 * Called from the scheduler clock on each cpu independently at the
 * common scheduling rate.  If the scheduler clock interrupted a running
 * lwp the lp will be non-NULL.
 */
void
usched_schedulerclock(struct lwp *lp, sysclock_t periodic, sysclock_t time)
{
        struct usched *item;

        TAILQ_FOREACH(item, &usched_list, entry) {
                if (lp && lp->lwp_proc->p_usched == item)
                        item->schedulerclock(lp, periodic, time);
                else
                        item->schedulerclock(NULL, periodic, time);
        }
}

/*
 * USCHED_SET(syscall)
 *
 * SYNOPSIS:
 *	Set up a proc's usched.
 *
 * ARGUMENTS:
 *	pid	- target process id; must be 0 or the caller's own pid
 *	cmd	- one of the USCHED_* commands handled below
 *	data	- command-specific user buffer (in or out)
 *	bytes	- size in bytes of the buffer at data
 *
 * RETURN VALUES:
 *	0      - success
 *	EFBIG  - error (invalid cpu#)
 *	EPERM  - error (failed to delete cpu#)
 *	EINVAL - error (other reasons)
 *
 * MPALMOSTSAFE
 */
int
sys_usched_set(struct sysmsg *sysmsg, const struct usched_set_args *uap)
{
        struct proc *p = curthread->td_proc;
        struct usched *item;	/* temporary iterator for TAILQ processing */
        int error;
        char buffer[NAME_LENGTH];
        cpumask_t mask;
        struct lwp *lp;
        int cpuid;

        if (uap->pid != 0 && uap->pid != curthread->td_proc->p_pid)
                return (EINVAL);

        lp = curthread->td_lwp;
        lwkt_gettoken(&lp->lwp_token);

        switch (uap->cmd) {
        case USCHED_SET_SCHEDULER:
                if ((error = priv_check(curthread, PRIV_SCHED_SET)) != 0)
                        break;
                error = copyinstr(uap->data, buffer, sizeof(buffer), NULL);
                if (error)
                        break;
                TAILQ_FOREACH(item, &usched_list, entry) {
                        if (strcmp(item->name, buffer) == 0)
                                break;
                }

                /*
                 * If the scheduler for a process is being changed,
                 * disassociate the old scheduler before switching to
                 * the new one.
                 *
                 * XXX we might have to add an additional ABI call to do
                 * a 'full disassociation' and another ABI call to do a
                 * 'full reassociation'
                 */
                /* XXX lwp have to deal with multiple lwps here */
                if (p->p_nthreads != 1) {
                        error = EINVAL;
                        break;
                }
                if (item && item != p->p_usched) {
                        /* XXX lwp */
                        p->p_usched->release_curproc(ONLY_LWP_IN_PROC(p));
                        p->p_usched->heuristic_exiting(ONLY_LWP_IN_PROC(p), p);
                        p->p_usched = item;
                } else if (item == NULL) {
                        error = EINVAL;
                }
                break;
        case USCHED_SET_CPU:
                if ((error = priv_check(curthread, PRIV_SCHED_CPUSET)) != 0)
                        break;
                if (uap->bytes != sizeof(int)) {
                        error = EINVAL;
                        break;
                }
                error = copyin(uap->data, &cpuid, sizeof(int));
                if (error)
                        break;
                if (cpuid < 0 || cpuid >= ncpus) {
                        error = EFBIG;
                        break;
                }
                if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
                        error = EINVAL;
                        break;
                }
                CPUMASK_ASSBIT(lp->lwp_cpumask, cpuid);
                if (cpuid != mycpu->gd_cpuid) {
                        lwkt_migratecpu(cpuid);
                        p->p_usched->changedcpu(lp);
                }
                break;
        case USCHED_GET_CPU:
                /* USCHED_GET_CPU doesn't require special privileges. */
                if (uap->bytes != sizeof(int)) {
                        error = EINVAL;
                        break;
                }
                error = copyout(&(mycpu->gd_cpuid), uap->data, sizeof(int));
                break;
        case USCHED_GET_CPUMASK:
                /* USCHED_GET_CPUMASK doesn't require special privileges. */
                if (uap->bytes != sizeof(cpumask_t)) {
                        error = EINVAL;
                        break;
                }
                mask = lp->lwp_cpumask;
                CPUMASK_ANDMASK(mask, smp_active_mask);
                error = copyout(&mask, uap->data, sizeof(cpumask_t));
                break;
        case USCHED_ADD_CPU:
                if ((error = priv_check(curthread, PRIV_SCHED_CPUSET)) != 0)
                        break;
                if (uap->bytes != sizeof(int)) {
                        error = EINVAL;
                        break;
                }
                error = copyin(uap->data, &cpuid, sizeof(int));
                if (error)
                        break;
                if (cpuid < 0 || cpuid >= ncpus) {
                        error = EFBIG;
                        break;
                }
                if (CPUMASK_TESTBIT(smp_active_mask, cpuid) == 0) {
                        error = EINVAL;
                        break;
                }
                CPUMASK_ORBIT(lp->lwp_cpumask, cpuid);
                break;
        case USCHED_DEL_CPU:
                /* USCHED_DEL_CPU doesn't require special privileges. */
                if (uap->bytes != sizeof(int)) {
                        error = EINVAL;
                        break;
                }
                error = copyin(uap->data, &cpuid, sizeof(int));
                if (error)
                        break;
                if (cpuid < 0 || cpuid >= ncpus) {
                        error = EFBIG;
                        break;
                }
                lp = curthread->td_lwp;
                mask = lp->lwp_cpumask;
                CPUMASK_ANDMASK(mask, smp_active_mask);
                CPUMASK_NANDBIT(mask, cpuid);
                if (CPUMASK_TESTZERO(mask)) {
                        error = EPERM;
                } else {
                        CPUMASK_NANDBIT(lp->lwp_cpumask, cpuid);
                        if (CPUMASK_TESTMASK(lp->lwp_cpumask,
                            mycpu->gd_cpumask) == 0) {
                                mask = lp->lwp_cpumask;
                                CPUMASK_ANDMASK(mask, smp_active_mask);
                                cpuid = BSFCPUMASK(mask);
                                lwkt_migratecpu(cpuid);
                                p->p_usched->changedcpu(lp);
                        }
                }
                break;
        case USCHED_SET_CPUMASK:
                if ((error = priv_check(curthread, PRIV_SCHED_CPUSET)) != 0)
                        break;
                if (uap->bytes != sizeof(mask)) {
                        error = EINVAL;
                        break;
                }
                error = copyin(uap->data, &mask, sizeof(mask));
                if (error)
                        break;

                CPUMASK_ANDMASK(mask, smp_active_mask);
                if (CPUMASK_TESTZERO(mask)) {
                        error = EPERM;
                        break;
                }
                /* Commit the new cpumask. */
                lp->lwp_cpumask = mask;

                /* Migrate if necessary. */
                if (CPUMASK_TESTMASK(lp->lwp_cpumask, mycpu->gd_cpumask) == 0) {
                        cpuid = BSFCPUMASK(lp->lwp_cpumask);
                        lwkt_migratecpu(cpuid);
                        p->p_usched->changedcpu(lp);
                }
                break;
        default:
                error = EINVAL;
                break;
        }
        lwkt_reltoken(&lp->lwp_token);

        return (error);
}
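/*
 * Userland usage sketch (illustrative only; usched_set(2) is the
 * authoritative reference for the prototype and headers): pin the calling
 * process to cpu 1.  A pid of 0 selects the calling process, per the
 * check at the top of sys_usched_set(), and USCHED_SET_CPU requires
 * PRIV_SCHED_CPUSET privilege.
 *
 *	int cpu = 1;
 *
 *	if (usched_set(0, USCHED_SET_CPU, &cpu, sizeof(cpu)) < 0)
 *		err(1, "usched_set");
 */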
int
sys_lwp_getaffinity(struct sysmsg *sysmsg,
    const struct lwp_getaffinity_args *uap)
{
        struct proc *p;
        cpumask_t mask;
        struct lwp *lp;
        int error = 0;

        if (uap->pid < 0)
                return (EINVAL);

        if (uap->pid == 0) {
                p = curproc;
                PHOLD(p);
        } else {
                p = pfind(uap->pid);	/* pfind() holds (p) */
                if (p == NULL)
                        return (ESRCH);
        }
        lwkt_gettoken(&p->p_token);

        if (uap->tid < 0) {
                lp = RB_FIRST(lwp_rb_tree, &p->p_lwp_tree);
        } else {
                lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
        }
        if (lp == NULL) {
                error = ESRCH;
        } else {
                /* Take a snapshot for copyout, which may block. */
                LWPHOLD(lp);
                lwkt_gettoken(&lp->lwp_token);
                mask = lp->lwp_cpumask;
                CPUMASK_ANDMASK(mask, smp_active_mask);
                lwkt_reltoken(&lp->lwp_token);
                LWPRELE(lp);
        }

        lwkt_reltoken(&p->p_token);
        PRELE(p);

        if (error == 0)
                error = copyout(&mask, uap->mask, sizeof(cpumask_t));

        return (error);
}

int
sys_lwp_setaffinity(struct sysmsg *sysmsg,
    const struct lwp_setaffinity_args *uap)
{
        struct proc *p;
        cpumask_t mask;
        struct lwp *lp;
        int error;

        /*
         * NOTE:
         * Changing our own CPU affinity is always allowed.
         */
        if ((error = priv_check(curthread, PRIV_SCHED_CPUSET)) != 0 &&
            uap->pid != 0)
                return (error);

        error = copyin(uap->mask, &mask, sizeof(mask));
        if (error)
                return (error);

        CPUMASK_ANDMASK(mask, smp_active_mask);
        if (CPUMASK_TESTZERO(mask))
                return (EPERM);
        if (uap->pid < 0)
                return (EINVAL);

        /*
         * Locate the process.
         */
        if (uap->pid == 0) {
                p = curproc;
                PHOLD(p);
        } else {
                p = pfind(uap->pid);	/* pfind() holds (p) */
                if (p == NULL)
                        return (ESRCH);
        }
        lwkt_gettoken(&p->p_token);

        if (uap->tid < 0) {
                FOREACH_LWP_IN_PROC(lp, p) {
                        error = setaffinity_lp(lp, &mask);
                }
                /* not an error if no LWPs left in process */
        } else {
                lp = lwp_rb_tree_RB_LOOKUP(&p->p_lwp_tree, uap->tid);
                error = setaffinity_lp(lp, &mask);
        }
        lwkt_reltoken(&p->p_token);
        PRELE(p);

        return (error);
}

static int
setaffinity_lp(struct lwp *lp, cpumask_t *mask)
{
        if (lp == NULL)
                return (ESRCH);

        LWPHOLD(lp);
        lwkt_gettoken(&lp->lwp_token);
        lp->lwp_cpumask = *mask;

        /*
         * NOTE: When adjusting a thread that is not our own, the migration
         *	 will occur at the next reschedule.
         */
        if (lp == curthread->td_lwp) {
                /*
                 * Self migration can be done immediately,
                 * if necessary.
                 */
                if (CPUMASK_TESTBIT(lp->lwp_cpumask,
                    mycpu->gd_cpuid) == 0) {
                        lwkt_migratecpu(BSFCPUMASK(lp->lwp_cpumask));
                        lp->lwp_proc->p_usched->changedcpu(lp);
                }
        }
        lwkt_reltoken(&lp->lwp_token);
        LWPRELE(lp);

        return (0);
}
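/*
 * Userland usage sketch (illustrative only; lwp_setaffinity(2) is the
 * authoritative reference for the prototype and headers): restrict the
 * calling process to cpu 0.  Per the code above, pid 0 selects the
 * calling process and a negative tid applies the mask to every lwp in it.
 *
 *	cpumask_t mask;
 *
 *	CPUMASK_ASSBIT(mask, 0);
 *	if (lwp_setaffinity(0, -1, &mask) < 0)
 *		err(1, "lwp_setaffinity");
 */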