/*	$OpenBSD: kern_sched.c,v 1.100 2024/07/09 08:44:36 claudio Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/clockintr.h>
#include <sys/resourcevar.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process we keep track
 * of cpus which are currently idle and which cpus have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

int sched_smt;

/*
 * A few notes about cpu_switchto, which is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process,
 * including the pcb and pmap, and to set curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state being
 * consistent while a switch is in progress.
 * Therefore no locking is necessary in cpu_switchto other than blocking
 * interrupts during the context switch.
 */
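
/*
 * Purely illustrative sketch of the contract described above.  The real
 * implementations are machine dependent (usually assembly); the helper
 * names save_context()/load_context() below are made up for the example
 * and are not kernel interfaces:
 *
 *	cpu_switchto(struct proc *old, struct proc *new)
 *	{
 *		s = intr_disable();		// block interrupts for the switch
 *		if (old != NULL)
 *			save_context(&old->p_addr->u_pcb);
 *		new->p_cpu = curcpu();		// publish the new state...
 *		new->p_stat = SONPROC;
 *		curcpu()->ci_curproc = new;	// ...including curproc
 *		pmap_activate(new);		// switch address space
 *		load_context(&new->p_addr->u_pcb);
 *		intr_restore(s);
 *	}
 */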

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	clockintr_bind(&spc->spc_itimer, ci, itimer_update, NULL);
	clockintr_bind(&spc->spc_profclock, ci, profclock, NULL);
	clockintr_bind(&spc->spc_roundrobin, ci, roundrobin, NULL);
	clockintr_bind(&spc->spc_statclock, ci, statclock, NULL);

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it as specified. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * First time we enter here, we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK();
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK();

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK();
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK();

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK();
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK();
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}
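
/*
 * Note on the SPCF_SHOULDHALT / SPCF_HALTED handshake used above: when
 * sched_stop_secondary_cpus() (below) wants to park a secondary cpu, it
 * removes the cpu from sched_all_cpus and sets SPCF_SHOULDHALT.  The cpu's
 * sched_chooseproc() then hands any queued procs off to other cpus, and
 * once the idle loop runs with empty queues it acknowledges with
 * SPCF_HALTED and wakeup(spc), which is what sched_stop_secondary_cpus()
 * sleeps on.
 */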

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have idle clean up that list and move it to the reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another cpu in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct timespec ts;

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

	/* update the tu_runtime one last time */
	nanouptime(&ts);
	if (timespeccmp(&ts, &spc->spc_runtime, <))
		timespecclear(&ts);
	else
		timespecsub(&ts, &spc->spc_runtime, &ts);

	/* add the time counts for this thread */
	tu_enter(&p->p_tu);
	timespecadd(&p->p_tu.tu_runtime, &ts, &p->p_tu.tu_runtime);
	tu_leave(&p->p_tu);

	KERNEL_ASSERT_LOCKED();
	sched_toidle();
}

void
sched_toidle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *idle;

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	if (_kernel_lock_held())
		__mp_release_all(&kernel_lock);
#endif

	if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_cancel(&spc->spc_itimer);
	}
	if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_cancel(&spc->spc_profclock);
	}

	atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);

	SCHED_LOCK();
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;

	uvmexp.swtch++;
	if (curproc != NULL)
		TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET,
		    idle->p_p->ps_pid);
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();
	KASSERT(p->p_wchan == NULL);

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1U << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);
	else if (prio < spc->spc_curpriority)
		need_resched(ci);
}

void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1U << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}
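
/*
 * Worked example of the run queue indexing used above (based on the code
 * in this file; SCHED_NQS is 32 in sys/sched.h): a priority in the range
 * 0-127 is mapped onto one of the 32 queues by dropping the two low bits,
 * so four adjacent priorities share a queue:
 *
 *	prio 0..3     -> queue 0	(most urgent)
 *	prio 50       -> queue 12
 *	prio 124..127 -> queue 31	(least urgent)
 *
 * spc_whichqs keeps one bit per non-empty queue, so the highest-priority
 * runnable proc is found with ffs(spc_whichqs) - 1 in sched_chooseproc().
 */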

struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		if (p == NULL)
			panic("no idleproc set on CPU%d",
			    CPU_INFO_UNIT(curcpu()));
		p->p_stat = SRUN;
		KASSERT(p->p_wchan == NULL);
		return (p);
	}
again:
#endif

	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL)
			panic("no idleproc set on CPU%d",
			    CPU_INFO_UNIT(curcpu()));
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, consider all cpus.  Among the candidates, pick
	 * the one with the fewest procs queued.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run) {
			choice = ci;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}
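
/*
 * A short worked example of the candidate-set computation used in
 * sched_choosecpu_fork() above and sched_choosecpu() below (the numbers
 * are made up for illustration).  cpuset_complement(to, a, b) computes
 * b & ~a, so with:
 *
 *	sched_idle_cpus   = 0x0b	(cpus 0, 1 and 3 idle)
 *	sched_queued_cpus = 0x02	(cpu 1 has something queued)
 *
 * the complement is 0x09, i.e. cpus 0 and 3: idle with nothing queued.
 * That set is then intersected with sched_all_cpus so halted cpus and
 * disabled SMT siblings are never chosen, and if the result is empty we
 * fall back to considering every cpu in sched_all_cpus.
 */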

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, consider all cpus and pick the cheapest one.
	 * (A cpu that is both idle and queued may be handling an interrupt
	 * at this moment and hasn't had time to leave idle yet.)
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set; if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu,
	 * it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET,
	    best->p_p->ps_pid, CPU_INFO_UNIT(self));

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yes, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get to run soon.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif
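
/*
 * Illustrative example of how the default weights above combine in
 * sched_proc_to_cpu_cost() below (the numbers are invented for the sake
 * of the example).  Consider a proc with p_usrpri 70 and a busy primary
 * cpu running something at spc_curpriority 50 with 2 more procs queued:
 *
 *	not idle:	cost += (70 - 50) * sched_cost_priority = 20
 *			cost += sched_cost_runnable             =  3
 *	queued:		cost += 2 * sched_cost_runnable         =  6
 *	primary cpu:	cost += sched_cost_runnable             =  3
 *							 total  = 32
 *
 * An idle, non-primary cpu would cost 0 for the same proc, and the cpu
 * the proc last ran on gets a discount of log2(resident pages) *
 * sched_cost_resident if the proc ran recently (p_slptime == 0).
 */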

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The lower the priority of whatever the destination cpu is
	 * currently running, and the higher the priority of the proc,
	 * the more willing we are to move it.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * If the proc is already on this cpu and has run recently, lower
	 * the cost by an estimate of its cache and TLB footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;

	SCHED_LOCK();
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK();
}

void
sched_unpeg_curproc(void)
{
	struct proc *p = curproc;

	atomic_clearbits_int(&p->p_flag, P_CPUPEG);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(spc, PZERO, "schedstate");
			sleep_finish(0,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	sched_unpeg_curproc();
}

/*
 * Block until a task has been scheduled and has run on the given cpu
 * (the primary cpu if ci is NULL).  Returns immediately when called
 * on that cpu itself.
 */
void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
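
/*
 * A cpuset is a bitmask with one bit per cpu, indexed by CPU_INFO_UNIT():
 * cpu n lives in word n / 32, bit n % 32 of cs_set[].  For example, cpu 35
 * is bit 3 of cs_set[1].  Single-bit updates go through atomic_setbits_int()
 * and atomic_clearbits_int(), so add/del/isset need no lock; the multi-word
 * operations (union, intersection, complement) are not atomic as a whole.
 */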

struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1U << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
	if (err)
		return err;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif