/*	$OpenBSD: kern_sched.c,v 1.87 2023/08/29 16:19:34 claudio Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/clockintr.h>
#include <sys/resourcevar.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process we keep track
 * of cpus which are currently idle and which cpus have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Times we migrated to another cpu */
uint64_t sched_nomigrations;	/* Times we stayed on the same cpu */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole a proc from another cpu */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

int sched_smt;

/*
 * A few notes about cpu_switchto, which is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process,
 * including the pcb and pmap, set curproc and the p_cpu pointer in the
 * proc, and set p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state being
 * consistent. Therefore no locking is necessary in cpu_switchto other than
 * blocking interrupts during the context switch.
 */

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	if (spc->spc_itimer == NULL) {
		spc->spc_itimer = clockintr_establish(&ci->ci_queue,
		    itimer_update);
		if (spc->spc_itimer == NULL) {
			panic("%s: clockintr_establish itimer_update",
			    __func__);
		}
	}
	if (spc->spc_profclock == NULL) {
		spc->spc_profclock = clockintr_establish(&ci->ci_queue,
		    profclock);
		if (spc->spc_profclock == NULL)
			panic("%s: clockintr_establish profclock", __func__);
	}
	if (spc->spc_roundrobin == NULL) {
		spc->spc_roundrobin = clockintr_establish(&ci->ci_queue,
		    roundrobin);
		if (spc->spc_roundrobin == NULL)
			panic("%s: clockintr_establish roundrobin", __func__);
	}

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it idle<num>. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;
	int s;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * The first time we enter here, we're not supposed to idle,
	 * just go away for a while.
	 */
	SCHED_LOCK(s);
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK(s);

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK(s);
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK(s);

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK(s);
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK(s);
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have idle clean up that list and move it to the reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another cpu in case it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *idle;
	int s;

	if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_cancel(spc->spc_itimer);
	}
	if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_cancel(spc->spc_profclock);
	}

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	KERNEL_ASSERT_LOCKED();
	__mp_release_all(&kernel_lock);
#endif

	SCHED_LOCK(s);
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
void
sched_init_runqueues(void)
{
}

void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1U << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);

	if (prio < spc->spc_curpriority)
		need_resched(ci);
}

void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1U << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}
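
/*
 * A quick sketch of how the run queues above fit together: each cpu has
 * SCHED_NQS TAILQs in spc_qs[], and a thread with priority prio is
 * enqueued on spc_qs[prio >> 2], so four adjacent priorities share one
 * queue. spc_whichqs keeps one bit per non-empty queue (set by
 * setrunqueue, cleared by remrunqueue when the queue drains), which lets
 * sched_chooseproc below find the best queue with a single ffs(): lower
 * numeric priority means higher scheduling priority, so the lowest set
 * bit is the queue to run from. For example, setrunqueue(ci, p, 50) puts
 * p on spc_qs[12] (50 >> 2) and sets bit 12 in spc_whichqs; if no lower
 * bit is set, ffs(spc_whichqs) - 1 yields 12 and p's queue runs next.
 */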

struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		KASSERT(p);
		KASSERT(p->p_wchan == NULL);
		p->p_stat = SRUN;
		return (p);
	}
#endif

again:
	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL) {
			int s;
			/*
			 * We get here if someone decides to switch during
			 * boot before forking kthreads, bleh.
			 * This is kind of like a stupid idle loop.
			 */
#ifdef MULTIPROCESSOR
			__mp_unlock(&sched_lock);
#endif
			spl0();
			delay(10);
			SCHED_LOCK(s);
			goto again;
		}
		KASSERT(p);
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, fall back to all cpus and pick the one with
	 * the fewest queued procs.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run) {
			choice = ci;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}

struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, fall back to all cpus and pick the cheapest one.
	 * (A cpu that is both idle and queued may be handling an interrupt
	 * at this moment and hasn't had time to leave idle yet.)
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set, if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET,
	    best->p_p->ps_pid, CPU_INFO_UNIT(self));

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}

#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an unsigned int. Returns 0 for 0 (yes, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run soon.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The lower the priority of whatever is running on the destination
	 * and the higher the priority of the proc we're moving, the more
	 * willing we are to move it.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * If the proc was running on this cpu recently (it hasn't slept),
	 * lower the cost by an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;
	int s;

	SCHED_LOCK(s);
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK(s);
}

#ifdef MULTIPROCESSOR

void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Make sure we stop the secondary CPUs.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(spc, PZERO, "schedstate");
			sleep_finish(0,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;
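
/*
 * A cpuset is a simple bitmap indexed by CPU_INFO_UNIT(ci): cpu num lives
 * in bit (num % 32) of word cs_set[num / 32], so for example the cpu with
 * unit number 35 is bit 3 of cs_set[1]. The set operations below only walk
 * the CPUSET_ASIZE(ncpus) words that can hold configured cpus, and
 * cpuset_first() maps the first set bit back to a cpu_info pointer through
 * the cpuset_infos[] array filled in by cpuset_init_cpu(). Add, del and
 * isset use atomic bit operations, so individual cpus can be flipped
 * without a lock, but operations on whole sets are not atomic with respect
 * to each other.
 */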

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1U << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
	if (err)
		return err;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif