1 /* $OpenBSD: kern_sched.c,v 1.103 2024/11/24 13:05:14 claudio Exp $ */
2 /*
3 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 */
17
18 #include <sys/param.h>
19
20 #include <sys/sched.h>
21 #include <sys/proc.h>
22 #include <sys/kthread.h>
23 #include <sys/systm.h>
24 #include <sys/clockintr.h>
25 #include <sys/resourcevar.h>
26 #include <sys/task.h>
27 #include <sys/time.h>
28 #include <sys/smr.h>
29 #include <sys/tracepoint.h>
30
31 #include <uvm/uvm_extern.h>
32
/* Forks the idle thread of a cpu; run deferred from sched_init_cpu(). */
void sched_kthreads_create(void *v);

/* Helpers used when deciding where a proc should run. */
int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *self);
37
38 /*
39 * To help choosing which cpu should run which process we keep track
40 * of cpus which are currently idle and which cpus have processes
41 * queued.
42 */
43 struct cpuset sched_idle_cpus;
44 struct cpuset sched_queued_cpus;
45 struct cpuset sched_all_cpus;
46
47 /*
48 * Some general scheduler counters.
49 */
50 uint64_t sched_nmigrations; /* Cpu migration counter */
51 uint64_t sched_nomigrations; /* Cpu no migration counter */
52 uint64_t sched_noidle; /* Times we didn't pick the idle task */
53 uint64_t sched_stolen; /* Times we stole proc from other cpus */
54 uint64_t sched_choose; /* Times we chose a cpu */
55 uint64_t sched_wasidle; /* Times we came out of idle */
56
57 int sched_smt;
58
59 /*
60 * A few notes about cpu_switchto that is implemented in MD code.
61 *
62 * cpu_switchto takes two arguments, the old proc and the proc
63 * it should switch to. The new proc will never be NULL, so we always have
64 * a saved state that we need to switch to. The old proc however can
65 * be NULL if the process is exiting. NULL for the old proc simply
66 * means "don't bother saving old state".
67 *
68 * cpu_switchto is supposed to atomically load the new state of the process
69 * including the pcb, pmap and setting curproc, the p_cpu pointer in the
70 * proc and p_stat to SONPROC. Atomically with respect to interrupts, other
71 * cpus in the system must not depend on this state being consistent.
72 * Therefore no locking is necessary in cpu_switchto other than blocking
73 * interrupts during the context switch.
74 */
75
76 /*
77 * sched_init_cpu is called from main() for the boot cpu, then it's the
78 * responsibility of the MD code to call it for all other cpus.
79 */
80 void
sched_init_cpu(struct cpu_info * ci)81 sched_init_cpu(struct cpu_info *ci)
82 {
83 struct schedstate_percpu *spc = &ci->ci_schedstate;
84 int i;
85
86 for (i = 0; i < SCHED_NQS; i++)
87 TAILQ_INIT(&spc->spc_qs[i]);
88
89 spc->spc_idleproc = NULL;
90
91 clockintr_bind(&spc->spc_itimer, ci, itimer_update, NULL);
92 clockintr_bind(&spc->spc_profclock, ci, profclock, NULL);
93 clockintr_bind(&spc->spc_roundrobin, ci, roundrobin, NULL);
94 clockintr_bind(&spc->spc_statclock, ci, statclock, NULL);
95
96 kthread_create_deferred(sched_kthreads_create, ci);
97
98 LIST_INIT(&spc->spc_deadproc);
99 SIMPLEQ_INIT(&spc->spc_deferred);
100
101 /*
102 * Slight hack here until the cpuset code handles cpu_info
103 * structures.
104 */
105 cpuset_init_cpu(ci);
106
107 #ifdef __HAVE_CPU_TOPOLOGY
108 if (!sched_smt && ci->ci_smt_id > 0)
109 return;
110 #endif
111 cpuset_add(&sched_all_cpus, ci);
112 }
113
114 void
sched_kthreads_create(void * v)115 sched_kthreads_create(void *v)
116 {
117 struct cpu_info *ci = v;
118 struct schedstate_percpu *spc = &ci->ci_schedstate;
119 static int num;
120
121 if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
122 FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
123 &spc->spc_idleproc))
124 panic("fork idle");
125
126 /* Name it as specified. */
127 snprintf(spc->spc_idleproc->p_p->ps_comm,
128 sizeof(spc->spc_idleproc->p_p->ps_comm),
129 "idle%d", num);
130
131 num++;
132 }
133
134 void
sched_idle(void * v)135 sched_idle(void *v)
136 {
137 struct schedstate_percpu *spc;
138 struct proc *p = curproc;
139 struct cpu_info *ci = v;
140
141 KERNEL_UNLOCK();
142
143 spc = &ci->ci_schedstate;
144
145 /*
146 * First time we enter here, we're not supposed to idle,
147 * just go away for a while.
148 */
149 SCHED_LOCK();
150 cpuset_add(&sched_idle_cpus, ci);
151 p->p_stat = SSLEEP;
152 p->p_cpu = ci;
153 atomic_setbits_int(&p->p_flag, P_CPUPEG);
154 mi_switch();
155 cpuset_del(&sched_idle_cpus, ci);
156 SCHED_UNLOCK();
157
158 KASSERT(ci == curcpu());
159 KASSERT(curproc == spc->spc_idleproc);
160
161 while (1) {
162 while (!cpu_is_idle(curcpu())) {
163 struct proc *dead;
164
165 SCHED_LOCK();
166 p->p_stat = SSLEEP;
167 mi_switch();
168 SCHED_UNLOCK();
169
170 while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
171 LIST_REMOVE(dead, p_hash);
172 exit2(dead);
173 }
174 }
175
176 splassert(IPL_NONE);
177
178 smr_idle();
179
180 cpuset_add(&sched_idle_cpus, ci);
181 cpu_idle_enter();
182 while (spc->spc_whichqs == 0) {
183 #ifdef MULTIPROCESSOR
184 if (spc->spc_schedflags & SPCF_SHOULDHALT &&
185 (spc->spc_schedflags & SPCF_HALTED) == 0) {
186 cpuset_del(&sched_idle_cpus, ci);
187 SCHED_LOCK();
188 atomic_setbits_int(&spc->spc_schedflags,
189 spc->spc_whichqs ? 0 : SPCF_HALTED);
190 SCHED_UNLOCK();
191 wakeup(spc);
192 }
193 #endif
194 cpu_idle_cycle();
195 }
196 cpu_idle_leave();
197 cpuset_del(&sched_idle_cpus, ci);
198 }
199 }
200
201 /*
202 * To free our address space we have to jump through a few hoops.
203 * The freeing is done by the reaper, but until we have one reaper
204 * per cpu, we have no way of putting this proc on the deadproc list
205 * and waking up the reaper without risking having our address space and
206 * stack torn from under us before we manage to switch to another proc.
207 * Therefore we have a per-cpu list of dead processes where we put this
208 * proc and have idle clean up that list and move it to the reaper list.
209 * All this will be unnecessary once we can bind the reaper this cpu
210 * and not risk having it switch to another in case it sleeps.
211 */
212 void
sched_exit(struct proc * p)213 sched_exit(struct proc *p)
214 {
215 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
216
217 LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);
218
219 tuagg_add_runtime();
220
221 KERNEL_ASSERT_LOCKED();
222 sched_toidle();
223 }
224
225 void
sched_toidle(void)226 sched_toidle(void)
227 {
228 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
229 struct proc *idle;
230
231 #ifdef MULTIPROCESSOR
232 /* This process no longer needs to hold the kernel lock. */
233 if (_kernel_lock_held())
234 __mp_release_all(&kernel_lock);
235 #endif
236
237 if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
238 atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
239 clockintr_cancel(&spc->spc_itimer);
240 }
241 if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
242 atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
243 clockintr_cancel(&spc->spc_profclock);
244 }
245
246 atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
247
248 SCHED_LOCK();
249 idle = spc->spc_idleproc;
250 idle->p_stat = SRUN;
251
252 uvmexp.swtch++;
253 if (curproc != NULL)
254 TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET,
255 idle->p_p->ps_pid);
256 cpu_switchto(NULL, idle);
257 panic("cpu_switchto returned");
258 }
259
260 /*
261 * Run queue management.
262 */
/*
 * Nothing to do here: the run queues are per-cpu and are set up in
 * sched_init_cpu().
 */
void
sched_init_runqueues(void)
{
	return;
}
267
268 void
setrunqueue(struct cpu_info * ci,struct proc * p,uint8_t prio)269 setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
270 {
271 struct schedstate_percpu *spc;
272 int queue = prio >> 2;
273
274 if (ci == NULL)
275 ci = sched_choosecpu(p);
276
277 KASSERT(ci != NULL);
278 SCHED_ASSERT_LOCKED();
279 KASSERT(p->p_wchan == NULL);
280 KASSERT(!ISSET(p->p_flag, P_WSLEEP));
281
282 p->p_cpu = ci;
283 p->p_stat = SRUN;
284 p->p_runpri = prio;
285
286 spc = &p->p_cpu->ci_schedstate;
287 spc->spc_nrun++;
288 TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
289 p->p_p->ps_pid);
290
291 TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
292 spc->spc_whichqs |= (1U << queue);
293 cpuset_add(&sched_queued_cpus, p->p_cpu);
294
295 if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
296 cpu_unidle(p->p_cpu);
297 else if (prio < spc->spc_curpriority)
298 need_resched(ci);
299 }
300
301 void
remrunqueue(struct proc * p)302 remrunqueue(struct proc *p)
303 {
304 struct schedstate_percpu *spc;
305 int queue = p->p_runpri >> 2;
306
307 SCHED_ASSERT_LOCKED();
308 spc = &p->p_cpu->ci_schedstate;
309 spc->spc_nrun--;
310 TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
311 p->p_p->ps_pid);
312
313 TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
314 if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
315 spc->spc_whichqs &= ~(1U << queue);
316 if (spc->spc_whichqs == 0)
317 cpuset_del(&sched_queued_cpus, p->p_cpu);
318 }
319 }
320
321 struct proc *
sched_chooseproc(void)322 sched_chooseproc(void)
323 {
324 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
325 struct proc *p;
326 int queue;
327
328 SCHED_ASSERT_LOCKED();
329
330 #ifdef MULTIPROCESSOR
331 if (spc->spc_schedflags & SPCF_SHOULDHALT) {
332 if (spc->spc_whichqs) {
333 for (queue = 0; queue < SCHED_NQS; queue++) {
334 while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
335 remrunqueue(p);
336 setrunqueue(NULL, p, p->p_runpri);
337 if (p->p_cpu == curcpu()) {
338 KASSERT(p->p_flag & P_CPUPEG);
339 goto again;
340 }
341 }
342 }
343 }
344 p = spc->spc_idleproc;
345 if (p == NULL)
346 panic("no idleproc set on CPU%d",
347 CPU_INFO_UNIT(curcpu()));
348 p->p_stat = SRUN;
349 KASSERT(p->p_wchan == NULL);
350 return (p);
351 }
352 again:
353 #endif
354
355 if (spc->spc_whichqs) {
356 queue = ffs(spc->spc_whichqs) - 1;
357 p = TAILQ_FIRST(&spc->spc_qs[queue]);
358 remrunqueue(p);
359 sched_noidle++;
360 if (p->p_stat != SRUN)
361 panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
362 } else if ((p = sched_steal_proc(curcpu())) == NULL) {
363 p = spc->spc_idleproc;
364 if (p == NULL)
365 panic("no idleproc set on CPU%d",
366 CPU_INFO_UNIT(curcpu()));
367 p->p_stat = SRUN;
368 }
369
370 KASSERT(p->p_wchan == NULL);
371 KASSERT(!ISSET(p->p_flag, P_WSLEEP));
372 return (p);
373 }
374
/*
 * Choose the cpu a freshly forked proc should start on.
 * On uniprocessor kernels this is always the current cpu.
 */
struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpuset candidates;
	struct cpu_info *ci, *choice = NULL;
	int nrun, best_run = INT_MAX;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Consider the cpus that are idle and have nothing queued.
	 * If there are none, consider every schedulable cpu.  Among
	 * the candidates take the one with the fewest runnable procs.
	 */
	cpuset_complement(&candidates, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&candidates, &candidates, &sched_all_cpus);
	if (cpuset_first(&candidates) == NULL)
		cpuset_copy(&candidates, &sched_all_cpus);

	for (;;) {
		ci = cpuset_first(&candidates);
		if (ci == NULL)
			break;
		cpuset_del(&candidates, ci);

		nrun = ci->ci_schedstate.spc_nrun;
		if (choice == NULL || nrun < best_run) {
			choice = ci;
			best_run = nrun;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}
426
/*
 * Choose the cpu that p should be queued on.
 * On uniprocessor kernels this is always the current cpu.
 */
struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpuset candidates;
	struct cpu_info *ci, *choice = NULL;
	int last_cost = INT_MAX;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (ISSET(p->p_flag, P_CPUPEG))
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued.
	 * If there are none, pick the cheapest of all schedulable cpus.
	 * (idle + queued could mean that the cpu is handling an interrupt
	 * at this moment and hasn't had time to leave idle yet).
	 */
	cpuset_complement(&candidates, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&candidates, &candidates, &sched_all_cpus);

	/*
	 * First, just check if the proc's cpu is in that set, if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&candidates, p->p_cpu) ||
	    (p->p_cpu == curcpu() &&
	    p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    !ISSET(p->p_cpu->ci_schedstate.spc_schedflags, SPCF_SHOULDHALT) &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&candidates) == NULL)
		cpuset_copy(&candidates, &sched_all_cpus);

	for (ci = cpuset_first(&candidates); ci != NULL;
	    ci = cpuset_first(&candidates)) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&candidates, ci);
	}

	if (p->p_cpu == choice)
		sched_nomigrations++;
	else
		sched_nmigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}
490
491 /*
492 * Attempt to steal a proc from some cpu.
493 */
494 struct proc *
sched_steal_proc(struct cpu_info * self)495 sched_steal_proc(struct cpu_info *self)
496 {
497 struct proc *best = NULL;
498 #ifdef MULTIPROCESSOR
499 struct schedstate_percpu *spc;
500 int bestcost = INT_MAX;
501 struct cpu_info *ci;
502 struct cpuset set;
503
504 KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);
505
506 /* Don't steal if we don't want to schedule processes in this CPU. */
507 if (!cpuset_isset(&sched_all_cpus, self))
508 return (NULL);
509
510 cpuset_copy(&set, &sched_queued_cpus);
511
512 while ((ci = cpuset_first(&set)) != NULL) {
513 struct proc *p;
514 int queue;
515 int cost;
516
517 cpuset_del(&set, ci);
518
519 spc = &ci->ci_schedstate;
520
521 queue = ffs(spc->spc_whichqs) - 1;
522 TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
523 if (p->p_flag & P_CPUPEG)
524 continue;
525
526 cost = sched_proc_to_cpu_cost(self, p);
527
528 if (best == NULL || cost < bestcost) {
529 best = p;
530 bestcost = cost;
531 }
532 }
533 }
534 if (best == NULL)
535 return (NULL);
536
537 TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET,
538 best->p_p->ps_pid, CPU_INFO_UNIT(self));
539
540 remrunqueue(best);
541 best->p_cpu = self;
542
543 sched_stolen++;
544 #endif
545 return (best);
546 }
547
548 #ifdef MULTIPROCESSOR
/*
 * Integer base 2 logarithm, rounded down: the index of the highest
 * set bit of i.  By convention returns 0 for 0 as well as for 1.
 */
static int
log2(unsigned int i)
{
	int bits;

	for (bits = 0; i > 1; i >>= 1)
		bits++;

	return (bits);
}
562
563 /*
564 * Calculate the cost of moving the proc to this cpu.
565 *
566 * What we want is some guesstimate of how much "performance" it will
567 * cost us to move the proc here. Not just for caches and TLBs and NUMA
568 * memory, but also for the proc itself. A highly loaded cpu might not
569 * be the best candidate for this proc since it won't get run.
570 *
571 * Just total guesstimates for now.
572 */
573
/* Weights used by sched_proc_to_cpu_cost(). */
int sched_cost_priority = 1;	/* per unit of priority difference */
int sched_cost_runnable = 3;	/* per runnable proc on the cpu */
int sched_cost_resident = 1;	/* per log2 of resident pages (credit) */
577 #endif
578
/*
 * Estimate the cost of running p on ci; lower is better and the value
 * may be negative.  Always 0 on uniprocessor kernels.
 */
int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * More willing to move, the lower the priority of the destination
	 * and the higher the priority of the proc.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}

	/* Every proc already queued on the cpu makes it less attractive. */
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * If the proc is on this cpu already and has not been sleeping,
	 * lower the cost by an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		cost -= log2(pmap_resident_count(p->p_vmspace->vm_map.pmap)) *
		    sched_cost_resident;
	}
#endif
	return (cost);
}
623
624 /*
625 * Peg a proc to a cpu.
626 */
627 void
sched_peg_curproc(struct cpu_info * ci)628 sched_peg_curproc(struct cpu_info *ci)
629 {
630 struct proc *p = curproc;
631
632 SCHED_LOCK();
633 atomic_setbits_int(&p->p_flag, P_CPUPEG);
634 setrunqueue(ci, p, p->p_usrpri);
635 p->p_ru.ru_nvcsw++;
636 mi_switch();
637 SCHED_UNLOCK();
638 }
639
640 void
sched_unpeg_curproc(void)641 sched_unpeg_curproc(void)
642 {
643 struct proc *p = curproc;
644
645 atomic_clearbits_int(&p->p_flag, P_CPUPEG);
646 }
647
648 #ifdef MULTIPROCESSOR
649
650 void
sched_start_secondary_cpus(void)651 sched_start_secondary_cpus(void)
652 {
653 CPU_INFO_ITERATOR cii;
654 struct cpu_info *ci;
655
656 CPU_INFO_FOREACH(cii, ci) {
657 struct schedstate_percpu *spc = &ci->ci_schedstate;
658
659 if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
660 continue;
661 atomic_clearbits_int(&spc->spc_schedflags,
662 SPCF_SHOULDHALT | SPCF_HALTED);
663 #ifdef __HAVE_CPU_TOPOLOGY
664 if (!sched_smt && ci->ci_smt_id > 0)
665 continue;
666 #endif
667 cpuset_add(&sched_all_cpus, ci);
668 }
669 }
670
671 void
sched_stop_secondary_cpus(void)672 sched_stop_secondary_cpus(void)
673 {
674 CPU_INFO_ITERATOR cii;
675 struct cpu_info *ci;
676
677 /*
678 * Make sure we stop the secondary CPUs.
679 */
680 CPU_INFO_FOREACH(cii, ci) {
681 struct schedstate_percpu *spc = &ci->ci_schedstate;
682
683 if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
684 continue;
685 cpuset_del(&sched_all_cpus, ci);
686 atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
687 }
688 CPU_INFO_FOREACH(cii, ci) {
689 struct schedstate_percpu *spc = &ci->ci_schedstate;
690
691 if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
692 continue;
693 while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
694 sleep_setup(spc, PZERO, "schedstate");
695 sleep_finish(0,
696 (spc->spc_schedflags & SPCF_HALTED) == 0);
697 }
698 }
699 }
700
701 struct sched_barrier_state {
702 struct cpu_info *ci;
703 struct cond cond;
704 };
705
706 void
sched_barrier_task(void * arg)707 sched_barrier_task(void *arg)
708 {
709 struct sched_barrier_state *sb = arg;
710 struct cpu_info *ci = sb->ci;
711
712 sched_peg_curproc(ci);
713 cond_signal(&sb->cond);
714 sched_unpeg_curproc();
715 }
716
717 void
sched_barrier(struct cpu_info * ci)718 sched_barrier(struct cpu_info *ci)
719 {
720 struct sched_barrier_state sb;
721 struct task task;
722 CPU_INFO_ITERATOR cii;
723
724 if (ci == NULL) {
725 CPU_INFO_FOREACH(cii, ci) {
726 if (CPU_IS_PRIMARY(ci))
727 break;
728 }
729 }
730 KASSERT(ci != NULL);
731
732 if (ci == curcpu())
733 return;
734
735 sb.ci = ci;
736 cond_init(&sb.cond);
737 task_set(&task, sched_barrier_task, &sb);
738
739 task_add(systqmp, &task);
740 cond_wait(&sb.cond, "sbar");
741 }
742
743 #else
744
/*
 * Uniprocessor kernels have no other cpu to wait for.
 */
void
sched_barrier(struct cpu_info *ci)
{
	return;
}
749
750 #endif
751
752 /*
753 * Functions to manipulate cpu sets.
754 */
755 struct cpu_info *cpuset_infos[MAXCPUS];
756 static struct cpuset cpuset_all;
757
758 void
cpuset_init_cpu(struct cpu_info * ci)759 cpuset_init_cpu(struct cpu_info *ci)
760 {
761 cpuset_add(&cpuset_all, ci);
762 cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
763 }
764
765 void
cpuset_clear(struct cpuset * cs)766 cpuset_clear(struct cpuset *cs)
767 {
768 memset(cs, 0, sizeof(*cs));
769 }
770
771 void
cpuset_add(struct cpuset * cs,struct cpu_info * ci)772 cpuset_add(struct cpuset *cs, struct cpu_info *ci)
773 {
774 unsigned int num = CPU_INFO_UNIT(ci);
775 atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
776 }
777
778 void
cpuset_del(struct cpuset * cs,struct cpu_info * ci)779 cpuset_del(struct cpuset *cs, struct cpu_info *ci)
780 {
781 unsigned int num = CPU_INFO_UNIT(ci);
782 atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
783 }
784
785 int
cpuset_isset(struct cpuset * cs,struct cpu_info * ci)786 cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
787 {
788 unsigned int num = CPU_INFO_UNIT(ci);
789 return (cs->cs_set[num/32] & (1U << (num % 32)));
790 }
791
792 void
cpuset_add_all(struct cpuset * cs)793 cpuset_add_all(struct cpuset *cs)
794 {
795 cpuset_copy(cs, &cpuset_all);
796 }
797
798 void
cpuset_copy(struct cpuset * to,struct cpuset * from)799 cpuset_copy(struct cpuset *to, struct cpuset *from)
800 {
801 memcpy(to, from, sizeof(*to));
802 }
803
804 struct cpu_info *
cpuset_first(struct cpuset * cs)805 cpuset_first(struct cpuset *cs)
806 {
807 int i;
808
809 for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
810 if (cs->cs_set[i])
811 return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);
812
813 return (NULL);
814 }
815
816 void
cpuset_union(struct cpuset * to,struct cpuset * a,struct cpuset * b)817 cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
818 {
819 int i;
820
821 for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
822 to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
823 }
824
825 void
cpuset_intersection(struct cpuset * to,struct cpuset * a,struct cpuset * b)826 cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
827 {
828 int i;
829
830 for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
831 to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
832 }
833
834 void
cpuset_complement(struct cpuset * to,struct cpuset * a,struct cpuset * b)835 cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
836 {
837 int i;
838
839 for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
840 to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
841 }
842
843 int
cpuset_cardinality(struct cpuset * cs)844 cpuset_cardinality(struct cpuset *cs)
845 {
846 int cardinality, i, n;
847
848 cardinality = 0;
849
850 for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
851 for (n = cs->cs_set[i]; n != 0; n &= n - 1)
852 cardinality++;
853
854 return (cardinality);
855 }
856
857 int
sysctl_hwncpuonline(void)858 sysctl_hwncpuonline(void)
859 {
860 return cpuset_cardinality(&sched_all_cpus);
861 }
862
863 int
cpu_is_online(struct cpu_info * ci)864 cpu_is_online(struct cpu_info *ci)
865 {
866 return cpuset_isset(&sched_all_cpus, ci);
867 }
868
869 #ifdef __HAVE_CPU_TOPOLOGY
870
871 #include <sys/sysctl.h>
872
873 int
sysctl_hwsmt(void * oldp,size_t * oldlenp,void * newp,size_t newlen)874 sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
875 {
876 CPU_INFO_ITERATOR cii;
877 struct cpu_info *ci;
878 int err, newsmt;
879
880 newsmt = sched_smt;
881 err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
882 if (err)
883 return err;
884 if (newsmt == sched_smt)
885 return 0;
886
887 sched_smt = newsmt;
888 CPU_INFO_FOREACH(cii, ci) {
889 if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
890 continue;
891 if (ci->ci_smt_id == 0)
892 continue;
893 if (sched_smt)
894 cpuset_add(&sched_all_cpus, ci);
895 else
896 cpuset_del(&sched_all_cpus, ci);
897 }
898
899 return 0;
900 }
901
902 #endif
903