/*	$OpenBSD: kern_sched.c,v 1.96 2024/06/03 12:48:25 claudio Exp $	*/
/*
 * Copyright (c) 2007, 2008 Artur Grabowski <art@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>

#include <sys/sched.h>
#include <sys/proc.h>
#include <sys/kthread.h>
#include <sys/systm.h>
#include <sys/clockintr.h>
#include <sys/resourcevar.h>
#include <sys/task.h>
#include <sys/time.h>
#include <sys/smr.h>
#include <sys/tracepoint.h>

#include <uvm/uvm_extern.h>

void sched_kthreads_create(void *);

int sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p);
struct proc *sched_steal_proc(struct cpu_info *);

/*
 * To help choose which cpu should run which process we keep track
 * of cpus which are currently idle and which cpus have processes
 * queued.
 */
struct cpuset sched_idle_cpus;
struct cpuset sched_queued_cpus;
struct cpuset sched_all_cpus;

/*
 * Some general scheduler counters.
 */
uint64_t sched_nmigrations;	/* Cpu migration counter */
uint64_t sched_nomigrations;	/* Cpu no migration counter */
uint64_t sched_noidle;		/* Times we didn't pick the idle task */
uint64_t sched_stolen;		/* Times we stole proc from other cpus */
uint64_t sched_choose;		/* Times we chose a cpu */
uint64_t sched_wasidle;		/* Times we came out of idle */

int sched_smt;

/*
 * A few notes about cpu_switchto that is implemented in MD code.
 *
 * cpu_switchto takes two arguments, the old proc and the proc
 * it should switch to. The new proc will never be NULL, so we always have
 * a saved state that we need to switch to. The old proc however can
 * be NULL if the process is exiting. NULL for the old proc simply
 * means "don't bother saving old state".
 *
 * cpu_switchto is supposed to atomically load the new state of the process
 * including the pcb, pmap and setting curproc, the p_cpu pointer in the
 * proc and p_stat to SONPROC. This is atomic only with respect to
 * interrupts; other cpus in the system must not depend on this state
 * being consistent. Therefore no locking is necessary in cpu_switchto
 * other than blocking interrupts during the context switch.
 */

/*
 * sched_init_cpu is called from main() for the boot cpu, then it's the
 * responsibility of the MD code to call it for all other cpus.
 */
void
sched_init_cpu(struct cpu_info *ci)
{
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int i;

	for (i = 0; i < SCHED_NQS; i++)
		TAILQ_INIT(&spc->spc_qs[i]);

	spc->spc_idleproc = NULL;

	clockintr_bind(&spc->spc_itimer, ci, itimer_update, NULL);
	clockintr_bind(&spc->spc_profclock, ci, profclock, NULL);
	clockintr_bind(&spc->spc_roundrobin, ci, roundrobin, NULL);
	clockintr_bind(&spc->spc_statclock, ci, statclock, NULL);

	kthread_create_deferred(sched_kthreads_create, ci);

	LIST_INIT(&spc->spc_deadproc);
	SIMPLEQ_INIT(&spc->spc_deferred);

	/*
	 * Slight hack here until the cpuset code handles cpu_info
	 * structures.
	 */
	cpuset_init_cpu(ci);

#ifdef __HAVE_CPU_TOPOLOGY
	if (!sched_smt && ci->ci_smt_id > 0)
		return;
#endif
	cpuset_add(&sched_all_cpus, ci);
}

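/*
 * Deferred kthread creation hook: fork the per-cpu idle proc for the
 * given cpu and give it a unique "idle<n>" name.
 */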
void
sched_kthreads_create(void *v)
{
	struct cpu_info *ci = v;
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	static int num;

	if (fork1(&proc0, FORK_SHAREVM|FORK_SHAREFILES|FORK_NOZOMBIE|
	    FORK_SYSTEM|FORK_IDLE, sched_idle, ci, NULL,
	    &spc->spc_idleproc))
		panic("fork idle");

	/* Name it idle<num>, numbered in creation order. */
	snprintf(spc->spc_idleproc->p_p->ps_comm,
	    sizeof(spc->spc_idleproc->p_p->ps_comm),
	    "idle%d", num);

	num++;
}

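/*
 * Per-cpu idle loop, run by the idle proc forked above. Reaps dead
 * procs queued by sched_exit(), then spins in cpu_idle_cycle() until
 * work shows up on the runqueues. Also parks the cpu when it has been
 * asked to halt (SPCF_SHOULDHALT).
 */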
void
sched_idle(void *v)
{
	struct schedstate_percpu *spc;
	struct proc *p = curproc;
	struct cpu_info *ci = v;

	KERNEL_UNLOCK();

	spc = &ci->ci_schedstate;

	/*
	 * The first time we enter here we're not supposed to idle;
	 * we just go away for a while.
	 */
	SCHED_LOCK();
	cpuset_add(&sched_idle_cpus, ci);
	p->p_stat = SSLEEP;
	p->p_cpu = ci;
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	mi_switch();
	cpuset_del(&sched_idle_cpus, ci);
	SCHED_UNLOCK();

	KASSERT(ci == curcpu());
	KASSERT(curproc == spc->spc_idleproc);

	while (1) {
		while (!cpu_is_idle(curcpu())) {
			struct proc *dead;

			SCHED_LOCK();
			p->p_stat = SSLEEP;
			mi_switch();
			SCHED_UNLOCK();

			while ((dead = LIST_FIRST(&spc->spc_deadproc))) {
				LIST_REMOVE(dead, p_hash);
				exit2(dead);
			}
		}

		splassert(IPL_NONE);

		smr_idle();

		cpuset_add(&sched_idle_cpus, ci);
		cpu_idle_enter();
		while (spc->spc_whichqs == 0) {
#ifdef MULTIPROCESSOR
			if (spc->spc_schedflags & SPCF_SHOULDHALT &&
			    (spc->spc_schedflags & SPCF_HALTED) == 0) {
				cpuset_del(&sched_idle_cpus, ci);
				SCHED_LOCK();
				atomic_setbits_int(&spc->spc_schedflags,
				    spc->spc_whichqs ? 0 : SPCF_HALTED);
				SCHED_UNLOCK();
				wakeup(spc);
			}
#endif
			cpu_idle_cycle();
		}
		cpu_idle_leave();
		cpuset_del(&sched_idle_cpus, ci);
	}
}

/*
 * To free our address space we have to jump through a few hoops.
 * The freeing is done by the reaper, but until we have one reaper
 * per cpu, we have no way of putting this proc on the deadproc list
 * and waking up the reaper without risking having our address space and
 * stack torn from under us before we manage to switch to another proc.
 * Therefore we have a per-cpu list of dead processes where we put this
 * proc and have idle clean up that list and move it to the reaper list.
 * All this will be unnecessary once we can bind the reaper to this cpu
 * and not risk having it switch to another cpu if it sleeps.
 */
void
sched_exit(struct proc *p)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	LIST_INSERT_HEAD(&spc->spc_deadproc, p, p_hash);

	KERNEL_ASSERT_LOCKED();
	sched_toidle();
}

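/*
 * Drop the kernel lock, cancel any per-proc clock interrupts pending
 * on this cpu and switch straight to the idle proc without saving the
 * current context. Does not return.
 */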
void
sched_toidle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *idle;

#ifdef MULTIPROCESSOR
	/* This process no longer needs to hold the kernel lock. */
	if (_kernel_lock_held())
		__mp_release_all(&kernel_lock);
#endif

	if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
		clockintr_cancel(&spc->spc_itimer);
	}
	if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
		atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
		clockintr_cancel(&spc->spc_profclock);
	}

	atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);

	SCHED_LOCK();
	idle = spc->spc_idleproc;
	idle->p_stat = SRUN;

	uvmexp.swtch++;
	TRACEPOINT(sched, off__cpu, idle->p_tid + THREAD_PID_OFFSET,
	    idle->p_p->ps_pid);
	cpu_switchto(NULL, idle);
	panic("cpu_switchto returned");
}

/*
 * Run queue management.
 */
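/* Nothing to do here; the per-cpu runqueues are set up in sched_init_cpu(). */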
void
sched_init_runqueues(void)
{
}

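/*
 * Put p on the runqueue of ci at the given priority. If ci is NULL a
 * cpu is picked with sched_choosecpu(). Kicks the target cpu out of
 * idle, or asks it to reschedule if p preempts what it is running.
 */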
void
setrunqueue(struct cpu_info *ci, struct proc *p, uint8_t prio)
{
	struct schedstate_percpu *spc;
	int queue = prio >> 2;

	if (ci == NULL)
		ci = sched_choosecpu(p);

	KASSERT(ci != NULL);
	SCHED_ASSERT_LOCKED();
	KASSERT(p->p_wchan == NULL);

	p->p_cpu = ci;
	p->p_stat = SRUN;
	p->p_runpri = prio;

	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun++;
	TRACEPOINT(sched, enqueue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_INSERT_TAIL(&spc->spc_qs[queue], p, p_runq);
	spc->spc_whichqs |= (1U << queue);
	cpuset_add(&sched_queued_cpus, p->p_cpu);

	if (cpuset_isset(&sched_idle_cpus, p->p_cpu))
		cpu_unidle(p->p_cpu);
	else if (prio < spc->spc_curpriority)
		need_resched(ci);
}

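/*
 * Take p off the runqueue of the cpu it is queued on, clearing the
 * queue bit (and the queued-cpus bit) when the queue becomes empty.
 */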
void
remrunqueue(struct proc *p)
{
	struct schedstate_percpu *spc;
	int queue = p->p_runpri >> 2;

	SCHED_ASSERT_LOCKED();
	spc = &p->p_cpu->ci_schedstate;
	spc->spc_nrun--;
	TRACEPOINT(sched, dequeue, p->p_tid + THREAD_PID_OFFSET,
	    p->p_p->ps_pid);

	TAILQ_REMOVE(&spc->spc_qs[queue], p, p_runq);
	if (TAILQ_EMPTY(&spc->spc_qs[queue])) {
		spc->spc_whichqs &= ~(1U << queue);
		if (spc->spc_whichqs == 0)
			cpuset_del(&sched_queued_cpus, p->p_cpu);
	}
}

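/*
 * Pick the next proc for this cpu to run: the head of the highest
 * priority non-empty runqueue, else something stolen from another
 * cpu's runqueue, else the idle proc. If this cpu is being halted,
 * first push everything still queued here onto other cpus.
 */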
struct proc *
sched_chooseproc(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	struct proc *p;
	int queue;

	SCHED_ASSERT_LOCKED();

#ifdef MULTIPROCESSOR
	if (spc->spc_schedflags & SPCF_SHOULDHALT) {
		if (spc->spc_whichqs) {
			for (queue = 0; queue < SCHED_NQS; queue++) {
				while ((p = TAILQ_FIRST(&spc->spc_qs[queue]))) {
					remrunqueue(p);
					setrunqueue(NULL, p, p->p_runpri);
					if (p->p_cpu == curcpu()) {
						KASSERT(p->p_flag & P_CPUPEG);
						goto again;
					}
				}
			}
		}
		p = spc->spc_idleproc;
		if (p == NULL)
			panic("no idleproc set on CPU%d",
			    CPU_INFO_UNIT(curcpu()));
		p->p_stat = SRUN;
		KASSERT(p->p_wchan == NULL);
		return (p);
	}
again:
#endif

	if (spc->spc_whichqs) {
		queue = ffs(spc->spc_whichqs) - 1;
		p = TAILQ_FIRST(&spc->spc_qs[queue]);
		remrunqueue(p);
		sched_noidle++;
		if (p->p_stat != SRUN)
			panic("thread %d not in SRUN: %d", p->p_tid, p->p_stat);
	} else if ((p = sched_steal_proc(curcpu())) == NULL) {
		p = spc->spc_idleproc;
		if (p == NULL)
			panic("no idleproc set on CPU%d",
			    CPU_INFO_UNIT(curcpu()));
		p->p_stat = SRUN;
	}

	KASSERT(p->p_wchan == NULL);
	return (p);
}

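/*
 * Pick a cpu for a newly forked proc. Prefer cpus that are idle with
 * an empty runqueue; among the candidates, take the one with the
 * fewest procs queued.
 */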
struct cpu_info *
sched_choosecpu_fork(struct proc *parent, int flags)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int run, best_run = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

#if 0
	/*
	 * XXX
	 * Don't do this until we have a painless way to move the cpu in exec.
	 * Preferably when nuking the old pmap and getting a new one on a
	 * new cpu.
	 */
	/*
	 * PPWAIT forks are simple. We know that the parent will not
	 * run until we exec and choose another cpu, so we just steal its
	 * cpu.
	 */
	if (flags & FORK_PPWAIT)
		return (parent->p_cpu);
#endif

	/*
	 * Look at all cpus that are currently idle and have nothing queued;
	 * if there are none, consider all cpus. Either way, pick the one
	 * with the fewest procs queued.
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);
	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		cpuset_del(&set, ci);

		run = ci->ci_schedstate.spc_nrun;

		if (choice == NULL || run < best_run) {
			choice = ci;
			best_run = run;
		}
	}

	return (choice);
#else
	return (curcpu());
#endif
}

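/*
 * Pick a cpu for an already running proc that is about to be queued.
 * Prefer its current cpu when that is cheap, otherwise take the cpu
 * with the lowest sched_proc_to_cpu_cost().
 */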
struct cpu_info *
sched_choosecpu(struct proc *p)
{
#ifdef MULTIPROCESSOR
	struct cpu_info *choice = NULL;
	int last_cost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	/*
	 * If pegged to a cpu, don't allow it to move.
	 */
	if (p->p_flag & P_CPUPEG)
		return (p->p_cpu);

	sched_choose++;

	/*
	 * Look at all cpus that are currently idle and have nothing queued;
	 * of those, pick the cheapest. (A cpu being both idle and queued
	 * can happen when the cpu is handling an interrupt at this moment
	 * and hasn't had time to leave idle yet.)
	 */
	cpuset_complement(&set, &sched_queued_cpus, &sched_idle_cpus);
	cpuset_intersection(&set, &set, &sched_all_cpus);

	/*
	 * First, just check if our current cpu is in that set; if it is,
	 * this is simple.
	 * Also, our cpu might not be idle, but if it's the current cpu
	 * and it has nothing else queued and we're curproc, take it.
	 */
	if (cpuset_isset(&set, p->p_cpu) ||
	    (p->p_cpu == curcpu() && p->p_cpu->ci_schedstate.spc_nrun == 0 &&
	    (p->p_cpu->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0 &&
	    curproc == p)) {
		sched_wasidle++;
		return (p->p_cpu);
	}

	if (cpuset_first(&set) == NULL)
		cpuset_copy(&set, &sched_all_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		int cost = sched_proc_to_cpu_cost(ci, p);

		if (choice == NULL || cost < last_cost) {
			choice = ci;
			last_cost = cost;
		}
		cpuset_del(&set, ci);
	}

	if (p->p_cpu != choice)
		sched_nmigrations++;
	else
		sched_nomigrations++;

	return (choice);
#else
	return (curcpu());
#endif
}

/*
 * Attempt to steal a proc from some cpu.
 */
struct proc *
sched_steal_proc(struct cpu_info *self)
{
	struct proc *best = NULL;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int bestcost = INT_MAX;
	struct cpu_info *ci;
	struct cpuset set;

	KASSERT((self->ci_schedstate.spc_schedflags & SPCF_SHOULDHALT) == 0);

	/* Don't steal if we don't want to schedule processes on this CPU. */
	if (!cpuset_isset(&sched_all_cpus, self))
		return (NULL);

	cpuset_copy(&set, &sched_queued_cpus);

	while ((ci = cpuset_first(&set)) != NULL) {
		struct proc *p;
		int queue;
		int cost;

		cpuset_del(&set, ci);

		spc = &ci->ci_schedstate;

		/* Only consider the highest priority non-empty queue. */
		queue = ffs(spc->spc_whichqs) - 1;
		TAILQ_FOREACH(p, &spc->spc_qs[queue], p_runq) {
			if (p->p_flag & P_CPUPEG)
				continue;

			cost = sched_proc_to_cpu_cost(self, p);

			if (best == NULL || cost < bestcost) {
				best = p;
				bestcost = cost;
			}
		}
	}
	if (best == NULL)
		return (NULL);

	TRACEPOINT(sched, steal, best->p_tid + THREAD_PID_OFFSET,
	    best->p_p->ps_pid, CPU_INFO_UNIT(self));

	remrunqueue(best);
	best->p_cpu = self;

	sched_stolen++;
#endif
	return (best);
}


#ifdef MULTIPROCESSOR
/*
 * Base 2 logarithm of an int. Returns 0 for 0 (yeye, I know).
 */
static int
log2(unsigned int i)
{
	int ret = 0;

	while (i >>= 1)
		ret++;

	return (ret);
}

/*
 * Calculate the cost of moving the proc to this cpu.
 *
 * What we want is some guesstimate of how much "performance" it will
 * cost us to move the proc here. Not just for caches and TLBs and NUMA
 * memory, but also for the proc itself. A highly loaded cpu might not
 * be the best candidate for this proc since it won't get run.
 *
 * Just total guesstimates for now.
 */

int sched_cost_load = 1;
int sched_cost_priority = 1;
int sched_cost_runnable = 3;
int sched_cost_resident = 1;
#endif

int
sched_proc_to_cpu_cost(struct cpu_info *ci, struct proc *p)
{
	int cost = 0;
#ifdef MULTIPROCESSOR
	struct schedstate_percpu *spc;
	int l2resident = 0;

	spc = &ci->ci_schedstate;

	/*
	 * First, account for the priority of the proc we want to move.
	 * The higher the priority of the proc and the lower the priority
	 * of what the destination cpu is currently running, the more
	 * willing we are to move it.
	 */
	if (!cpuset_isset(&sched_idle_cpus, ci)) {
		cost += (p->p_usrpri - spc->spc_curpriority) *
		    sched_cost_priority;
		cost += sched_cost_runnable;
	}
	if (cpuset_isset(&sched_queued_cpus, ci))
		cost += spc->spc_nrun * sched_cost_runnable;

	/*
	 * Try to avoid the primary cpu as it handles hardware interrupts.
	 *
	 * XXX Needs to be revisited when we distribute interrupts
	 * over cpus.
	 */
	if (CPU_IS_PRIMARY(ci))
		cost += sched_cost_runnable;

	/*
	 * If the proc is on this cpu already, lower the cost by how much
	 * it has been running and an estimate of its footprint.
	 */
	if (p->p_cpu == ci && p->p_slptime == 0) {
		l2resident =
		    log2(pmap_resident_count(p->p_vmspace->vm_map.pmap));
		cost -= l2resident * sched_cost_resident;
	}
#endif
	return (cost);
}

/*
 * Peg a proc to a cpu.
 */
void
sched_peg_curproc(struct cpu_info *ci)
{
	struct proc *p = curproc;

	SCHED_LOCK();
	atomic_setbits_int(&p->p_flag, P_CPUPEG);
	setrunqueue(ci, p, p->p_usrpri);
	p->p_ru.ru_nvcsw++;
	mi_switch();
	SCHED_UNLOCK();
}

#ifdef MULTIPROCESSOR

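/*
 * Bring the secondary cpus back into the scheduling set after a halt,
 * clearing their halt flags (SMT siblings stay out unless enabled).
 */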
void
sched_start_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		atomic_clearbits_int(&spc->spc_schedflags,
		    SPCF_SHOULDHALT | SPCF_HALTED);
#ifdef __HAVE_CPU_TOPOLOGY
		if (!sched_smt && ci->ci_smt_id > 0)
			continue;
#endif
		cpuset_add(&sched_all_cpus, ci);
	}
}

void
sched_stop_secondary_cpus(void)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	/*
	 * Flag the secondary CPUs to halt, then wait until each one
	 * has parked in its idle loop.
	 */
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		cpuset_del(&sched_all_cpus, ci);
		atomic_setbits_int(&spc->spc_schedflags, SPCF_SHOULDHALT);
	}
	CPU_INFO_FOREACH(cii, ci) {
		struct schedstate_percpu *spc = &ci->ci_schedstate;

		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		while ((spc->spc_schedflags & SPCF_HALTED) == 0) {
			sleep_setup(spc, PZERO, "schedstate");
			sleep_finish(0,
			    (spc->spc_schedflags & SPCF_HALTED) == 0);
		}
	}
}

struct sched_barrier_state {
	struct cpu_info *ci;
	struct cond cond;
};

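/*
 * Task run by sched_barrier() below: peg ourselves to the target cpu,
 * which forces that cpu to run us, then signal the waiter and unpeg.
 */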
void
sched_barrier_task(void *arg)
{
	struct sched_barrier_state *sb = arg;
	struct cpu_info *ci = sb->ci;

	sched_peg_curproc(ci);
	cond_signal(&sb->cond);
	atomic_clearbits_int(&curproc->p_flag, P_CPUPEG);
}

void
sched_barrier(struct cpu_info *ci)
{
	struct sched_barrier_state sb;
	struct task task;
	CPU_INFO_ITERATOR cii;

	if (ci == NULL) {
		CPU_INFO_FOREACH(cii, ci) {
			if (CPU_IS_PRIMARY(ci))
				break;
		}
	}
	KASSERT(ci != NULL);

	if (ci == curcpu())
		return;

	sb.ci = ci;
	cond_init(&sb.cond);
	task_set(&task, sched_barrier_task, &sb);

	task_add(systqmp, &task);
	cond_wait(&sb.cond, "sbar");
}

#else

void
sched_barrier(struct cpu_info *ci)
{
}

#endif

/*
 * Functions to manipulate cpu sets.
 */
struct cpu_info *cpuset_infos[MAXCPUS];
static struct cpuset cpuset_all;

void
cpuset_init_cpu(struct cpu_info *ci)
{
	cpuset_add(&cpuset_all, ci);
	cpuset_infos[CPU_INFO_UNIT(ci)] = ci;
}

void
cpuset_clear(struct cpuset *cs)
{
	memset(cs, 0, sizeof(*cs));
}

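/*
 * A cpuset is a bitmask with one bit per cpu: cpu n lives in word
 * n / 32 at bit n % 32, so e.g. cpu 35 is bit 3 of cs_set[1]. Add,
 * delete and test each touch a single word with atomic bit ops.
 */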
void
cpuset_add(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_setbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

void
cpuset_del(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	atomic_clearbits_int(&cs->cs_set[num/32], (1U << (num % 32)));
}

int
cpuset_isset(struct cpuset *cs, struct cpu_info *ci)
{
	unsigned int num = CPU_INFO_UNIT(ci);
	return (cs->cs_set[num/32] & (1U << (num % 32)));
}

void
cpuset_add_all(struct cpuset *cs)
{
	cpuset_copy(cs, &cpuset_all);
}

void
cpuset_copy(struct cpuset *to, struct cpuset *from)
{
	memcpy(to, from, sizeof(*to));
}

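/*
 * Return the lowest-numbered cpu in the set, or NULL if the set is
 * empty.
 */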
struct cpu_info *
cpuset_first(struct cpuset *cs)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		if (cs->cs_set[i])
			return (cpuset_infos[i * 32 + ffs(cs->cs_set[i]) - 1]);

	return (NULL);
}

void
cpuset_union(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] | b->cs_set[i];
}

void
cpuset_intersection(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = a->cs_set[i] & b->cs_set[i];
}

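/*
 * Note the argument order: this stores b minus a (b & ~a) in to,
 * i.e. the members of b that are not in a.
 */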
void
cpuset_complement(struct cpuset *to, struct cpuset *a, struct cpuset *b)
{
	int i;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		to->cs_set[i] = b->cs_set[i] & ~a->cs_set[i];
}

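/*
 * Count the cpus in the set. Each n &= n - 1 step clears the lowest
 * set bit, so the inner loop runs once per member.
 */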
int
cpuset_cardinality(struct cpuset *cs)
{
	int cardinality, i, n;

	cardinality = 0;

	for (i = 0; i < CPUSET_ASIZE(ncpus); i++)
		for (n = cs->cs_set[i]; n != 0; n &= n - 1)
			cardinality++;

	return (cardinality);
}

int
sysctl_hwncpuonline(void)
{
	return cpuset_cardinality(&sched_all_cpus);
}

int
cpu_is_online(struct cpu_info *ci)
{
	return cpuset_isset(&sched_all_cpus, ci);
}

#ifdef __HAVE_CPU_TOPOLOGY

#include <sys/sysctl.h>

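/*
 * hw.smt sysctl handler: when SMT is toggled, add or remove the
 * non-primary SMT siblings from the set of schedulable cpus.
 */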
int
sysctl_hwsmt(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	int err, newsmt;

	newsmt = sched_smt;
	err = sysctl_int_bounded(oldp, oldlenp, newp, newlen, &newsmt, 0, 1);
	if (err)
		return err;
	if (newsmt == sched_smt)
		return 0;

	sched_smt = newsmt;
	CPU_INFO_FOREACH(cii, ci) {
		if (CPU_IS_PRIMARY(ci) || !CPU_IS_RUNNING(ci))
			continue;
		if (ci->ci_smt_id == 0)
			continue;
		if (sched_smt)
			cpuset_add(&sched_all_cpus, ci);
		else
			cpuset_del(&sched_all_cpus, ci);
	}

	return 0;
}

#endif