1 /* $OpenBSD: sched_bsd.c,v 1.93 2024/06/03 12:48:25 claudio Exp $ */
2 /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
3
4 /*-
5 * Copyright (c) 1982, 1986, 1990, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/clockintr.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/resourcevar.h>
47 #include <uvm/uvm_extern.h>
48 #include <sys/sched.h>
49 #include <sys/timeout.h>
50 #include <sys/smr.h>
51 #include <sys/tracepoint.h>
52
53 #ifdef KTRACE
54 #include <sys/ktrace.h>
55 #endif
56
uint64_t roundrobin_period;	/* [I] roundrobin period (ns) */
int	lbolt;			/* once a second sleep address */

/* Global scheduler lock; manipulated via SCHED_LOCK()/SCHED_UNLOCK(). */
struct mutex sched_lock;

void	update_loadavg(void *);
void	schedcpu(void *);
uint32_t decay_aftersleep(uint32_t, uint32_t);

/* Set of CPUs currently running their idle thread (maintained elsewhere). */
extern struct cpuset sched_idle_cpus;

/*
 * constants for averages over 1, 5, and 15 minutes when sampling at
 * 5 second intervals.
 */
static const fixpt_t cexp[3] = {
	0.9200444146293232 * FSCALE,	/* exp(-1/12) */
	0.9834714538216174 * FSCALE,	/* exp(-1/60) */
	0.9944598480048967 * FSCALE,	/* exp(-1/180) */
};

/* 1, 5 and 15 minute load averages, updated by update_loadavg(). */
struct loadavg averunnable;
79
80 /*
81 * Force switch among equal priority processes every 100ms.
82 */
83 void
roundrobin(struct clockrequest * cr,void * cf,void * arg)84 roundrobin(struct clockrequest *cr, void *cf, void *arg)
85 {
86 uint64_t count;
87 struct cpu_info *ci = curcpu();
88 struct schedstate_percpu *spc = &ci->ci_schedstate;
89
90 count = clockrequest_advance(cr, roundrobin_period);
91
92 if (ci->ci_curproc != NULL) {
93 if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
94 /*
95 * The process has already been through a roundrobin
96 * without switching and may be hogging the CPU.
97 * Indicate that the process should yield.
98 */
99 atomic_setbits_int(&spc->spc_schedflags,
100 SPCF_SEENRR | SPCF_SHOULDYIELD);
101 } else {
102 atomic_setbits_int(&spc->spc_schedflags,
103 SPCF_SEENRR);
104 }
105 }
106
107 if (spc->spc_nrun || spc->spc_schedflags & SPCF_SHOULDYIELD)
108 need_resched(ci);
109 }
110
111
112
113 /*
114 * update_loadav: compute a tenex style load average of a quantity on
115 * 1, 5, and 15 minute intervals.
116 */
117 void
update_loadavg(void * unused)118 update_loadavg(void *unused)
119 {
120 static struct timeout to = TIMEOUT_INITIALIZER(update_loadavg, NULL);
121 CPU_INFO_ITERATOR cii;
122 struct cpu_info *ci;
123 u_int i, nrun = 0;
124
125 CPU_INFO_FOREACH(cii, ci) {
126 if (!cpuset_isset(&sched_idle_cpus, ci))
127 nrun++;
128 nrun += ci->ci_schedstate.spc_nrun;
129 }
130
131 for (i = 0; i < 3; i++) {
132 averunnable.ldavg[i] = (cexp[i] * averunnable.ldavg[i] +
133 nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
134 }
135
136 timeout_add_sec(&to, 5);
137 }
138
139 /*
140 * Constants for digital decay and forget:
141 * 90% of (p_estcpu) usage in 5 * loadav time
142 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
143 * Note that, as ps(1) mentions, this can let percentages
144 * total over 100% (I've seen 137.9% for 3 processes).
145 *
146 * Note that hardclock updates p_estcpu and p_cpticks independently.
147 *
148 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
149 * That is, the system wants to compute a value of decay such
150 * that the following for loop:
151 * for (i = 0; i < (5 * loadavg); i++)
152 * p_estcpu *= decay;
153 * will compute
154 * p_estcpu *= 0.1;
155 * for all values of loadavg:
156 *
157 * Mathematically this loop can be expressed by saying:
158 * decay ** (5 * loadavg) ~= .1
159 *
160 * The system computes decay as:
161 * decay = (2 * loadavg) / (2 * loadavg + 1)
162 *
163 * We wish to prove that the system's computation of decay
164 * will always fulfill the equation:
165 * decay ** (5 * loadavg) ~= .1
166 *
167 * If we compute b as:
168 * b = 2 * loadavg
169 * then
170 * decay = b / (b + 1)
171 *
172 * We now need to prove two things:
173 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
174 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
175 *
176 * Facts:
177 * For x close to zero, exp(x) =~ 1 + x, since
178 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
179 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
180 * For x close to zero, ln(1+x) =~ x, since
181 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
182 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
183 * ln(.1) =~ -2.30
184 *
185 * Proof of (1):
186 * Solve (factor)**(power) =~ .1 given power (5*loadav):
187 * solving for factor,
188 * ln(factor) =~ (-2.30/5*loadav), or
189 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
190 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
191 *
192 * Proof of (2):
193 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
194 * solving for power,
195 * power*ln(b/(b+1)) =~ -2.30, or
196 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
197 *
198 * Actual power values for the implemented algorithm are as follows:
199 * loadav: 1 2 3 4
200 * power: 5.68 10.32 14.94 19.55
201 */
202
/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
#define	loadfactor(loadav)	(2 * (loadav))
/* fixed-point multiply of `cpu' by loadfac/(loadfac+1) */
#define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;	/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
223
224 /*
225 * Recompute process priorities, every second.
226 */
void
schedcpu(void *unused)
{
	static struct timeout to = TIMEOUT_INITIALIZER(schedcpu, NULL);
	fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
	struct proc *p;
	unsigned int newcpu;

	LIST_FOREACH(p, &allproc, p_list) {
		/*
		 * Idle threads are never placed on the runqueue,
		 * therefore computing their priority is pointless.
		 */
		if (p->p_cpu != NULL &&
		    p->p_cpu->ci_schedstate.spc_idleproc == p)
			continue;
		/*
		 * Increment sleep time (if sleeping). We ignore overflow.
		 */
		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
			p->p_slptime++;
		/* Age the %CPU estimate by one tick of ccpu decay. */
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		SCHED_LOCK();
		/*
		 * p_pctcpu is only for diagnostic tools such as ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (stathz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
			100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / stathz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / stathz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		/* Decay the CPU usage estimate and derive the new priority. */
		newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
		setpriority(p, newcpu, p->p_p->ps_nice);

		/*
		 * Requeue the thread only if the new priority lands in a
		 * different priority queue (PPQ); otherwise leave it alone.
		 */
		if (p->p_stat == SRUN &&
		    (p->p_runpri / SCHED_PPQ) != (p->p_usrpri / SCHED_PPQ)) {
			remrunqueue(p);
			setrunqueue(p->p_cpu, p, p->p_usrpri);
		}
		SCHED_UNLOCK();
	}
	/* Wake any sleepers on the once-a-second lbolt channel. */
	wakeup(&lbolt);
	timeout_add_sec(&to, 1);
}
282
283 /*
284 * Recalculate the priority of a process after it has slept for a while.
285 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
286 * least six times the loadfactor will decay p_estcpu to zero.
287 */
288 uint32_t
decay_aftersleep(uint32_t estcpu,uint32_t slptime)289 decay_aftersleep(uint32_t estcpu, uint32_t slptime)
290 {
291 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
292 uint32_t newcpu;
293
294 if (slptime > 5 * loadfac)
295 newcpu = 0;
296 else {
297 newcpu = estcpu;
298 slptime--; /* the first time was done in schedcpu */
299 while (newcpu && --slptime)
300 newcpu = decay_cpu(loadfac, newcpu);
301
302 }
303
304 return (newcpu);
305 }
306
307 /*
308 * General yield call. Puts the current process back on its run queue and
309 * performs a voluntary context switch.
310 */
311 void
yield(void)312 yield(void)
313 {
314 struct proc *p = curproc;
315
316 SCHED_LOCK();
317 setrunqueue(p->p_cpu, p, p->p_usrpri);
318 p->p_ru.ru_nvcsw++;
319 mi_switch();
320 SCHED_UNLOCK();
321 }
322
323 /*
324 * General preemption call. Puts the current process back on its run queue
325 * and performs an involuntary context switch. If a process is supplied,
326 * we switch to that process. Otherwise, we use the normal process selection
327 * criteria.
328 */
329 void
preempt(void)330 preempt(void)
331 {
332 struct proc *p = curproc;
333
334 SCHED_LOCK();
335 setrunqueue(p->p_cpu, p, p->p_usrpri);
336 p->p_ru.ru_nivcsw++;
337 mi_switch();
338 SCHED_UNLOCK();
339 }
340
341 void
mi_switch(void)342 mi_switch(void)
343 {
344 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
345 struct proc *p = curproc;
346 struct proc *nextproc;
347 struct process *pr = p->p_p;
348 struct timespec ts;
349 int oldipl;
350 #ifdef MULTIPROCESSOR
351 int hold_count;
352 #endif
353
354 KASSERT(p->p_stat != SONPROC);
355
356 SCHED_ASSERT_LOCKED();
357
358 #ifdef MULTIPROCESSOR
359 /*
360 * Release the kernel_lock, as we are about to yield the CPU.
361 */
362 if (_kernel_lock_held())
363 hold_count = __mp_release_all(&kernel_lock);
364 else
365 hold_count = 0;
366 #endif
367
368 /*
369 * Compute the amount of time during which the current
370 * process was running, and add that to its total so far.
371 */
372 nanouptime(&ts);
373 if (timespeccmp(&ts, &spc->spc_runtime, <)) {
374 #if 0
375 printf("uptime is not monotonic! "
376 "ts=%lld.%09lu, runtime=%lld.%09lu\n",
377 (long long)tv.tv_sec, tv.tv_nsec,
378 (long long)spc->spc_runtime.tv_sec,
379 spc->spc_runtime.tv_nsec);
380 #endif
381 timespecclear(&ts);
382 } else {
383 timespecsub(&ts, &spc->spc_runtime, &ts);
384 }
385
386 /* add the time counts for this thread to the process's total */
387 tuagg_locked(pr, p, &ts);
388
389 /* Stop any optional clock interrupts. */
390 if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
391 atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
392 clockintr_cancel(&spc->spc_itimer);
393 }
394 if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
395 atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
396 clockintr_cancel(&spc->spc_profclock);
397 }
398
399 /*
400 * Process is about to yield the CPU; clear the appropriate
401 * scheduling flags.
402 */
403 atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
404
405 nextproc = sched_chooseproc();
406
407 /* preserve old IPL level so we can switch back to that */
408 oldipl = MUTEX_OLDIPL(&sched_lock);
409
410 if (p != nextproc) {
411 uvmexp.swtch++;
412 TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET,
413 nextproc->p_p->ps_pid);
414 cpu_switchto(p, nextproc);
415 TRACEPOINT(sched, on__cpu, NULL);
416 } else {
417 TRACEPOINT(sched, remain__cpu, NULL);
418 p->p_stat = SONPROC;
419 }
420
421 clear_resched(curcpu());
422
423 SCHED_ASSERT_LOCKED();
424
425 /* Restore proc's IPL. */
426 MUTEX_OLDIPL(&sched_lock) = oldipl;
427 SCHED_UNLOCK();
428
429 SCHED_ASSERT_UNLOCKED();
430
431 assertwaitok();
432 smr_idle();
433
434 /*
435 * We're running again; record our new start time. We might
436 * be running on a new CPU now, so refetch the schedstate_percpu
437 * pointer.
438 */
439 KASSERT(p->p_cpu == curcpu());
440 spc = &p->p_cpu->ci_schedstate;
441
442 /* Start any optional clock interrupts needed by the thread. */
443 if (ISSET(p->p_p->ps_flags, PS_ITIMER)) {
444 atomic_setbits_int(&spc->spc_schedflags, SPCF_ITIMER);
445 clockintr_advance(&spc->spc_itimer, hardclock_period);
446 }
447 if (ISSET(p->p_p->ps_flags, PS_PROFIL)) {
448 atomic_setbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
449 clockintr_advance(&spc->spc_profclock, profclock_period);
450 }
451
452 nanouptime(&spc->spc_runtime);
453
454 #ifdef MULTIPROCESSOR
455 /*
456 * Reacquire the kernel_lock now. We do this after we've
457 * released the scheduler lock to avoid deadlock, and before
458 * we reacquire the interlock and the scheduler lock.
459 */
460 if (hold_count)
461 __mp_acquire_count(&kernel_lock, hold_count);
462 #endif
463 SCHED_LOCK();
464 }
465
466 /*
467 * Change process state to be runnable,
468 * placing it on the run queue.
469 */
void
setrunnable(struct proc *p)
{
	struct process *pr = p->p_p;
	u_char prio;

	SCHED_ASSERT_LOCKED();

	switch (p->p_stat) {
	case 0:
	case SRUN:
	case SONPROC:
	case SDEAD:
	case SIDL:
	default:
		/* Only stopped or sleeping threads can become runnable. */
		panic("setrunnable");
	case SSTOP:
		/*
		 * If we're being traced (possibly because someone attached us
		 * while we were stopped), check for a signal from the debugger.
		 */
		if ((pr->ps_flags & PS_TRACED) != 0 && pr->ps_xsig != 0)
			atomic_setbits_int(&p->p_siglist, sigmask(pr->ps_xsig));
		prio = p->p_usrpri;
		setrunqueue(NULL, p, prio);
		break;
	case SSLEEP:
		prio = p->p_slppri;

		/* if not yet asleep, don't add to runqueue */
		if (ISSET(p->p_flag, P_WSLEEP))
			return;
		setrunqueue(NULL, p, prio);
		TRACEPOINT(sched, wakeup, p->p_tid + THREAD_PID_OFFSET,
		    p->p_p->ps_pid, CPU_INFO_UNIT(p->p_cpu));
		break;
	}
	/*
	 * After a sleep of more than a second, decay the accumulated
	 * CPU estimate and recompute the priority accordingly.
	 */
	if (p->p_slptime > 1) {
		uint32_t newcpu;

		newcpu = decay_aftersleep(p->p_estcpu, p->p_slptime);
		setpriority(p, newcpu, pr->ps_nice);
	}
	p->p_slptime = 0;
}
515
516 /*
517 * Compute the priority of a process.
518 */
519 void
setpriority(struct proc * p,uint32_t newcpu,uint8_t nice)520 setpriority(struct proc *p, uint32_t newcpu, uint8_t nice)
521 {
522 unsigned int newprio;
523
524 newprio = min((PUSER + newcpu + NICE_WEIGHT * (nice - NZERO)), MAXPRI);
525
526 SCHED_ASSERT_LOCKED();
527 p->p_estcpu = newcpu;
528 p->p_usrpri = newprio;
529 }
530
531 /*
532 * We adjust the priority of the current process. The priority of a process
533 * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu)
534 * is increased here. The formula for computing priorities (in kern_synch.c)
535 * will compute a different value each time p_estcpu increases. This can
536 * cause a switch, but unless the priority crosses a PPQ boundary the actual
537 * queue will not change. The cpu usage estimator ramps up quite quickly
538 * when the process is running (linearly), and decays away exponentially, at
539 * a rate which is proportionally slower when the system is busy. The basic
540 * principle is that the system will 90% forget that the process used a lot
541 * of CPU time in 5 * loadav seconds. This causes the system to favor
542 * processes which haven't run much recently, and to round-robin among other
543 * processes.
544 */
545 void
schedclock(struct proc * p)546 schedclock(struct proc *p)
547 {
548 struct cpu_info *ci = curcpu();
549 struct schedstate_percpu *spc = &ci->ci_schedstate;
550 uint32_t newcpu;
551
552 if (p == spc->spc_idleproc || spc->spc_spinning)
553 return;
554
555 SCHED_LOCK();
556 newcpu = ESTCPULIM(p->p_estcpu + 1);
557 setpriority(p, newcpu, p->p_p->ps_nice);
558 SCHED_UNLOCK();
559 }
560
/* Machine-dependent hook that applies a performance level (0-100). */
void (*cpu_setperf)(int);

#define PERFPOL_MANUAL 0	/* level set by hand via sysctl */
#define PERFPOL_AUTO 1		/* level tracks load (setperf_auto()) */
#define PERFPOL_HIGH 2		/* level pinned at 100 */
int perflevel = 100;		/* current performance level */
int perfpolicy = PERFPOL_AUTO;	/* current policy, one of PERFPOL_* */

#ifndef SMALL_KERNEL
/*
 * The code below handles CPU throttling.
 */
#include <sys/sysctl.h>

void setperf_auto(void *);
struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL);
extern int hw_power;
578
579 void
setperf_auto(void * v)580 setperf_auto(void *v)
581 {
582 static uint64_t *idleticks, *totalticks;
583 static int downbeats;
584 int i, j = 0;
585 int speedup = 0;
586 CPU_INFO_ITERATOR cii;
587 struct cpu_info *ci;
588 uint64_t idle, total, allidle = 0, alltotal = 0;
589
590 if (perfpolicy != PERFPOL_AUTO)
591 return;
592
593 if (cpu_setperf == NULL)
594 return;
595
596 if (hw_power) {
597 speedup = 1;
598 goto faster;
599 }
600
601 if (!idleticks)
602 if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
603 M_DEVBUF, M_NOWAIT | M_ZERO)))
604 return;
605 if (!totalticks)
606 if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
607 M_DEVBUF, M_NOWAIT | M_ZERO))) {
608 free(idleticks, M_DEVBUF,
609 sizeof(*idleticks) * ncpusfound);
610 return;
611 }
612 CPU_INFO_FOREACH(cii, ci) {
613 if (!cpu_is_online(ci))
614 continue;
615 total = 0;
616 for (i = 0; i < CPUSTATES; i++) {
617 total += ci->ci_schedstate.spc_cp_time[i];
618 }
619 total -= totalticks[j];
620 idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
621 if (idle < total / 3)
622 speedup = 1;
623 alltotal += total;
624 allidle += idle;
625 idleticks[j] += idle;
626 totalticks[j] += total;
627 j++;
628 }
629 if (allidle < alltotal / 2)
630 speedup = 1;
631 if (speedup && downbeats < 5)
632 downbeats++;
633
634 if (speedup && perflevel != 100) {
635 faster:
636 perflevel = 100;
637 cpu_setperf(perflevel);
638 } else if (!speedup && perflevel != 0 && --downbeats <= 0) {
639 perflevel = 0;
640 cpu_setperf(perflevel);
641 }
642
643 timeout_add_msec(&setperf_to, 100);
644 }
645
646 int
sysctl_hwsetperf(void * oldp,size_t * oldlenp,void * newp,size_t newlen)647 sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
648 {
649 int err;
650
651 if (!cpu_setperf)
652 return EOPNOTSUPP;
653
654 if (perfpolicy != PERFPOL_MANUAL)
655 return sysctl_rdint(oldp, oldlenp, newp, perflevel);
656
657 err = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
658 &perflevel, 0, 100);
659 if (err)
660 return err;
661
662 if (newp != NULL)
663 cpu_setperf(perflevel);
664
665 return 0;
666 }
667
668 int
sysctl_hwperfpolicy(void * oldp,size_t * oldlenp,void * newp,size_t newlen)669 sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
670 {
671 char policy[32];
672 int err;
673
674 if (!cpu_setperf)
675 return EOPNOTSUPP;
676
677 switch (perfpolicy) {
678 case PERFPOL_MANUAL:
679 strlcpy(policy, "manual", sizeof(policy));
680 break;
681 case PERFPOL_AUTO:
682 strlcpy(policy, "auto", sizeof(policy));
683 break;
684 case PERFPOL_HIGH:
685 strlcpy(policy, "high", sizeof(policy));
686 break;
687 default:
688 strlcpy(policy, "unknown", sizeof(policy));
689 break;
690 }
691
692 if (newp == NULL)
693 return sysctl_rdstring(oldp, oldlenp, newp, policy);
694
695 err = sysctl_string(oldp, oldlenp, newp, newlen, policy, sizeof(policy));
696 if (err)
697 return err;
698 if (strcmp(policy, "manual") == 0)
699 perfpolicy = PERFPOL_MANUAL;
700 else if (strcmp(policy, "auto") == 0)
701 perfpolicy = PERFPOL_AUTO;
702 else if (strcmp(policy, "high") == 0)
703 perfpolicy = PERFPOL_HIGH;
704 else
705 return EINVAL;
706
707 if (perfpolicy == PERFPOL_AUTO) {
708 timeout_add_msec(&setperf_to, 200);
709 } else if (perfpolicy == PERFPOL_HIGH) {
710 perflevel = 100;
711 cpu_setperf(perflevel);
712 }
713 return 0;
714 }
715 #endif
716
717 /*
718 * Start the scheduler's periodic timeouts.
719 */
void
scheduler_start(void)
{
	/* Arm the once-a-second priority recomputation... */
	schedcpu(NULL);
	/* ...and the 5-second load average sampler. */
	update_loadavg(NULL);

#ifndef SMALL_KERNEL
	if (perfpolicy == PERFPOL_AUTO)
		timeout_add_msec(&setperf_to, 200);
#endif
}
731
732