1 /* $OpenBSD: sched_bsd.c,v 1.98 2024/11/24 13:02:37 claudio Exp $ */
2 /* $NetBSD: kern_synch.c,v 1.37 1996/04/22 01:38:37 christos Exp $ */
3
4 /*-
5 * Copyright (c) 1982, 1986, 1990, 1991, 1993
6 * The Regents of the University of California. All rights reserved.
7 * (c) UNIX System Laboratories, Inc.
8 * All or some portions of this file are derived from material licensed
9 * to the University of California by American Telephone and Telegraph
10 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
11 * the permission of UNIX System Laboratories, Inc.
12 *
13 * Redistribution and use in source and binary forms, with or without
14 * modification, are permitted provided that the following conditions
15 * are met:
16 * 1. Redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer.
18 * 2. Redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution.
21 * 3. Neither the name of the University nor the names of its contributors
22 * may be used to endorse or promote products derived from this software
23 * without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
26 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
29 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
30 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
31 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
32 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
33 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
34 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
35 * SUCH DAMAGE.
36 *
37 * @(#)kern_synch.c 8.6 (Berkeley) 1/21/94
38 */
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/clockintr.h>
43 #include <sys/proc.h>
44 #include <sys/kernel.h>
45 #include <sys/malloc.h>
46 #include <sys/resourcevar.h>
47 #include <uvm/uvm_extern.h>
48 #include <sys/sched.h>
49 #include <sys/timeout.h>
50 #include <sys/smr.h>
51 #include <sys/tracepoint.h>
52
53 #ifdef KTRACE
54 #include <sys/ktrace.h>
55 #endif
56
57 uint64_t roundrobin_period; /* [I] roundrobin period (ns) */
58 int lbolt; /* once a second sleep address */
59
60 struct mutex sched_lock;
61
62 void update_loadavg(void *);
63 void schedcpu(void *);
64 uint32_t decay_aftersleep(uint32_t, uint32_t);
65
66 extern struct cpuset sched_idle_cpus;
67
68 /*
69 * constants for averages over 1, 5, and 15 minutes when sampling at
70 * 5 second intervals.
71 */
72 static const fixpt_t cexp[3] = {
73 0.9200444146293232 * FSCALE, /* exp(-1/12) */
74 0.9834714538216174 * FSCALE, /* exp(-1/60) */
75 0.9944598480048967 * FSCALE, /* exp(-1/180) */
76 };
77
78 struct loadavg averunnable;
79
80 /*
81 * Force switch among equal priority processes every 100ms.
82 */
83 void
84 roundrobin(struct clockrequest *cr, void *cf, void *arg)
85 {
86 uint64_t count;
87 struct cpu_info *ci = curcpu();
88 struct schedstate_percpu *spc = &ci->ci_schedstate;
89
90 count = clockrequest_advance(cr, roundrobin_period);
91
92 if (ci->ci_curproc != NULL) {
93 if (spc->spc_schedflags & SPCF_SEENRR || count >= 2) {
94 /*
95 * The process has already been through a roundrobin
96 * without switching and may be hogging the CPU.
97 * Indicate that the process should yield.
98 */
99 atomic_setbits_int(&spc->spc_schedflags,
100 SPCF_SEENRR | SPCF_SHOULDYIELD);
101 } else {
102 atomic_setbits_int(&spc->spc_schedflags,
103 SPCF_SEENRR);
104 }
105 }
106
107 if (spc->spc_nrun || spc->spc_schedflags & SPCF_SHOULDYIELD)
108 need_resched(ci);
109 }
110
111
112
113 /*
114 * update_loadavg: compute a tenex style load average of a quantity on
115 * 1, 5, and 15 minute intervals.
116 */
117 void
118 update_loadavg(void *unused)
119 {
120 static struct timeout to = TIMEOUT_INITIALIZER(update_loadavg, NULL);
121 CPU_INFO_ITERATOR cii;
122 struct cpu_info *ci;
123 u_int i, nrun = 0;
124
125 CPU_INFO_FOREACH(cii, ci) {
126 if (!cpuset_isset(&sched_idle_cpus, ci))
127 nrun++;
128 nrun += ci->ci_schedstate.spc_nrun;
129 }
130
131 for (i = 0; i < 3; i++) {
132 averunnable.ldavg[i] = (cexp[i] * averunnable.ldavg[i] +
133 nrun * FSCALE * (FSCALE - cexp[i])) >> FSHIFT;
134 }
135
136 timeout_add_sec(&to, 5);
137 }
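/*
 * A worked example of the update above (illustrative only, assuming the
 * usual FSCALE of 2048): with one runnable thread (nrun == 1) and a
 * previous 1-minute average of 0.0, a single 5-second sample gives
 *
 *	ldavg[0] = 0.92 * 0.0 + 1 * (1 - 0.92) ~= 0.08
 *
 * i.e. a fraction (1 - exp(-1/12)) of each new sample is blended in, so
 * a sustained load of 1 converges towards 1.0 with a time constant of
 * about one minute.
 */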
138
139 /*
140 * Constants for digital decay and forget:
141 * 90% of (p_estcpu) usage in 5 * loadav time
142 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive)
143 * Note that, as ps(1) mentions, this can let percentages
144 * total over 100% (I've seen 137.9% for 3 processes).
145 *
146 * Note that hardclock updates p_estcpu and p_cpticks independently.
147 *
148 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds.
149 * That is, the system wants to compute a value of decay such
150 * that the following for loop:
151 * for (i = 0; i < (5 * loadavg); i++)
152 * p_estcpu *= decay;
153 * will compute
154 * p_estcpu *= 0.1;
155 * for all values of loadavg:
156 *
157 * Mathematically this loop can be expressed by saying:
158 * decay ** (5 * loadavg) ~= .1
159 *
160 * The system computes decay as:
161 * decay = (2 * loadavg) / (2 * loadavg + 1)
162 *
163 * We wish to prove that the system's computation of decay
164 * will always fulfill the equation:
165 * decay ** (5 * loadavg) ~= .1
166 *
167 * If we compute b as:
168 * b = 2 * loadavg
169 * then
170 * decay = b / (b + 1)
171 *
172 * We now need to prove two things:
173 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
174 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
175 *
176 * Facts:
177 * For x close to zero, exp(x) =~ 1 + x, since
178 * exp(x) = 0! + x**1/1! + x**2/2! + ... .
179 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
180 * For x close to zero, ln(1+x) =~ x, since
181 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1
182 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
183 * ln(.1) =~ -2.30
184 *
185 * Proof of (1):
186 * Solve (factor)**(power) =~ .1 given power (5*loadav):
187 * solving for factor,
188 * ln(factor) =~ (-2.30/5*loadav), or
189 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
190 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED
191 *
192 * Proof of (2):
193 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
194 * solving for power,
195 * power*ln(b/(b+1)) =~ -2.30, or
196 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED
197 *
198 * Actual power values for the implemented algorithm are as follows:
199 * loadav: 1 2 3 4
200 * power: 5.68 10.32 14.94 19.55
201 */
202
203 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
204 #define loadfactor(loadav) (2 * (loadav))
205 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE))
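/*
 * Putting numbers on the derivation above (illustrative only, assuming
 * FSCALE == 2048): with a load average of 1.0, loadfactor() is 2.0 in
 * fixed point, so decay_cpu() multiplies p_estcpu by 2 / (2 + 1) == 2/3
 * once per second.  Since (2/3) ** 5.68 ~= 0.1, about 90% of the
 * estimate is forgotten after ~5.68 seconds, matching the loadav == 1
 * entry in the power table above.
 */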
206
207 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
208 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */
209
210 /*
211 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
212 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
213 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
214 *
215 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
216 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
217 *
218 * If you don't want to bother with the faster/more-accurate formula, you
219 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
220 * (more general) method of calculating the %age of CPU used by a process.
221 */
222 #define CCPU_SHIFT 11
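/*
 * Worked example (illustrative only; assumes FSHIFT == 11, so the shift
 * below is zero, and stathz == 100): schedcpu() first scales p_pctcpu
 * by ccpu and then adds p_cpticks, so a thread that stays on the CPU
 * converges to
 *
 *	p_pctcpu -> 100 / (1 - exp(-1/20)) ~= 2050 ~= FSCALE
 *
 * which ps(1) reports as roughly 100%.
 */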
223
224 /*
225 * Recompute process priorities, every second.
226 */
227 void
228 schedcpu(void *unused)
229 {
230 static struct timeout to = TIMEOUT_INITIALIZER(schedcpu, NULL);
231 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
232 struct proc *p;
233 unsigned int newcpu;
234
235 LIST_FOREACH(p, &allproc, p_list) {
236 /*
237 * Idle threads are never placed on the runqueue,
238 * therefore computing their priority is pointless.
239 */
240 if (p->p_cpu != NULL &&
241 p->p_cpu->ci_schedstate.spc_idleproc == p)
242 continue;
243 /*
244 * Increment sleep time (if sleeping). We ignore overflow.
245 */
246 if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
247 p->p_slptime++;
248 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
249 /*
250 * If the process has slept the entire second,
251 * stop recalculating its priority until it wakes up.
252 */
253 if (p->p_slptime > 1)
254 continue;
255 SCHED_LOCK();
256 /*
257 * p_pctcpu is only for diagnostic tools such as ps.
258 */
259 #if (FSHIFT >= CCPU_SHIFT)
260 p->p_pctcpu += (stathz == 100)?
261 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
262 100 * (((fixpt_t) p->p_cpticks)
263 << (FSHIFT - CCPU_SHIFT)) / stathz;
264 #else
265 p->p_pctcpu += ((FSCALE - ccpu) *
266 (p->p_cpticks * FSCALE / stathz)) >> FSHIFT;
267 #endif
268 p->p_cpticks = 0;
269 newcpu = (u_int) decay_cpu(loadfac, p->p_estcpu);
270 setpriority(p, newcpu, p->p_p->ps_nice);
271
272 if (p->p_stat == SRUN &&
273 (p->p_runpri / SCHED_PPQ) != (p->p_usrpri / SCHED_PPQ)) {
274 remrunqueue(p);
275 setrunqueue(p->p_cpu, p, p->p_usrpri);
276 }
277 SCHED_UNLOCK();
278 }
279 wakeup(&lbolt);
280 timeout_add_sec(&to, 1);
281 }
282
283 /*
284 * Recalculate the priority of a process after it has slept for a while.
285 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at
286 * least six times the loadfactor will decay p_estcpu to zero.
287 */
288 uint32_t
289 decay_aftersleep(uint32_t estcpu, uint32_t slptime)
290 {
291 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]);
292 uint32_t newcpu;
293
294 if (slptime > 5 * loadfac)
295 newcpu = 0;
296 else {
297 newcpu = estcpu;
298 slptime--; /* the first time was done in schedcpu */
299 while (newcpu && --slptime)
300 newcpu = decay_cpu(loadfac, newcpu);
301
302 }
303
304 return (newcpu);
305 }
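/*
 * A minimal userland sketch of the same decay, for illustration only
 * (hypothetical decay_demo(), real arithmetic instead of fixpt_t, no
 * ESTCPULIM clamp):
 *
 *	uint32_t
 *	decay_demo(uint32_t estcpu, uint32_t seconds, double loadav)
 *	{
 *		while (estcpu && seconds--)
 *			estcpu = 2 * loadav * estcpu / (2 * loadav + 1);
 *		return estcpu;
 *	}
 *
 * With loadav == 1.0, an estcpu of 255 reaches zero after roughly a
 * dozen one-second decay steps.
 */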
306
307 /*
308 * General yield call. Puts the current process back on its run queue and
309 * performs a voluntary context switch.
310 */
311 void
312 yield(void)
313 {
314 struct proc *p = curproc;
315
316 SCHED_LOCK();
317 setrunqueue(p->p_cpu, p, p->p_usrpri);
318 p->p_ru.ru_nvcsw++;
319 mi_switch();
320 SCHED_UNLOCK();
321 }
322
323 /*
324 * General preemption call. Puts the current process back on its run queue
325 * and performs an involuntary context switch. The next process to run
326 * is chosen through the normal selection criteria in
327 * sched_chooseproc().
328 */
329 void
330 preempt(void)
331 {
332 struct proc *p = curproc;
333
334 SCHED_LOCK();
335 setrunqueue(p->p_cpu, p, p->p_usrpri);
336 p->p_ru.ru_nivcsw++;
337 mi_switch();
338 SCHED_UNLOCK();
339 }
340
341 void
342 mi_switch(void)
343 {
344 struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
345 struct proc *p = curproc;
346 struct proc *nextproc;
347 int oldipl;
348 #ifdef MULTIPROCESSOR
349 int hold_count;
350 #endif
351
352 KASSERT(p->p_stat != SONPROC);
353
354 SCHED_ASSERT_LOCKED();
355
356 #ifdef MULTIPROCESSOR
357 /*
358 * Release the kernel_lock, as we are about to yield the CPU.
359 */
360 if (_kernel_lock_held())
361 hold_count = __mp_release_all(&kernel_lock);
362 else
363 hold_count = 0;
364 #endif
365
366 /* Update thread runtime */
367 tuagg_add_runtime();
368
369 /* Stop any optional clock interrupts. */
370 if (ISSET(spc->spc_schedflags, SPCF_ITIMER)) {
371 atomic_clearbits_int(&spc->spc_schedflags, SPCF_ITIMER);
372 clockintr_cancel(&spc->spc_itimer);
373 }
374 if (ISSET(spc->spc_schedflags, SPCF_PROFCLOCK)) {
375 atomic_clearbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
376 clockintr_cancel(&spc->spc_profclock);
377 }
378
379 /*
380 * Process is about to yield the CPU; clear the appropriate
381 * scheduling flags.
382 */
383 atomic_clearbits_int(&spc->spc_schedflags, SPCF_SWITCHCLEAR);
384
385 nextproc = sched_chooseproc();
386
387 /* preserve old IPL level so we can switch back to that */
388 oldipl = MUTEX_OLDIPL(&sched_lock);
389
390 if (p != nextproc) {
391 uvmexp.swtch++;
392 TRACEPOINT(sched, off__cpu, nextproc->p_tid + THREAD_PID_OFFSET,
393 nextproc->p_p->ps_pid);
394 cpu_switchto(p, nextproc);
395 TRACEPOINT(sched, on__cpu, NULL);
396 } else {
397 TRACEPOINT(sched, remain__cpu, NULL);
398 p->p_stat = SONPROC;
399 }
400
401 clear_resched(curcpu());
402
403 SCHED_ASSERT_LOCKED();
404
405 /* Restore proc's IPL. */
406 MUTEX_OLDIPL(&sched_lock) = oldipl;
407 SCHED_UNLOCK();
408
409 SCHED_ASSERT_UNLOCKED();
410
411 assertwaitok();
412 smr_idle();
413
414 /*
415 * We're running again; record our new start time. We might
416 * be running on a new CPU now, so refetch the schedstate_percpu
417 * pointer.
418 */
419 KASSERT(p->p_cpu == curcpu());
420 spc = &p->p_cpu->ci_schedstate;
421
422 /* Start any optional clock interrupts needed by the thread. */
423 if (ISSET(p->p_p->ps_flags, PS_ITIMER)) {
424 atomic_setbits_int(&spc->spc_schedflags, SPCF_ITIMER);
425 clockintr_advance(&spc->spc_itimer, hardclock_period);
426 }
427 if (ISSET(p->p_p->ps_flags, PS_PROFIL)) {
428 atomic_setbits_int(&spc->spc_schedflags, SPCF_PROFCLOCK);
429 clockintr_advance(&spc->spc_profclock, profclock_period);
430 }
431
432 nanouptime(&spc->spc_runtime);
433
434 #ifdef MULTIPROCESSOR
435 /*
436 * Reacquire the kernel_lock now. We do this after we've
437 * released the scheduler lock to avoid deadlock, and before
438 * we reacquire the scheduler lock below.
439 */
440 if (hold_count)
441 __mp_acquire_count(&kernel_lock, hold_count);
442 #endif
443 SCHED_LOCK();
444 }
445
446 /*
447 * Change process state to be runnable,
448 * placing it on the run queue.
449 */
450 void
451 setrunnable(struct proc *p)
452 {
453 struct process *pr = p->p_p;
454 u_char prio;
455
456 SCHED_ASSERT_LOCKED();
457
458 switch (p->p_stat) {
459 case 0:
460 case SRUN:
461 case SONPROC:
462 case SDEAD:
463 case SIDL:
464 default:
465 panic("setrunnable");
466 case SSTOP:
467 prio = p->p_usrpri;
468 /* if not yet asleep, unstop but don't add to runqueue */
469 if (ISSET(p->p_flag, P_WSLEEP)) {
470 p->p_stat = SSLEEP;
471 return;
472 }
473 setrunqueue(NULL, p, prio);
474 break;
475 case SSLEEP:
476 prio = p->p_slppri;
477
478 /* if not yet asleep, don't add to runqueue */
479 if (ISSET(p->p_flag, P_WSLEEP))
480 return;
481 setrunqueue(NULL, p, prio);
482 TRACEPOINT(sched, wakeup, p->p_tid + THREAD_PID_OFFSET,
483 p->p_p->ps_pid, CPU_INFO_UNIT(p->p_cpu));
484 break;
485 }
486 if (p->p_slptime > 1) {
487 uint32_t newcpu;
488
489 newcpu = decay_aftersleep(p->p_estcpu, p->p_slptime);
490 setpriority(p, newcpu, pr->ps_nice);
491 }
492 p->p_slptime = 0;
493 }
494
495 /*
496 * Compute the priority of a process.
497 */
498 void
499 setpriority(struct proc *p, uint32_t newcpu, uint8_t nice)
500 {
501 unsigned int newprio;
502
503 newprio = min((PUSER + newcpu + NICE_WEIGHT * (nice - NZERO)), MAXPRI);
504
505 SCHED_ASSERT_LOCKED();
506 p->p_estcpu = newcpu;
507 p->p_usrpri = newprio;
508 }
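/*
 * Illustration only, using the customary constant values (PUSER 50,
 * NICE_WEIGHT 2, NZERO 20, MAXPRI 127): a nice-0 thread with an estcpu
 * of 36 gets
 *
 *	newprio = min(50 + 36 + 2 * (20 - 20), 127) = 86
 *
 * while renicing it to +10 moves the same thread to 106; nice shifts
 * the whole priority range rather than changing how fast p_estcpu
 * accumulates.
 */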
509
510 /*
511 * We adjust the priority of the current process. The priority of a process
512 * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu)
513 * is increased here. The formula for computing priorities (see setpriority())
514 * will compute a different value each time p_estcpu increases. This can
515 * cause a switch, but unless the priority crosses a PPQ boundary the actual
516 * queue will not change. The cpu usage estimator ramps up quite quickly
517 * when the process is running (linearly), and decays away exponentially, at
518 * a rate which is proportionally slower when the system is busy. The basic
519 * principle is that the system will 90% forget that the process used a lot
520 * of CPU time in 5 * loadav seconds. This causes the system to favor
521 * processes which haven't run much recently, and to round-robin among other
522 * processes.
523 */
524 void
525 schedclock(struct proc *p)
526 {
527 struct cpu_info *ci = curcpu();
528 struct schedstate_percpu *spc = &ci->ci_schedstate;
529 uint32_t newcpu;
530
531 if (p == spc->spc_idleproc || spc->spc_spinning)
532 return;
533
534 SCHED_LOCK();
535 newcpu = ESTCPULIM(p->p_estcpu + 1);
536 setpriority(p, newcpu, p->p_p->ps_nice);
537 SCHED_UNLOCK();
538 }
539
540 void (*cpu_setperf)(int);
541
542 #define PERFPOL_MANUAL 0
543 #define PERFPOL_AUTO 1
544 #define PERFPOL_HIGH 2
545 int perflevel = 100;
546 int perfpolicy_on_ac = PERFPOL_HIGH;
547 int perfpolicy_on_battery = PERFPOL_AUTO;
548
549 #ifndef SMALL_KERNEL
550 /*
551 * The code below handles CPU throttling.
552 */
553 #include <sys/sysctl.h>
554
555 void setperf_auto(void *);
556 struct timeout setperf_to = TIMEOUT_INITIALIZER(setperf_auto, NULL);
557 extern int hw_power;
558
559 static inline int
560 perfpolicy_dynamic(void)
561 {
562 return (perfpolicy_on_ac == PERFPOL_AUTO ||
563 perfpolicy_on_battery == PERFPOL_AUTO);
564 }
565
566 static inline int
567 current_perfpolicy(void)
568 {
569 return (hw_power) ? perfpolicy_on_ac : perfpolicy_on_battery;
570 }
571
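/*
 * setperf_auto() is the heartbeat of the "auto" policy: every 100ms it
 * samples the per-CPU scheduler tick counters and raises the
 * performance level to 100 if any online CPU spent less than a third
 * of the interval idle, or if the machine as a whole was less than
 * half idle.  The downbeats counter makes it wait for up to five quiet
 * samples in a row before dropping back to the lowest level.
 */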
572 void
573 setperf_auto(void *v)
574 {
575 static uint64_t *idleticks, *totalticks;
576 static int downbeats;
577 int i, j = 0;
578 int speedup = 0;
579 CPU_INFO_ITERATOR cii;
580 struct cpu_info *ci;
581 uint64_t idle, total, allidle = 0, alltotal = 0;
582
583 if (!perfpolicy_dynamic())
584 return;
585
586 if (cpu_setperf == NULL)
587 return;
588
589 if (current_perfpolicy() == PERFPOL_HIGH) {
590 speedup = 1;
591 goto faster;
592 }
593
594 if (!idleticks)
595 if (!(idleticks = mallocarray(ncpusfound, sizeof(*idleticks),
596 M_DEVBUF, M_NOWAIT | M_ZERO)))
597 return;
598 if (!totalticks)
599 if (!(totalticks = mallocarray(ncpusfound, sizeof(*totalticks),
600 M_DEVBUF, M_NOWAIT | M_ZERO))) {
601 free(idleticks, M_DEVBUF,
602 sizeof(*idleticks) * ncpusfound);
603 return;
604 }
605 CPU_INFO_FOREACH(cii, ci) {
606 if (!cpu_is_online(ci))
607 continue;
608 total = 0;
609 for (i = 0; i < CPUSTATES; i++) {
610 total += ci->ci_schedstate.spc_cp_time[i];
611 }
612 total -= totalticks[j];
613 idle = ci->ci_schedstate.spc_cp_time[CP_IDLE] - idleticks[j];
614 if (idle < total / 3)
615 speedup = 1;
616 alltotal += total;
617 allidle += idle;
618 idleticks[j] += idle;
619 totalticks[j] += total;
620 j++;
621 }
622 if (allidle < alltotal / 2)
623 speedup = 1;
624 if (speedup && downbeats < 5)
625 downbeats++;
626
627 if (speedup && perflevel != 100) {
628 faster:
629 perflevel = 100;
630 cpu_setperf(perflevel);
631 } else if (!speedup && perflevel != 0 && --downbeats <= 0) {
632 perflevel = 0;
633 cpu_setperf(perflevel);
634 }
635
636 timeout_add_msec(&setperf_to, 100);
637 }
638
639 int
640 sysctl_hwsetperf(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
641 {
642 int err;
643
644 if (!cpu_setperf)
645 return EOPNOTSUPP;
646
647 if (perfpolicy_on_ac != PERFPOL_MANUAL)
648 return sysctl_rdint(oldp, oldlenp, newp, perflevel);
649
650 err = sysctl_int_bounded(oldp, oldlenp, newp, newlen,
651 &perflevel, 0, 100);
652 if (err)
653 return err;
654
655 if (newp != NULL)
656 cpu_setperf(perflevel);
657
658 return 0;
659 }
660
661 int
662 sysctl_hwperfpolicy(void *oldp, size_t *oldlenp, void *newp, size_t newlen)
663 {
664 char policy[32];
665 char *policy_on_battery;
666 int err, perfpolicy;
667
668 if (!cpu_setperf)
669 return EOPNOTSUPP;
670
671 switch (current_perfpolicy()) {
672 case PERFPOL_MANUAL:
673 strlcpy(policy, "manual", sizeof(policy));
674 break;
675 case PERFPOL_AUTO:
676 strlcpy(policy, "auto", sizeof(policy));
677 break;
678 case PERFPOL_HIGH:
679 strlcpy(policy, "high", sizeof(policy));
680 break;
681 default:
682 strlcpy(policy, "unknown", sizeof(policy));
683 break;
684 }
685
686 if (newp == NULL)
687 return sysctl_rdstring(oldp, oldlenp, newp, policy);
688
689 err = sysctl_string(oldp, oldlenp, newp, newlen, policy, sizeof(policy));
690 if (err)
691 return err;
692
693 policy_on_battery = strchr(policy, ',');
694 if (policy_on_battery != NULL) {
695 *policy_on_battery = '\0';
696 policy_on_battery++;
697 }
698
699 if (strcmp(policy, "manual") == 0)
700 perfpolicy = PERFPOL_MANUAL;
701 else if (strcmp(policy, "auto") == 0)
702 perfpolicy = PERFPOL_AUTO;
703 else if (strcmp(policy, "high") == 0)
704 perfpolicy = PERFPOL_HIGH;
705 else
706 return EINVAL;
707
708 if (policy_on_battery == NULL)
709 perfpolicy_on_battery = perfpolicy_on_ac = perfpolicy;
710 else {
711 if (strcmp(policy_on_battery, "manual") == 0 ||
712 perfpolicy == PERFPOL_MANUAL) {
713 /* Not handled */
714 return EINVAL;
715 }
716 if (strcmp(policy_on_battery, "auto") == 0)
717 perfpolicy_on_battery = PERFPOL_AUTO;
718 else if (strcmp(policy_on_battery, "high") == 0)
719 perfpolicy_on_battery = PERFPOL_HIGH;
720 else
721 return EINVAL;
722 perfpolicy_on_ac = perfpolicy;
723 }
724
725 if (current_perfpolicy() == PERFPOL_HIGH) {
726 perflevel = 100;
727 cpu_setperf(perflevel);
728 }
729
730 if (perfpolicy_dynamic())
731 timeout_add_msec(&setperf_to, 200);
732
733 return 0;
734 }
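/*
 * Accepted syntax, for illustration: "sysctl hw.perfpolicy=high"
 * applies one policy unconditionally, while "sysctl
 * hw.perfpolicy=high,auto" selects "high" on AC power and "auto" on
 * battery.  "manual" cannot be combined with a second value and
 * re-enables direct control of the level through hw.setperf.
 */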
735 #endif
736
737 /*
738 * Start the scheduler's periodic timeouts.
739 */
740 void
741 scheduler_start(void)
742 {
743 schedcpu(NULL);
744 update_loadavg(NULL);
745
746 #ifndef SMALL_KERNEL
747 if (perfpolicy_dynamic())
748 timeout_add_msec(&setperf_to, 200);
749 #endif
750 }
751
752