xref: /dragonfly/usr.sbin/powerd/powerd.c (revision 62dc643e)
1 /*
2  * Copyright (c) 2010,2016 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * The powerd daemon :
37  * - Monitor the cpu load and adjusts cpu and cpu power domain
38  *   performance accordingly.
39  * - Monitor battery life.  Alarm alerts and shutdown the machine
40  *   if battery life goes low.
41  */
42 
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
47 #include <sys/file.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/sensors.h>
51 #include <sys/time.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpumask.h>
54 #include <err.h>
55 #include <signal.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <unistd.h>
59 #include <string.h>
60 #include <syslog.h>
61 
62 #include "alert1.h"
63 
64 #define MAXDOM		MAXCPU	/* worst case, 1 cpu per domain */
65 
66 #define MAXFREQ		64
67 #define CST_STRLEN	16
68 
69 #define NFREQ_MONPERF	0x0001
70 #define NFREQ_ADJPERF	0x0002
71 #define NFREQ_CPUTEMP	0x0004
72 
73 #define NFREQ_ALL	(NFREQ_MONPERF | NFREQ_ADJPERF | NFREQ_CPUTEMP)
74 
75 struct cpu_pwrdom {
76 	TAILQ_ENTRY(cpu_pwrdom)	dom_link;
77 	int			dom_id;
78 	int			dom_ncpus;
79 	cpumask_t		dom_cpumask;
80 };
81 
82 struct cpu_state {
83 	double			cpu_qavg;
84 	double			cpu_uavg;	/* used for speeding up */
85 	double			cpu_davg;	/* used for slowing down */
86 	int			cpu_limit;
87 	int			cpu_count;
88 	char			cpu_name[8];
89 };
90 
91 static void usage(void);
92 static void get_ncpus(void);
93 static void mon_cputemp(void);
94 
95 /* usched cpumask */
96 static void get_uschedcpus(void);
97 static void set_uschedcpus(void);
98 
99 /* perfbias(4) */
100 static int has_perfbias(void);
101 static void set_perfbias(int, int);
102 
103 /* acpi(4) P-state */
104 static void acpi_getcpufreq_str(int, int *, int *);
105 static int acpi_getcpufreq_bin(int, int *, int *);
106 static void acpi_get_cpufreq(int, int *, int *);
107 static void acpi_set_cpufreq(int, int);
108 static int acpi_get_cpupwrdom(void);
109 
110 /* mwait C-state hint */
111 static int probe_cstate(void);
112 static void set_cstate(int, int);
113 
114 /* Performance monitoring */
115 static void init_perf(void);
116 static void mon_perf(double);
117 static void adj_perf(cpumask_t, cpumask_t);
118 static void adj_cpu_pwrdom(int, int);
119 static void adj_cpu_perf(int, int);
120 static void get_cputime(double);
121 static int get_nstate(struct cpu_state *, double);
122 static void add_spare_cpus(const cpumask_t, int);
123 static void restore_perf(void);
124 static void set_global_freq(int freq);
125 
126 /* Battery monitoring */
127 static int has_battery(void);
128 static int mon_battery(void);
129 static void low_battery_alert(int);
130 
131 /* Backlight */
132 static void restore_backlight(void);
133 
134 /* Runtime states for performance monitoring */
135 static int global_pcpu_limit;
136 static struct cpu_state pcpu_state[MAXCPU];
137 static struct cpu_state global_cpu_state;
138 static cpumask_t cpu_used;		/* cpus w/ high perf */
139 static cpumask_t cpu_pwrdom_used;	/* cpu power domains w/ high perf */
140 static cpumask_t usched_cpu_used;	/* cpus for usched */
141 
142 /* Constants */
143 static cpumask_t cpu_pwrdom_mask;	/* usable cpu power domains */
144 static int cpu2pwrdom[MAXCPU];		/* cpu to cpu power domain map */
145 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
146 static int NCpus;			/* # of cpus */
147 static char orig_global_cx[CST_STRLEN];
148 static char cpu_perf_cx[CST_STRLEN];
149 static int cpu_perf_cxlen;
150 static char cpu_idle_cx[CST_STRLEN];
151 static int cpu_idle_cxlen;
152 static int FreqAry[MAXFREQ];
153 static int NFreq;
154 static int NFreqChanged = NFREQ_ALL;
155 static int SavedPXGlobal;
156 
157 static int DebugOpt;
158 static int TurboOpt = 1;
159 static int PowerFd;
160 static int Hysteresis = 10;	/* percentage */
161 static double TriggerUp = 0.25;	/* single-cpu load to force max freq */
162 static double TriggerDown;	/* load per cpu to force the min freq */
163 static int HasPerfbias = 0;
164 static int AdjustCpuFreq = 1;
165 static int AdjustCstate = 0;
166 static int HighestCpuFreq;
167 static int LowestCpuFreq;
168 static int AdjustUsched = 1;
169 
170 static int AdjustCpuFreqOverride;
171 
172 static volatile int stopped;
173 
174 /* Battery life monitoring */
175 static int BatLifeMin = 2;	/* shutdown the box, if low on battery life */
176 static struct timespec BatLifePrevT;
177 static int BatLifePollIntvl = 5; /* unit: sec */
178 static struct timespec BatShutdownStartT;
179 static int BatShutdownLinger = -1;
180 static int BatShutdownLingerSet = 60; /* unit: sec */
181 static int BatShutdownLingerCnt;
182 static int BatShutdownAudioAlert = 1;
183 static int MinTemp = 75;
184 static int MaxTemp = 85;
185 static int BackLightPct = 100;
186 static int OldBackLightLevel;
187 static int BackLightDown;
188 
189 static void sigintr(int signo);
190 
191 int
192 main(int ac, char **av)
193 {
194 	double srt;
195 	double pollrate;
196 	int ch;
197 	int lowest;
198 	int highest;
199 	char buf[64];
200 	int monbat;
201 	char *p2;
202 
203 	srt = 8.0;	/* time for samples - 8 seconds */
204 	pollrate = 1.0;	/* polling rate in seconds */
205 
206 	while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:H:L:P:QT:U")) != -1) {
207 		switch(ch) {
208 		case 'b':
209 			BackLightPct = strtol(optarg, NULL, 10);
210 			break;
211 		case 'c':
212 			AdjustCstate = 1;
213 			break;
214 		case 'd':
215 			DebugOpt = 1;
216 			break;
217 		case 'e':
218 			HasPerfbias = 1;
219 			break;
220 		case 'f':
221 			AdjustCpuFreq = 0;
222 			break;
223 		case 'h':
224 			HighestCpuFreq = strtol(optarg, NULL, 10);
225 			break;
226 		case 'l':
227 			LowestCpuFreq = strtol(optarg, NULL, 10);
228 			break;
229 		case 'p':
230 			Hysteresis = (int)strtol(optarg, NULL, 10);
231 			break;
232 		case 'r':
233 			pollrate = strtod(optarg, NULL);
234 			break;
235 		case 't':
236 			TurboOpt = 0;
237 			break;
238 		case 'u':
239 			TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
240 			break;
241 		case 'B':
242 			BatLifeMin = strtol(optarg, NULL, 10);
243 			break;
244 		case 'H':
245 			MaxTemp = strtol(optarg, &p2, 0);
246 			if (*p2 == ':') {
247 				MinTemp = MaxTemp;
248 				MaxTemp = strtol(p2 + 1, NULL, 0);
249 			} else {
250 				MinTemp = MaxTemp * 9 / 10;
251 			}
252 			break;
253 		case 'L':
254 			BatShutdownLingerSet = strtol(optarg, NULL, 10);
255 			if (BatShutdownLingerSet < 0)
256 				BatShutdownLingerSet = 0;
257 			break;
258 		case 'P':
259 			BatLifePollIntvl = strtol(optarg, NULL, 10);
260 			break;
261 		case 'Q':
262 			BatShutdownAudioAlert = 0;
263 			break;
264 		case 'T':
265 			srt = strtod(optarg, NULL);
266 			break;
267 		case 'U':
268 			AdjustUsched = 0;
269 			break;
270 		default:
271 			usage();
272 			/* NOT REACHED */
273 		}
274 	}
275 	ac -= optind;
276 	av += optind;
277 
278 	setlinebuf(stdout);
279 
280 	/* Get number of cpus */
281 	get_ncpus();
282 
283 	/* Seed FreqAry[] */
284 	acpi_get_cpufreq(0, &lowest, &highest);
285 
286 	if (Hysteresis < 0 || Hysteresis > 99) {
287 		fprintf(stderr, "Invalid hysteresis value\n");
288 		exit(1);
289 	}
290 
291 	if (TriggerUp < 0 || TriggerUp > 1) {
292 		fprintf(stderr, "Invalid load limit value\n");
293 		exit(1);
294 	}
295 
296 	if (BackLightPct > 100 || BackLightPct <= 0) {
297 		fprintf(stderr, "Invalid backlight setting, ignore\n");
298 		BackLightPct = 100;
299 	}
300 
301 	TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
302 
303 	/*
304 	 * Make sure powerd is not already running.
305 	 */
306 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
307 	if (PowerFd < 0) {
308 		fprintf(stderr,
309 			"Cannot create /var/run/powerd.pid, "
310 			"continuing anyway\n");
311 	} else {
312 		ssize_t r;
313 		pid_t pid = -1;
314 
315 		r = read(PowerFd, buf, sizeof(buf) - 1);
316 		if (r > 0) {
317 			buf[r] = 0;
318 			pid = strtol(buf, NULL, 0);
319 		}
320 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
321 			if (pid > 0) {
322 				kill(pid, SIGTERM);
323 				flock(PowerFd, LOCK_EX);
324 				fprintf(stderr, "restarting powerd\n");
325 			} else {
326 				fprintf(stderr,
327 					"powerd is already running, "
328 					"unable to kill pid for restart\n");
329 				exit(1);
330 			}
331 		}
332 		lseek(PowerFd, 0L, 0);
333 	}
334 
335 	/*
336 	 * Demonize and set pid
337 	 */
338 	if (DebugOpt == 0) {
339 		daemon(0, 0);
340 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
341 	}
342 
343 	if (PowerFd >= 0) {
344 		ftruncate(PowerFd, 0);
345 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
346 		write(PowerFd, buf, strlen(buf));
347 	}
348 
349 	/* Do we need to monitor battery life? */
350 	if (BatLifePollIntvl <= 0)
351 		monbat = 0;
352 	else
353 		monbat = has_battery();
354 
355 	/* Do we have perfbias(4)? */
356 	if (HasPerfbias)
357 		HasPerfbias = has_perfbias();
358 
359 	/* Could we adjust C-state? */
360 	if (AdjustCstate)
361 		AdjustCstate = probe_cstate();
362 
363 	/*
364 	 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
365 	 *
366 	 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
367 	 * taskqueue and ACPI taskqueue is shared across various
368 	 * ACPI modules, any delay in other modules may cause
369 	 * hw.acpi.cpu.px_dom* to be created at quite a later time
370 	 * (e.g. cmbat module's task could take quite a lot of time).
371 	 */
372 	for (;;) {
373 		/* Prime delta cputime calculation. */
374 		get_cputime(pollrate);
375 
376 		/* Wait for all cpus to appear */
377 		if (acpi_get_cpupwrdom())
378 			break;
379 		usleep((int)(pollrate * 1000000.0));
380 	}
381 
382 	/*
383 	 * Catch some signals so that max performance could be restored.
384 	 */
385 	signal(SIGINT, sigintr);
386 	signal(SIGTERM, sigintr);
387 
388 	/* Initialize performance states */
389 	init_perf();
390 
391 	srt = srt / pollrate;	/* convert to sample count */
392 	if (DebugOpt)
393 		printf("samples for downgrading: %5.2f\n", srt);
394 
395 	/*
396 	 * Monitoring loop
397 	 */
398 	while (!stopped) {
399 		/*
400 		 * Monitor performance
401 		 */
402 		get_cputime(pollrate);
403 		mon_cputemp();
404 		mon_perf(srt);
405 
406 		/*
407 		 * Monitor battery
408 		 */
409 		if (monbat)
410 			monbat = mon_battery();
411 
412 		usleep((int)(pollrate * 1000000.0));
413 	}
414 
415 	/*
416 	 * Set to maximum performance if killed.
417 	 */
418 	syslog(LOG_INFO, "killed, setting max and exiting");
419 	if (SavedPXGlobal)
420 		set_global_freq(SavedPXGlobal);
421 	restore_perf();
422 	restore_backlight();
423 
424 	exit(0);
425 }
426 
427 static void
428 sigintr(int signo __unused)
429 {
430 	stopped = 1;
431 }
432 
433 /*
434  * Figure out the cpu power domains.
435  */
436 static int
437 acpi_get_cpupwrdom(void)
438 {
439 	struct cpu_pwrdom *dom;
440 	cpumask_t pwrdom_mask;
441 	char buf[64];
442 	char members[1024];
443 	char *str;
444 	size_t msize;
445 	int n, i, ncpu = 0, dom_id;
446 
447 	memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
448 	memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
449 	CPUMASK_ASSZERO(cpu_pwrdom_mask);
450 
451 	for (i = 0; i < MAXDOM; ++i) {
452 		snprintf(buf, sizeof(buf),
453 			 "hw.acpi.cpu.px_dom%d.available", i);
454 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
455 			continue;
456 
457 		dom = calloc(1, sizeof(*dom));
458 		dom->dom_id = i;
459 
460 		if (cpu_pwrdomain[i] != NULL) {
461 			fprintf(stderr, "cpu power domain %d exists\n", i);
462 			exit(1);
463 		}
464 		cpu_pwrdomain[i] = dom;
465 		CPUMASK_ORBIT(cpu_pwrdom_mask, i);
466 	}
467 	pwrdom_mask = cpu_pwrdom_mask;
468 
469 	while (CPUMASK_TESTNZERO(pwrdom_mask)) {
470 		dom_id = BSFCPUMASK(pwrdom_mask);
471 		CPUMASK_NANDBIT(pwrdom_mask, dom_id);
472 		dom = cpu_pwrdomain[dom_id];
473 
474 		CPUMASK_ASSZERO(dom->dom_cpumask);
475 
476 		snprintf(buf, sizeof(buf),
477 			 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
478 		msize = sizeof(members);
479 		if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
480 			cpu_pwrdomain[dom_id] = NULL;
481 			free(dom);
482 			continue;
483 		}
484 
485 		members[msize] = 0;
486 		for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
487 			n = -1;
488 			sscanf(str, "cpu%d", &n);
489 			if (n >= 0) {
490 				++ncpu;
491 				++dom->dom_ncpus;
492 				CPUMASK_ORBIT(dom->dom_cpumask, n);
493 				cpu2pwrdom[n] = dom->dom_id;
494 			}
495 		}
496 		if (dom->dom_ncpus == 0) {
497 			cpu_pwrdomain[dom_id] = NULL;
498 			free(dom);
499 			continue;
500 		}
501 		if (DebugOpt) {
502 			printf("dom%d cpumask: ", dom->dom_id);
503 			for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
504 				printf("%jx ",
505 				    (uintmax_t)dom->dom_cpumask.ary[i]);
506 			}
507 			printf("\n");
508 		}
509 	}
510 
511 	if (ncpu != NCpus) {
512 		if (DebugOpt)
513 			printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
514 
515 		pwrdom_mask = cpu_pwrdom_mask;
516 		while (CPUMASK_TESTNZERO(pwrdom_mask)) {
517 			dom_id = BSFCPUMASK(pwrdom_mask);
518 			CPUMASK_NANDBIT(pwrdom_mask, dom_id);
519 			dom = cpu_pwrdomain[dom_id];
520 			if (dom != NULL)
521 				free(dom);
522 		}
523 		return 0;
524 	}
525 	return 1;
526 }
527 
528 /*
529  * Save per-cpu load and sum of per-cpu load.
530  */
531 static void
532 get_cputime(double pollrate)
533 {
534 	static struct kinfo_cputime ocpu_time[MAXCPU];
535 	static struct kinfo_cputime ncpu_time[MAXCPU];
536 	size_t slen;
537 	int ncpu;
538 	int cpu;
539 	uint64_t delta;
540 
541 	bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
542 
543 	slen = sizeof(ncpu_time);
544 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
545 		fprintf(stderr, "kern.cputime sysctl not available\n");
546 		exit(1);
547 	}
548 	ncpu = slen / sizeof(ncpu_time[0]);
549 
550 	delta = 0;
551 	for (cpu = 0; cpu < ncpu; ++cpu) {
552 		uint64_t d;
553 
554 		d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
555 		     ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
556 		    (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
557 		     ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
558 		pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
559 
560 		delta += d;
561 	}
562 	global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
563 }
564 
565 static void
566 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
567 {
568 	char buf[256], sysid[64];
569 	size_t buflen;
570 	char *ptr;
571 	int v, highest, lowest;
572 	int freqidx;
573 
574 	/*
575 	 * Retrieve availability list
576 	 */
577 	snprintf(sysid, sizeof(sysid),
578 		 "hw.acpi.cpu.px_dom%d.available", dom_id);
579 	buflen = sizeof(buf) - 1;
580 	if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
581 		return;
582 	buf[buflen] = 0;
583 
584 	/*
585 	 * Parse out the highest and lowest cpu frequencies
586 	 */
587 	ptr = buf;
588 	highest = lowest = 0;
589 	freqidx = 0;
590 	while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
591 		if ((lowest == 0 || lowest > v) &&
592 		    (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
593 			lowest = v;
594 		if ((highest == 0 || highest < v) &&
595 		    (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
596 			highest = v;
597 		/*
598 		 * Detect turbo mode
599 		 */
600 		if (!TurboOpt && highest - v == 1)
601 			highest = v;
602 		++freqidx;
603 	}
604 
605 	/*
606 	 * Frequency array
607 	 */
608 	if (freqidx > MAXFREQ)
609 		freqidx = MAXFREQ;
610 	if (NFreq != freqidx) {
611 		NFreq = freqidx;
612 		NFreqChanged = NFREQ_ALL;
613 	}
614 	ptr = buf;
615 	while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
616 		if (freqidx == 0)
617 			break;
618 		if (FreqAry[freqidx - 1] != v)
619 			NFreqChanged = NFREQ_ALL;
620 		FreqAry[--freqidx] = v;
621 	}
622 
623 	*highest0 = highest;
624 	*lowest0 = lowest;
625 }
626 
627 static int
628 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
629 {
630 	char sysid[64];
631 	size_t freqlen;
632 	int freqcnt, i;
633 	int freqary[MAXFREQ];
634 
635 	/*
636 	 * Retrieve availability list
637 	 */
638 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
639 	freqlen = sizeof(FreqAry);
640 	bzero(freqary, sizeof(freqary));
641 	if (sysctlbyname(sysid, freqary, &freqlen, NULL, 0) < 0)
642 		return 0;
643 
644 	freqcnt = freqlen / sizeof(freqary[0]);
645 	if (NFreq != freqcnt) {
646 		NFreq = freqcnt;
647 		NFreqChanged = NFREQ_ALL;
648 	}
649 	if (bcmp(freqary, FreqAry, sizeof(FreqAry)) != 0)
650 		NFreqChanged = NFREQ_ALL;
651 	bcopy(freqary, FreqAry, sizeof(FreqAry));
652 	if (freqcnt == 0)
653 		return 0;
654 
655 	for (i = freqcnt - 1; i >= 0; --i) {
656 		*lowest0 = FreqAry[i];
657 		if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
658 			break;
659 	}
660 
661 	i = 0;
662 	*highest0 = FreqAry[0];
663 	if (!TurboOpt && freqcnt > 1 && FreqAry[0] - FreqAry[1] == 1) {
664 		i = 1;
665 		*highest0 = FreqAry[1];
666 	}
667 	for (; i < freqcnt; ++i) {
668 		if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
669 			break;
670 		*highest0 = FreqAry[i];
671 	}
672 	return 1;
673 }
674 
675 static void
676 acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
677 {
678 	*highest = 0;
679 	*lowest = 0;
680 
681 	if (acpi_getcpufreq_bin(dom_id, highest, lowest))
682 		return;
683 	acpi_getcpufreq_str(dom_id, highest, lowest);
684 }
685 
686 static
687 void
688 usage(void)
689 {
690 	fprintf(stderr, "usage: powerd [-cdeftQU] [-p hysteresis] "
691 	    "[-h highest_freq] [-l lowest_freq] "
692 	    "[-r poll_interval] [-u trigger_up] "
693 	    "[-B min_battery_life] [-L low_battery_linger] "
694 	    "[-P battery_poll_interval] [-T sample_interval] "
695 	    "[-b backlight]\n");
696 	exit(1);
697 }
698 
699 #ifndef timespecsub
700 #define timespecsub(vvp, uvp)						\
701 	do {								\
702 		(vvp)->tv_sec -= (uvp)->tv_sec;				\
703 		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
704 		if ((vvp)->tv_nsec < 0) {				\
705 			(vvp)->tv_sec--;				\
706 			(vvp)->tv_nsec += 1000000000;			\
707 		}							\
708 	} while (0)
709 #endif
710 
711 #define BAT_SYSCTL_TIME_MAX	50000000 /* unit: nanosecond */
712 
713 static int
714 has_battery(void)
715 {
716 	struct timespec s, e;
717 	size_t len;
718 	int val;
719 
720 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
721 	BatLifePrevT = s;
722 
723 	len = sizeof(val);
724 	if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
725 		/* No AC line information */
726 		return 0;
727 	}
728 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
729 
730 	timespecsub(&e, &s);
731 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
732 		/* hw.acpi.acline takes to long to be useful */
733 		syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
734 		return 0;
735 	}
736 
737 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
738 	len = sizeof(val);
739 	if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
740 		/* No battery life */
741 		return 0;
742 	}
743 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
744 
745 	timespecsub(&e, &s);
746 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
747 		/* hw.acpi.battery.life takes to long to be useful */
748 		syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
749 		return 0;
750 	}
751 	return 1;
752 }
753 
754 static void
755 low_battery_alert(int life)
756 {
757 	int fmt, stereo, freq;
758 	int fd;
759 
760 	syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
761 	    life, BatShutdownLingerCnt);
762 	++BatShutdownLingerCnt;
763 
764 	if (!BatShutdownAudioAlert)
765 		return;
766 
767 	fd = open("/dev/dsp", O_WRONLY);
768 	if (fd < 0)
769 		return;
770 
771 	fmt = AFMT_S16_LE;
772 	if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
773 		goto done;
774 
775 	stereo = 0;
776 	if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
777 		goto done;
778 
779 	freq = 44100;
780 	if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
781 		goto done;
782 
783 	write(fd, alert1, sizeof(alert1));
784 	write(fd, alert1, sizeof(alert1));
785 
786 done:
787 	close(fd);
788 }
789 
790 static int
791 mon_battery(void)
792 {
793 	struct timespec cur, ts;
794 	int acline, life;
795 	size_t len;
796 
797 	clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
798 	ts = cur;
799 	timespecsub(&ts, &BatLifePrevT);
800 	if (ts.tv_sec < BatLifePollIntvl)
801 		return 1;
802 	BatLifePrevT = cur;
803 
804 	len = sizeof(acline);
805 	if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
806 		return 1;
807 	if (acline) {
808 		BatShutdownLinger = -1;
809 		BatShutdownLingerCnt = 0;
810 		restore_backlight();
811 		return 1;
812 	}
813 
814 	if (!BackLightDown && BackLightPct != 100) {
815 		int backlight_max, backlight;
816 
817 		len = sizeof(backlight_max);
818 		if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
819 		    NULL, 0) < 0) {
820 			/* No more backlight adjustment */
821 			BackLightPct = 100;
822 			goto after_backlight;
823 		}
824 
825 		len = sizeof(OldBackLightLevel);
826 		if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
827 		    NULL, 0) < 0) {
828 			/* No more backlight adjustment */
829 			BackLightPct = 100;
830 			goto after_backlight;
831 		}
832 
833 		backlight = (backlight_max * BackLightPct) / 100;
834 		if (backlight >= OldBackLightLevel) {
835 			/* No more backlight adjustment */
836 			BackLightPct = 100;
837 			goto after_backlight;
838 		}
839 
840 		if (sysctlbyname("hw.backlight_level", NULL, NULL,
841 		    &backlight, sizeof(backlight)) < 0) {
842 			/* No more backlight adjustment */
843 			BackLightPct = 100;
844 			goto after_backlight;
845 		}
846 		BackLightDown = 1;
847 	}
848 after_backlight:
849 
850 	len = sizeof(life);
851 	if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
852 		return 1;
853 
854 	if (BatShutdownLinger > 0) {
855 		ts = cur;
856 		timespecsub(&ts, &BatShutdownStartT);
857 		if (ts.tv_sec > BatShutdownLinger)
858 			BatShutdownLinger = 0;
859 	}
860 
861 	if (life <= BatLifeMin) {
862 		if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
863 			syslog(LOG_ALERT, "low battery life %d%%, "
864 			    "shutting down", life);
865 			if (vfork() == 0)
866 				execlp("poweroff", "poweroff", NULL);
867 			return 0;
868 		} else if (BatShutdownLinger < 0) {
869 			BatShutdownLinger = BatShutdownLingerSet;
870 			BatShutdownStartT = cur;
871 		}
872 		low_battery_alert(life);
873 	}
874 	return 1;
875 }
876 
877 static void
878 get_ncpus(void)
879 {
880 	size_t slen;
881 
882 	slen = sizeof(NCpus);
883 	if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
884 		err(1, "sysctlbyname hw.ncpu failed");
885 	if (DebugOpt)
886 		printf("hw.ncpu %d\n", NCpus);
887 }
888 
889 static void
890 get_uschedcpus(void)
891 {
892 	size_t slen;
893 
894 	slen = sizeof(usched_cpu_used);
895 	if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
896 	    NULL, 0) < 0)
897 		err(1, "sysctlbyname kern.usched_global_cpumask failed");
898 	if (DebugOpt) {
899 		int i;
900 
901 		printf("usched cpumask was: ");
902 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
903 			printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
904 		printf("\n");
905 	}
906 }
907 
908 static void
909 set_uschedcpus(void)
910 {
911 	if (DebugOpt) {
912 		int i;
913 
914 		printf("usched cpumask: ");
915 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
916 			printf("%jx ",
917 			    (uintmax_t)usched_cpu_used.ary[i]);
918 		}
919 		printf("\n");
920 	}
921 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
922 	    &usched_cpu_used, sizeof(usched_cpu_used));
923 }
924 
925 static int
926 has_perfbias(void)
927 {
928 	size_t len;
929 	int hint;
930 
931 	len = sizeof(hint);
932 	if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
933 		return 0;
934 	return 1;
935 }
936 
937 static void
938 set_perfbias(int cpu, int inc)
939 {
940 	int hint = inc ? 0 : 15;
941 	char sysid[64];
942 
943 	if (DebugOpt)
944 		printf("cpu%d set perfbias hint %d\n", cpu, hint);
945 	snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
946 	sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
947 }
948 
949 static void
950 init_perf(void)
951 {
952 	struct cpu_state *state;
953 	int cpu;
954 
955 	/* Get usched cpumask */
956 	get_uschedcpus();
957 
958 	/*
959 	 * Assume everything are used and are maxed out, before we
960 	 * start.
961 	 */
962 	CPUMASK_ASSBMASK(cpu_used, NCpus);
963 	cpu_pwrdom_used = cpu_pwrdom_mask;
964 	global_pcpu_limit = NCpus;
965 
966 	for (cpu = 0; cpu < NCpus; ++cpu) {
967 		state = &pcpu_state[cpu];
968 
969 		state->cpu_uavg = 0.0;
970 		state->cpu_davg = 0.0;
971 		state->cpu_limit = 1;
972 		state->cpu_count = 1;
973 		snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
974 		    cpu);
975 	}
976 
977 	state = &global_cpu_state;
978 	state->cpu_uavg = 0.0;
979 	state->cpu_davg = 0.0;
980 	state->cpu_limit = NCpus;
981 	state->cpu_count = NCpus;
982 	strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
983 }
984 
985 static int
986 get_nstate(struct cpu_state *state, double srt)
987 {
988 	int ustate, dstate, nstate;
989 
990 	/* speeding up */
991 	state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
992 	/* slowing down */
993 	state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
994 	if (state->cpu_davg < state->cpu_uavg)
995 		state->cpu_davg = state->cpu_uavg;
996 
997 	ustate = state->cpu_uavg / TriggerUp;
998 	if (ustate < state->cpu_limit)
999 		ustate = state->cpu_uavg / TriggerDown;
1000 	dstate = state->cpu_davg / TriggerUp;
1001 	if (dstate < state->cpu_limit)
1002 		dstate = state->cpu_davg / TriggerDown;
1003 
1004 	nstate = (ustate > dstate) ? ustate : dstate;
1005 	if (nstate > state->cpu_count)
1006 		nstate = state->cpu_count;
1007 
1008 	if (DebugOpt) {
1009 		printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
1010 		    "%2d ncpus=%d\n", state->cpu_name,
1011 		    state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
1012 		    state->cpu_limit, nstate);
1013 	}
1014 	return nstate;
1015 }
1016 
1017 static void
1018 mon_perf(double srt)
1019 {
1020 	cpumask_t ocpu_used, ocpu_pwrdom_used;
1021 	int pnstate = 0, nstate;
1022 	int cpu;
1023 
1024 	/*
1025 	 * Find cpus requiring performance and their cooresponding power
1026 	 * domains.  Save the number of cpus requiring performance in
1027 	 * pnstate.
1028 	 */
1029 	ocpu_used = cpu_used;
1030 	ocpu_pwrdom_used = cpu_pwrdom_used;
1031 
1032 	CPUMASK_ASSZERO(cpu_used);
1033 	CPUMASK_ASSZERO(cpu_pwrdom_used);
1034 
1035 	for (cpu = 0; cpu < NCpus; ++cpu) {
1036 		struct cpu_state *state = &pcpu_state[cpu];
1037 		int s;
1038 
1039 		s = get_nstate(state, srt);
1040 		if (s) {
1041 			CPUMASK_ORBIT(cpu_used, cpu);
1042 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1043 		}
1044 		pnstate += s;
1045 
1046 		state->cpu_limit = s;
1047 	}
1048 
1049 	/*
1050 	 * Calculate nstate, the number of cpus we wish to run at max
1051 	 * performance.
1052 	 */
1053 	nstate = get_nstate(&global_cpu_state, srt);
1054 
1055 	if (nstate == global_cpu_state.cpu_limit &&
1056 	    (NFreqChanged & NFREQ_MONPERF) == 0 &&
1057 	    (pnstate == global_pcpu_limit || nstate > pnstate)) {
1058 		/* Nothing changed; keep the sets */
1059 		cpu_used = ocpu_used;
1060 		cpu_pwrdom_used = ocpu_pwrdom_used;
1061 
1062 		global_pcpu_limit = pnstate;
1063 		return;
1064 	}
1065 	NFreqChanged &= ~NFREQ_MONPERF;
1066 	global_pcpu_limit = pnstate;
1067 
1068 	if (nstate > pnstate) {
1069 		/*
1070 		 * Add spare cpus to meet global performance requirement.
1071 		 */
1072 		add_spare_cpus(ocpu_used, nstate - pnstate);
1073 	}
1074 
1075 	global_cpu_state.cpu_limit = nstate;
1076 
1077 	/*
1078 	 * Adjust cpu and cpu power domain performance
1079 	 */
1080 	adj_perf(ocpu_used, ocpu_pwrdom_used);
1081 }
1082 
1083 static void
1084 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
1085 {
1086 	cpumask_t saved_pwrdom, xcpu_used;
1087 	int done = 0, cpu;
1088 
1089 	/*
1090 	 * Find more cpus in the previous cpu set.
1091 	 */
1092 	xcpu_used = cpu_used;
1093 	CPUMASK_XORMASK(xcpu_used, ocpu_used);
1094 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1095 		cpu = BSFCPUMASK(xcpu_used);
1096 		CPUMASK_NANDBIT(xcpu_used, cpu);
1097 
1098 		if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1099 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1100 			CPUMASK_ORBIT(cpu_used, cpu);
1101 			--ncpu;
1102 			if (ncpu == 0)
1103 				return;
1104 		}
1105 	}
1106 
1107 	/*
1108 	 * Find more cpus in the used cpu power domains.
1109 	 */
1110 	saved_pwrdom = cpu_pwrdom_used;
1111 again:
1112 	while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1113 		cpumask_t unused_cpumask;
1114 		int dom;
1115 
1116 		dom = BSFCPUMASK(saved_pwrdom);
1117 		CPUMASK_NANDBIT(saved_pwrdom, dom);
1118 
1119 		unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1120 		CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1121 
1122 		while (CPUMASK_TESTNZERO(unused_cpumask)) {
1123 			cpu = BSFCPUMASK(unused_cpumask);
1124 			CPUMASK_NANDBIT(unused_cpumask, cpu);
1125 
1126 			CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1127 			CPUMASK_ORBIT(cpu_used, cpu);
1128 			--ncpu;
1129 			if (ncpu == 0)
1130 				return;
1131 		}
1132 	}
1133 	if (!done) {
1134 		done = 1;
1135 		/*
1136 		 * Find more cpus in unused cpu power domains
1137 		 */
1138 		saved_pwrdom = cpu_pwrdom_mask;
1139 		CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1140 		goto again;
1141 	}
1142 	if (DebugOpt)
1143 		printf("%d cpus not found\n", ncpu);
1144 }
1145 
1146 static void
1147 acpi_set_cpufreq(int dom, int inc)
1148 {
1149 	int lowest, highest, desired;
1150 	char sysid[64];
1151 
1152 	acpi_get_cpufreq(dom, &highest, &lowest);
1153 	if (highest == 0 || lowest == 0)
1154 		return;
1155 	desired = inc ? highest : lowest;
1156 
1157 	if (DebugOpt)
1158 		printf("dom%d set frequency %d\n", dom, desired);
1159 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1160 	sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1161 }
1162 
1163 static void
1164 adj_cpu_pwrdom(int dom, int inc)
1165 {
1166 	if (AdjustCpuFreq && (inc == 0 || AdjustCpuFreqOverride == 0))
1167 		acpi_set_cpufreq(dom, inc);
1168 }
1169 
1170 static void
1171 adj_cpu_perf(int cpu, int inc)
1172 {
1173 	if (DebugOpt) {
1174 		if (inc)
1175 			printf("cpu%d increase perf\n", cpu);
1176 		else
1177 			printf("cpu%d decrease perf\n", cpu);
1178 	}
1179 
1180 	if (HasPerfbias)
1181 		set_perfbias(cpu, inc);
1182 	if (AdjustCstate)
1183 		set_cstate(cpu, inc);
1184 }
1185 
1186 static void
1187 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1188 {
1189 	int cpu, inc;
1190 
1191 	if (AdjustUsched) {
1192 		cpumask_t old_usched_used;
1193 
1194 		/*
1195 		 * Set cpus requiring performance to the userland process
1196 		 * scheduler.  Leave the rest of cpus unmapped.
1197 		 */
1198 		old_usched_used = usched_cpu_used;
1199 		usched_cpu_used = cpu_used;
1200 		if (CPUMASK_TESTZERO(usched_cpu_used))
1201 			CPUMASK_ORBIT(usched_cpu_used, 0);
1202 		if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1203 			set_uschedcpus();
1204 	}
1205 
1206 	/*
1207 	 * Adjust per-cpu performance for any cpus which changed.
1208 	 */
1209 	CPUMASK_XORMASK(xcpu_used, cpu_used);
1210 	if (NFreqChanged & NFREQ_ADJPERF)
1211 		CPUMASK_ASSBMASK(xcpu_used, NCpus);
1212 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1213 		cpu = BSFCPUMASK(xcpu_used);
1214 		CPUMASK_NANDBIT(xcpu_used, cpu);
1215 
1216 		if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1217 			/* Increase cpu performance */
1218 			inc = 1;
1219 		} else {
1220 			/* Decrease cpu performance */
1221 			inc = 0;
1222 		}
1223 		adj_cpu_perf(cpu, inc);
1224 	}
1225 
1226 	/*
1227 	 * Adjust cpu power domain performance.  This could affect
1228 	 * a set of cpus.
1229 	 */
1230 	CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1231 	if (NFreqChanged & NFREQ_ADJPERF)
1232 		CPUMASK_ASSBMASK(xcpu_pwrdom_used, NCpus);
1233 	while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1234 		int dom;
1235 
1236 		dom = BSFCPUMASK(xcpu_pwrdom_used);
1237 		CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1238 
1239 		if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1240 			/* Increase cpu power domain performance */
1241 			inc = 1;
1242 		} else {
1243 			/* Decrease cpu power domain performance */
1244 			inc = 0;
1245 		}
1246 		adj_cpu_pwrdom(dom, inc);
1247 	}
1248 	NFreqChanged &= ~NFREQ_ADJPERF;
1249 }
1250 
1251 static void
1252 restore_perf(void)
1253 {
1254 	cpumask_t ocpu_used, ocpu_pwrdom_used;
1255 
1256 	/* Remove highest cpu frequency limitation */
1257 	HighestCpuFreq = 0;
1258 
1259 	ocpu_used = cpu_used;
1260 	ocpu_pwrdom_used = cpu_pwrdom_used;
1261 
1262 	/* Max out all cpus and cpu power domains performance */
1263 	CPUMASK_ASSBMASK(cpu_used, NCpus);
1264 	cpu_pwrdom_used = cpu_pwrdom_mask;
1265 
1266 	adj_perf(ocpu_used, ocpu_pwrdom_used);
1267 
1268 	if (AdjustCstate) {
1269 		/*
1270 		 * Restore the original mwait C-state
1271 		 */
1272 		if (DebugOpt)
1273 			printf("global set cstate %s\n", orig_global_cx);
1274 		sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1275 		    orig_global_cx, strlen(orig_global_cx) + 1);
1276 	}
1277 }
1278 
1279 static int
1280 probe_cstate(void)
1281 {
1282 	char cx_supported[1024];
1283 	const char *target;
1284 	char *ptr;
1285 	int idle_hlt, deep = 1;
1286 	size_t len;
1287 
1288 	len = sizeof(idle_hlt);
1289 	if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1290 		return 0;
1291 	if (idle_hlt != 1)
1292 		return 0;
1293 
1294 	len = sizeof(cx_supported);
1295 	if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1296 	    NULL, 0) < 0)
1297 		return 0;
1298 
1299 	len = sizeof(orig_global_cx);
1300 	if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1301 	    NULL, 0) < 0)
1302 		return 0;
1303 
1304 	strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1305 	cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1306 	if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1307 	    cpu_perf_cx, cpu_perf_cxlen) < 0) {
1308 		/* AUTODEEP is not supported; try AUTO */
1309 		deep = 0;
1310 		strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1311 		cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1312 		if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1313 		    cpu_perf_cx, cpu_perf_cxlen) < 0)
1314 			return 0;
1315 	}
1316 
1317 	if (!deep)
1318 		target = "C2/0";
1319 	else
1320 		target = NULL;
1321 	for (ptr = strtok(cx_supported, " "); ptr != NULL;
1322 	     ptr = strtok(NULL, " ")) {
1323 		if (target == NULL ||
1324 		    (target != NULL && strcmp(ptr, target) == 0)) {
1325 			strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1326 			cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1327 			if (target != NULL)
1328 				break;
1329 		}
1330 	}
1331 	if (cpu_idle_cxlen == 0)
1332 		return 0;
1333 
1334 	if (DebugOpt) {
1335 		printf("cstate orig %s, perf %s, idle %s\n",
1336 		    orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1337 	}
1338 	return 1;
1339 }
1340 
1341 static void
1342 set_cstate(int cpu, int inc)
1343 {
1344 	const char *cst;
1345 	char sysid[64];
1346 	size_t len;
1347 
1348 	if (inc) {
1349 		cst = cpu_perf_cx;
1350 		len = cpu_perf_cxlen;
1351 	} else {
1352 		cst = cpu_idle_cx;
1353 		len = cpu_idle_cxlen;
1354 	}
1355 
1356 	if (DebugOpt)
1357 		printf("cpu%d set cstate %s\n", cpu, cst);
1358 	snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1359 	sysctlbyname(sysid, NULL, NULL, cst, len);
1360 }
1361 
1362 static void
1363 restore_backlight(void)
1364 {
1365 	if (BackLightDown) {
1366 		BackLightDown = 0;
1367 		sysctlbyname("hw.backlight_level", NULL, NULL,
1368 		    &OldBackLightLevel, sizeof(OldBackLightLevel));
1369 	}
1370 }
1371 
1372 /*
1373  * get_cputemp() / mon_cputemp()
1374  *
1375  * This enforces the maximum cpu frequency based on temperature
1376  * verses MinTemp and MaxTemp.
1377  */
1378 static int
1379 get_cputemp(void)
1380 {
1381 	char sysid[64];
1382 	struct sensor sensor;
1383 	size_t sensor_size;
1384 	int t;
1385 	int mt = -1;
1386 	int n;
1387 
1388 	for (n = 0; ; ++n) {
1389 		t = 0;
1390 		snprintf(sysid, sizeof(sysid),
1391 			 "hw.sensors.cpu_node%d.temp0", n);
1392 		sensor_size = sizeof(sensor);
1393 		if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1394 			break;
1395 		t = -1;
1396 		if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1397 			t = (int)((sensor.value - 273150000) / 1000000);
1398 			if (mt < t)
1399 				mt = t;
1400 		}
1401 	}
1402 	if (n)
1403 		return mt;
1404 
1405 	/*
1406 	 * Missing nodeN for some reason, try cpuN.
1407 	 */
1408 	for (n = 0; ; ++n) {
1409 		t = 0;
1410 		snprintf(sysid, sizeof(sysid),
1411 			 "hw.sensors.cpu%d.temp0", n);
1412 		sensor_size = sizeof(sensor);
1413 		if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1414 			break;
1415 		t = -1;
1416 		if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1417 			t = (int)((sensor.value - 273150000) / 1000000);
1418 			if (mt < t)
1419 				mt = t;
1420 		}
1421 	}
1422 	return mt;
1423 }
1424 
1425 static void
1426 set_global_freq(int freq)
1427 {
1428 	if (freq > 0)
1429 		sysctlbyname("hw.acpi.cpu.px_global",
1430 			     NULL, NULL, &freq, sizeof(freq));
1431 }
1432 
1433 static int
1434 get_global_freq(void)
1435 {
1436 	int freq;
1437 	size_t freq_size;
1438 
1439 	freq = -1;
1440 	freq_size = sizeof(freq);
1441 	sysctlbyname("hw.acpi.cpu.px_global", &freq, &freq_size, NULL, 0);
1442 
1443 	return freq;
1444 }
1445 
1446 static void
1447 mon_cputemp(void)
1448 {
1449 	static int last_temp = -1;
1450 	static int last_idx = -1;
1451 	int temp = get_cputemp();
1452 	int idx;
1453 	int lowest;
1454 	int highest;
1455 	static int CurPXGlobal __unused;
1456 
1457 	/*
1458 	 * Reseed FreqAry, it can change w/AC power state
1459 	 */
1460 	acpi_get_cpufreq(0, &lowest, &highest);
1461 
1462 	/*
1463 	 * Some cpu frequency steps can cause large shifts in cpu temperature,
1464 	 * creating an oscillation that min-maxes the temperature in a way
1465 	 * that is not desireable.  To deal with this, we impose an exponential
1466 	 * average for any temperature change.
1467 	 *
1468 	 * We have to do this in both directions, otherwise (in particular)
1469 	 * laptop fan responsiveness and temperature sensor response times
1470 	 * can create major frequency oscillations.
1471 	 */
1472 	if (last_temp < 0 || (NFreqChanged & NFREQ_CPUTEMP)) {
1473 		NFreqChanged &= ~NFREQ_CPUTEMP;
1474 		last_temp = temp << 8;
1475 	} else if (temp < last_temp) {
1476 		last_temp = (last_temp * 15 + (temp << 8)) / 16;
1477 		if (DebugOpt) {
1478 			printf("Falling temp %d (use %d)\n",
1479 				temp, (last_temp >> 8));
1480 		}
1481 	} else {
1482 		last_temp = (last_temp * 15 + (temp << 8)) / 16;
1483 		if (DebugOpt) {
1484 			printf("Rising temp %d (use %d)\n",
1485 				temp, (last_temp >> 8));
1486 		}
1487 	}
1488 	temp = last_temp >> 8;
1489 
1490 	/*
1491 	 * CPU Temp not available or available frequencies not yet
1492 	 * probed.
1493 	 */
1494 	if (DebugOpt)
1495 		printf("Temp %d {%d-%d} NFreq=%d)\n",
1496 		       temp, MinTemp, MaxTemp, NFreq);
1497 	if (temp <= 0)
1498 		return;
1499 	if (NFreq == 0)
1500 		return;
1501 
1502 	/*
1503 	 * Return to normal operation if under the minimum
1504 	 */
1505 	if (temp <= MinTemp) {
1506 		if (AdjustCpuFreqOverride) {
1507 			AdjustCpuFreqOverride = 0;
1508 			CurPXGlobal = 0;
1509 			NFreqChanged = NFREQ_ALL;
1510 			last_idx = -1;
1511 			syslog(LOG_ALERT,
1512 			       "Temp below %d, returning to normal operation",
1513 			       MinTemp);
1514 			if (SavedPXGlobal)
1515 				set_global_freq(SavedPXGlobal);
1516 		}
1517 		return;
1518 	}
1519 
1520 	/*
1521 	 * Hysteresis before entering temperature control mode
1522 	 */
1523 	if (AdjustCpuFreqOverride == 0 &&
1524 	    temp <= MinTemp + (MaxTemp - MinTemp) / 10 + 1) {
1525 		return;
1526 	}
1527 
1528 	/*
1529 	 * Override frequency controls (except for idle -> lowest)
1530 	 */
1531 	if (AdjustCpuFreqOverride == 0) {
1532 		AdjustCpuFreqOverride = 1;
1533 		SavedPXGlobal = get_global_freq();
1534 		CurPXGlobal = 0;
1535 		NFreqChanged = NFREQ_ALL;
1536 		last_idx = -1;
1537 		syslog(LOG_ALERT,
1538 		       "Temp %d {%d-%d}, entering temperature control mode",
1539 		       temp, MinTemp, MaxTemp);
1540 	}
1541 	if (temp > MaxTemp + (MaxTemp - MinTemp) / 10 + 1) {
1542 		syslog(LOG_ALERT,
1543 		       "Temp %d {%d-%d}, TOO HOT!!!",
1544 		       temp, MinTemp, MaxTemp);
1545 	}
1546 	idx = (temp - MinTemp) * NFreq / (MaxTemp - MinTemp);
1547 	if (idx < 0 || idx >= NFreq)	/* overtemp */
1548 		idx = NFreq - 1;
1549 
1550 	/*
1551 	 * Limit frequency shifts to single steps in both directions.
1552 	 * Some fans react very quickly, this will reduce oscillations.
1553 	 */
1554 	if (DebugOpt)
1555 		printf("Temp index %d (use %d)\n", idx, last_idx);
1556 	if (last_idx >= 0 && idx < last_idx)
1557 		idx = last_idx - 1;
1558 	else if (last_idx >= 0 && idx > last_idx)
1559 		idx = last_idx + 1;
1560 	last_idx = idx;
1561 
1562 	/*
1563 	 * One last thing, make sure our frequency adheres to
1564 	 * HighestCpuFreq.  However, override LowestCpuFreq for
1565 	 * temperature control purposes.
1566 	 */
1567 	while (HighestCpuFreq > 0 && idx < NFreq &&
1568 	       FreqAry[idx] > HighestCpuFreq) {
1569 		++idx;
1570 	}
1571 #if 0
1572 	/*
1573 	 * Currently ignore LowestCpuFreq if temp control thinks it
1574 	 * needs to go lower
1575 	 */
1576 	while (LowestCpuFreq > 0 && idx > 0 &&
1577 	       FreqAry[idx] < LowestCpuFreq) {
1578 		--idx;
1579 	}
1580 #endif
1581 
1582 	if (FreqAry[idx] != CurPXGlobal) {
1583 		CurPXGlobal = FreqAry[idx];
1584 
1585 #if 0
1586 		/* this can get noisy so don't log for now */
1587 		syslog(LOG_ALERT,
1588 		       "Temp %d {%d-%d}, set frequency %d",
1589 		       temp, MinTemp, MaxTemp, CurPXGlobal);
1590 #endif
1591 	}
1592 	set_global_freq(CurPXGlobal);
1593 }
1594