xref: /dragonfly/usr.sbin/powerd/powerd.c (revision 9348a738)
1 /*
2  * Copyright (c) 2010,2016 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * The powerd daemon:
37  * - Monitors the cpu load and adjusts cpu and cpu power domain
38  *   performance accordingly.
39  * - Monitors battery life.  Sounds an alarm and shuts the machine down
40  *   if battery life runs low.
41  */
42 
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
47 #include <sys/file.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/sensors.h>
51 #include <sys/time.h>
52 #include <machine/cpufunc.h>
53 #include <machine/cpumask.h>
54 #include <err.h>
55 #include <signal.h>
56 #include <stdio.h>
57 #include <stdlib.h>
58 #include <unistd.h>
59 #include <string.h>
60 #include <syslog.h>
61 
62 #include "alert1.h"
63 
64 #define MAXDOM		MAXCPU	/* worst case, 1 cpu per domain */
65 
66 #define MAXFREQ		64
67 #define CST_STRLEN	16
68 
69 #define NFREQ_MONPERF	0x0001
70 #define NFREQ_ADJPERF	0x0002
71 #define NFREQ_CPUTEMP	0x0004
72 
73 #define NFREQ_ALL	(NFREQ_MONPERF | NFREQ_ADJPERF | NFREQ_CPUTEMP)
74 
75 struct cpu_pwrdom {
76 	TAILQ_ENTRY(cpu_pwrdom)	dom_link;
77 	int			dom_id;
78 	int			dom_ncpus;
79 	cpumask_t		dom_cpumask;
80 };
81 
82 struct cpu_state {
83 	double			cpu_qavg;
84 	double			cpu_uavg;	/* used for speeding up */
85 	double			cpu_davg;	/* used for slowing down */
86 	int			cpu_limit;
87 	int			cpu_count;
88 	char			cpu_name[8];
89 };
90 
91 static void usage(void);
92 static void get_ncpus(void);
93 static void mon_cputemp(void);
94 
95 /* usched cpumask */
96 static void get_uschedcpus(void);
97 static void set_uschedcpus(void);
98 
99 /* perfbias(4) */
100 static int has_perfbias(void);
101 static void set_perfbias(int, int);
102 
103 /* acpi(4) P-state */
104 static void acpi_getcpufreq_str(int, int *, int *);
105 static int acpi_getcpufreq_bin(int, int *, int *);
106 static void acpi_get_cpufreq(int, int *, int *);
107 static void acpi_set_cpufreq(int, int);
108 static int acpi_get_cpupwrdom(void);
109 
110 /* mwait C-state hint */
111 static int probe_cstate(void);
112 static void set_cstate(int, int);
113 
114 /* Performance monitoring */
115 static void init_perf(void);
116 static void mon_perf(double);
117 static void adj_perf(cpumask_t, cpumask_t);
118 static void adj_cpu_pwrdom(int, int);
119 static void adj_cpu_perf(int, int);
120 static void get_cputime(double);
121 static int get_nstate(struct cpu_state *, double);
122 static void add_spare_cpus(const cpumask_t, int);
123 static void restore_perf(void);
124 static void set_global_freq(int freq);
125 
126 /* Battery monitoring */
127 static int has_battery(void);
128 static int mon_battery(void);
129 static void low_battery_alert(int);
130 
131 /* Backlight */
132 static void restore_backlight(void);
133 
134 /* Runtime states for performance monitoring */
135 static int global_pcpu_limit;
136 static struct cpu_state pcpu_state[MAXCPU];
137 static struct cpu_state global_cpu_state;
138 static cpumask_t cpu_used;		/* cpus w/ high perf */
139 static cpumask_t cpu_pwrdom_used;	/* cpu power domains w/ high perf */
140 static cpumask_t usched_cpu_used;	/* cpus for usched */
141 
142 /* Constants */
143 static cpumask_t cpu_pwrdom_mask;	/* usable cpu power domains */
144 static int cpu2pwrdom[MAXCPU];		/* cpu to cpu power domain map */
145 static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM];
146 static int NCpus;			/* # of cpus */
147 static char orig_global_cx[CST_STRLEN];
148 static char cpu_perf_cx[CST_STRLEN];
149 static int cpu_perf_cxlen;
150 static char cpu_idle_cx[CST_STRLEN];
151 static int cpu_idle_cxlen;
152 static int FreqAry[MAXFREQ];
153 static int NFreq;
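/*
 * NFreqChanged is set to NFREQ_ALL whenever the available frequency
 * table or the temperature-override state changes; mon_perf(),
 * adj_perf() and mon_cputemp() each clear their own bit once they
 * have reacted to the change.
 */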
154 static int NFreqChanged = NFREQ_ALL;
155 static int SavedPXGlobal;
156 
157 static int DebugOpt;
158 static int TurboOpt = 1;
159 static int PowerFd;
160 static int Hysteresis = 10;	/* percentage */
161 static double TriggerUp = 0.25;	/* single-cpu load to force max freq */
162 static double TriggerDown;	/* load per cpu to force the min freq */
163 static int HasPerfbias = 0;
164 static int AdjustCpuFreq = 1;
165 static int AdjustCstate = 0;
166 static int HighestCpuFreq;
167 static int LowestCpuFreq;
168 
169 static int AdjustCpuFreqOverride;
170 
171 static volatile int stopped;
172 
173 /* Battery life monitoring */
174 static int BatLifeMin = 2;	/* shut the box down if battery life (%) drops this low */
175 static struct timespec BatLifePrevT;
176 static int BatLifePollIntvl = 5; /* unit: sec */
177 static struct timespec BatShutdownStartT;
178 static int BatShutdownLinger = -1;
179 static int BatShutdownLingerSet = 60; /* unit: sec */
180 static int BatShutdownLingerCnt;
181 static int BatShutdownAudioAlert = 1;
182 static int MinTemp = 75;
183 static int MaxTemp = 85;
184 static int BackLightPct = 100;
185 static int OldBackLightLevel;
186 static int BackLightDown;
187 
188 static void sigintr(int signo);
189 
190 int
191 main(int ac, char **av)
192 {
193 	double srt;
194 	double pollrate;
195 	int ch;
196 	int lowest;
197 	int highest;
198 	char buf[64];
199 	int monbat;
200 	char *p2;
201 
202 	srt = 8.0;	/* time for samples - 8 seconds */
203 	pollrate = 1.0;	/* polling rate in seconds */
204 
205 	while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:H:L:P:QT:")) != -1) {
206 		switch(ch) {
207 		case 'b':
208 			BackLightPct = strtol(optarg, NULL, 10);
209 			break;
210 		case 'c':
211 			AdjustCstate = 1;
212 			break;
213 		case 'd':
214 			DebugOpt = 1;
215 			break;
216 		case 'e':
217 			HasPerfbias = 1;
218 			break;
219 		case 'f':
220 			AdjustCpuFreq = 0;
221 			break;
222 		case 'h':
223 			HighestCpuFreq = strtol(optarg, NULL, 10);
224 			break;
225 		case 'l':
226 			LowestCpuFreq = strtol(optarg, NULL, 10);
227 			break;
228 		case 'p':
229 			Hysteresis = (int)strtol(optarg, NULL, 10);
230 			break;
231 		case 'r':
232 			pollrate = strtod(optarg, NULL);
233 			break;
234 		case 't':
235 			TurboOpt = 0;
236 			break;
237 		case 'u':
238 			TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
239 			break;
240 		case 'B':
241 			BatLifeMin = strtol(optarg, NULL, 10);
242 			break;
243 		case 'H':
244 			MaxTemp = strtol(optarg, &p2, 0);
245 			if (*p2 == ':') {
246 				MinTemp = MaxTemp;
247 				MaxTemp = strtol(p2 + 1, NULL, 0);
248 			} else {
249 				MinTemp = MaxTemp * 9 / 10;
250 			}
251 			break;
252 		case 'L':
253 			BatShutdownLingerSet = strtol(optarg, NULL, 10);
254 			if (BatShutdownLingerSet < 0)
255 				BatShutdownLingerSet = 0;
256 			break;
257 		case 'P':
258 			BatLifePollIntvl = strtol(optarg, NULL, 10);
259 			break;
260 		case 'Q':
261 			BatShutdownAudioAlert = 0;
262 			break;
263 		case 'T':
264 			srt = strtod(optarg, NULL);
265 			break;
266 		default:
267 			usage();
268 			/* NOT REACHED */
269 		}
270 	}
271 	ac -= optind;
272 	av += optind;
273 
274 	setlinebuf(stdout);
275 
276 	/* Get number of cpus */
277 	get_ncpus();
278 
279 	/* Seed FreqAry[] */
280 	acpi_get_cpufreq(0, &highest, &lowest);
281 
282 	if (Hysteresis < 0 || Hysteresis > 99) {
283 		fprintf(stderr, "Invalid hysteresis value\n");
284 		exit(1);
285 	}
286 
287 	if (TriggerUp < 0 || TriggerUp > 1) {
288 		fprintf(stderr, "Invalid load limit value\n");
289 		exit(1);
290 	}
291 
292 	if (BackLightPct > 100 || BackLightPct <= 0) {
293 		fprintf(stderr, "Invalid backlight setting, ignore\n");
294 		BackLightPct = 100;
295 	}
296 
297 	TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
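	/* e.g. with the default TriggerUp of 0.25 and 10% hysteresis, TriggerDown becomes 0.225 */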
298 
299 	/*
300 	 * Make sure powerd is not already running.
301 	 */
302 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
303 	if (PowerFd < 0) {
304 		fprintf(stderr,
305 			"Cannot create /var/run/powerd.pid, "
306 			"continuing anyway\n");
307 	} else {
308 		ssize_t r;
309 		pid_t pid = -1;
310 
311 		r = read(PowerFd, buf, sizeof(buf) - 1);
312 		if (r > 0) {
313 			buf[r] = 0;
314 			pid = strtol(buf, NULL, 0);
315 		}
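		/*
		 * A lock failure means another powerd still holds the pid
		 * file: kill it and wait for the lock before taking over.
		 */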
316 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
317 			if (pid > 0) {
318 				kill(pid, SIGTERM);
319 				flock(PowerFd, LOCK_EX);
320 				fprintf(stderr, "restarting powerd\n");
321 			} else {
322 				fprintf(stderr,
323 					"powerd is already running, "
324 					"unable to kill pid for restart\n");
325 				exit(1);
326 			}
327 		}
328 		lseek(PowerFd, 0L, 0);
329 	}
330 
331 	/*
332 	 * Daemonize and record our pid in the pid file
333 	 */
334 	if (DebugOpt == 0) {
335 		daemon(0, 0);
336 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
337 	}
338 
339 	if (PowerFd >= 0) {
340 		ftruncate(PowerFd, 0);
341 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
342 		write(PowerFd, buf, strlen(buf));
343 	}
344 
345 	/* Do we need to monitor battery life? */
346 	if (BatLifePollIntvl <= 0)
347 		monbat = 0;
348 	else
349 		monbat = has_battery();
350 
351 	/* Do we have perfbias(4)? */
352 	if (HasPerfbias)
353 		HasPerfbias = has_perfbias();
354 
355 	/* Can we adjust C-states? */
356 	if (AdjustCstate)
357 		AdjustCstate = probe_cstate();
358 
359 	/*
360 	 * Wait for the hw.acpi.cpu.px_dom* sysctls to be created by the kernel.
361 	 *
362 	 * Since hw.acpi.cpu.px_dom* creation is queued on the ACPI
363 	 * taskqueue, and the ACPI taskqueue is shared across various
364 	 * ACPI modules, any delay in other modules may cause
365 	 * hw.acpi.cpu.px_dom* to be created quite a bit later
366 	 * (e.g. the cmbat module's task could take quite a lot of time).
367 	 */
368 	for (;;) {
369 		/* Prime delta cputime calculation. */
370 		get_cputime(pollrate);
371 
372 		/* Wait for all cpus to appear */
373 		if (acpi_get_cpupwrdom())
374 			break;
375 		usleep((int)(pollrate * 1000000.0));
376 	}
377 
378 	/*
379 	 * Catch some signals so that max performance can be restored on exit.
380 	 */
381 	signal(SIGINT, sigintr);
382 	signal(SIGTERM, sigintr);
383 
384 	/* Initialize performance states */
385 	init_perf();
386 
387 	srt = srt / pollrate;	/* convert to sample count */
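	/* with the default -T 8.0 and -r 1.0 this works out to 8 samples */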
388 	if (DebugOpt)
389 		printf("samples for downgrading: %5.2f\n", srt);
390 
391 	/*
392 	 * Monitoring loop
393 	 */
394 	while (!stopped) {
395 		/*
396 		 * Monitor performance
397 		 */
398 		get_cputime(pollrate);
399 		mon_cputemp();
400 		mon_perf(srt);
401 
402 		/*
403 		 * Monitor battery
404 		 */
405 		if (monbat)
406 			monbat = mon_battery();
407 
408 		usleep((int)(pollrate * 1000000.0));
409 	}
410 
411 	/*
412 	 * Set to maximum performance if killed.
413 	 */
414 	syslog(LOG_INFO, "killed, setting max and exiting");
415 	if (SavedPXGlobal)
416 		set_global_freq(SavedPXGlobal);
417 	restore_perf();
418 	restore_backlight();
419 
420 	exit(0);
421 }
422 
423 static void
424 sigintr(int signo __unused)
425 {
426 	stopped = 1;
427 }
428 
429 /*
430  * Figure out the cpu power domains.
431  */
432 static int
433 acpi_get_cpupwrdom(void)
434 {
435 	struct cpu_pwrdom *dom;
436 	cpumask_t pwrdom_mask;
437 	char buf[64];
438 	char members[1024];
439 	char *str;
440 	size_t msize;
441 	int n, i, ncpu = 0, dom_id;
442 
443 	memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
444 	memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
445 	CPUMASK_ASSZERO(cpu_pwrdom_mask);
446 
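	/* First pass: discover which hw.acpi.cpu.px_dom* sysctl domains exist */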
447 	for (i = 0; i < MAXDOM; ++i) {
448 		snprintf(buf, sizeof(buf),
449 			 "hw.acpi.cpu.px_dom%d.available", i);
450 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
451 			continue;
452 
453 		dom = calloc(1, sizeof(*dom));
454 		dom->dom_id = i;
455 
456 		if (cpu_pwrdomain[i] != NULL) {
457 			fprintf(stderr, "cpu power domain %d exists\n", i);
458 			exit(1);
459 		}
460 		cpu_pwrdomain[i] = dom;
461 		CPUMASK_ORBIT(cpu_pwrdom_mask, i);
462 	}
463 	pwrdom_mask = cpu_pwrdom_mask;
464 
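	/* Second pass: parse each domain's member list into its cpumask */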
465 	while (CPUMASK_TESTNZERO(pwrdom_mask)) {
466 		dom_id = BSFCPUMASK(pwrdom_mask);
467 		CPUMASK_NANDBIT(pwrdom_mask, dom_id);
468 		dom = cpu_pwrdomain[dom_id];
469 
470 		CPUMASK_ASSZERO(dom->dom_cpumask);
471 
472 		snprintf(buf, sizeof(buf),
473 			 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
474 		msize = sizeof(members) - 1;	/* leave room for the NUL terminator below */
475 		if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
476 			cpu_pwrdomain[dom_id] = NULL;
477 			free(dom);
478 			continue;
479 		}
480 
481 		members[msize] = 0;
482 		for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
483 			n = -1;
484 			sscanf(str, "cpu%d", &n);
485 			if (n >= 0) {
486 				++ncpu;
487 				++dom->dom_ncpus;
488 				CPUMASK_ORBIT(dom->dom_cpumask, n);
489 				cpu2pwrdom[n] = dom->dom_id;
490 			}
491 		}
492 		if (dom->dom_ncpus == 0) {
493 			cpu_pwrdomain[dom_id] = NULL;
494 			free(dom);
495 			continue;
496 		}
497 		if (DebugOpt) {
498 			printf("dom%d cpumask: ", dom->dom_id);
499 			for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
500 				printf("%jx ",
501 				    (uintmax_t)dom->dom_cpumask.ary[i]);
502 			}
503 			printf("\n");
504 		}
505 	}
506 
507 	if (ncpu != NCpus) {
508 		if (DebugOpt)
509 			printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
510 
511 		pwrdom_mask = cpu_pwrdom_mask;
512 		while (CPUMASK_TESTNZERO(pwrdom_mask)) {
513 			dom_id = BSFCPUMASK(pwrdom_mask);
514 			CPUMASK_NANDBIT(pwrdom_mask, dom_id);
515 			dom = cpu_pwrdomain[dom_id];
516 			if (dom != NULL)
517 				free(dom);
518 		}
519 		return 0;
520 	}
521 	return 1;
522 }
523 
524 /*
525  * Save per-cpu load and sum of per-cpu load.
526  */
527 static void
528 get_cputime(double pollrate)
529 {
530 	static struct kinfo_cputime ocpu_time[MAXCPU];
531 	static struct kinfo_cputime ncpu_time[MAXCPU];
532 	size_t slen;
533 	int ncpu;
534 	int cpu;
535 	uint64_t delta;
536 
537 	bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
538 
539 	slen = sizeof(ncpu_time);
540 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
541 		fprintf(stderr, "kern.cputime sysctl not available\n");
542 		exit(1);
543 	}
544 	ncpu = slen / sizeof(ncpu_time[0]);
545 
546 	delta = 0;
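	/*
	 * Sum the busy time (user + sys + nice + intr) accumulated by each
	 * cpu over the last poll interval; the counters are in microseconds,
	 * so dividing by pollrate * 1e6 yields the fraction of a cpu busy.
	 */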
547 	for (cpu = 0; cpu < ncpu; ++cpu) {
548 		uint64_t d;
549 
550 		d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
551 		     ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
552 		    (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
553 		     ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
554 		pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
555 
556 		delta += d;
557 	}
558 	global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
559 }
560 
561 static void
562 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
563 {
564 	char buf[256], sysid[64];
565 	size_t buflen;
566 	char *ptr;
567 	int v, highest, lowest;
568 	int freqidx;
569 
570 	/*
571 	 * Retrieve availability list
572 	 */
573 	snprintf(sysid, sizeof(sysid),
574 		 "hw.acpi.cpu.px_dom%d.available", dom_id);
575 	buflen = sizeof(buf) - 1;
576 	if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
577 		return;
578 	buf[buflen] = 0;
579 
580 	/*
581 	 * Parse out the highest and lowest cpu frequencies
582 	 */
583 	ptr = buf;
584 	highest = lowest = 0;
585 	freqidx = 0;
586 	while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
587 		if ((lowest == 0 || lowest > v) &&
588 		    (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
589 			lowest = v;
590 		if ((highest == 0 || highest < v) &&
591 		    (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
592 			highest = v;
593 		/*
594 		 * Detect turbo mode: it is advertised as the nominal maximum + 1; skip it when turbo is disabled
595 		 */
596 		if (!TurboOpt && highest - v == 1)
597 			highest = v;
598 		++freqidx;
599 	}
600 
601 	/*
602 	 * Frequency array
603 	 */
604 	if (freqidx > MAXFREQ)
605 		freqidx = MAXFREQ;
606 	if (NFreq != freqidx) {
607 		NFreq = freqidx;
608 		NFreqChanged = NFREQ_ALL;
609 	}
610 	ptr = buf;
611 	while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
612 		if (freqidx == 0)
613 			break;
614 		if (FreqAry[freqidx - 1] != v)
615 			NFreqChanged = NFREQ_ALL;
616 		FreqAry[--freqidx] = v;
617 	}
618 
619 	*highest0 = highest;
620 	*lowest0 = lowest;
621 }
622 
623 static int
624 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
625 {
626 	char sysid[64];
627 	size_t freqlen;
628 	int freqcnt, i;
629 	int freqary[MAXFREQ];
630 
631 	/*
632 	 * Retrieve availability list
633 	 */
634 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
635 	freqlen = sizeof(freqary);
636 	bzero(freqary, sizeof(freqary));
637 	if (sysctlbyname(sysid, freqary, &freqlen, NULL, 0) < 0)
638 		return 0;
639 
640 	freqcnt = freqlen / sizeof(freqary[0]);
641 	if (NFreq != freqcnt) {
642 		NFreq = freqcnt;
643 		NFreqChanged = NFREQ_ALL;
644 	}
645 	if (bcmp(freqary, FreqAry, sizeof(FreqAry)) != 0)
646 		NFreqChanged = NFREQ_ALL;
647 	bcopy(freqary, FreqAry, sizeof(FreqAry));
648 	if (freqcnt == 0)
649 		return 0;
650 
651 	for (i = freqcnt - 1; i >= 0; --i) {
652 		*lowest0 = FreqAry[i];
653 		if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
654 			break;
655 	}
656 
657 	i = 0;
658 	*highest0 = FreqAry[0];
659 	if (!TurboOpt && freqcnt > 1 && FreqAry[0] - FreqAry[1] == 1) {
660 		i = 1;
661 		*highest0 = FreqAry[1];
662 	}
663 	for (; i < freqcnt; ++i) {
664 		if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
665 			break;
666 		*highest0 = FreqAry[i];
667 	}
668 	return 1;
669 }
670 
671 static void
672 acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
673 {
674 	*highest = 0;
675 	*lowest = 0;
676 
677 	if (acpi_getcpufreq_bin(dom_id, highest, lowest))
678 		return;
679 	acpi_getcpufreq_str(dom_id, highest, lowest);
680 }
681 
682 static
683 void
684 usage(void)
685 {
686 	fprintf(stderr, "usage: powerd [-cdeftQ] [-p hysteresis] "
687 	    "[-h highest_freq] [-l lowest_freq] "
688 	    "[-r poll_interval] [-u trigger_up] "
689 	    "[-B min_battery_life] [-L low_battery_linger] "
690 	    "[-P battery_poll_interval] [-T sample_interval] "
691 	    "[-b backlight]\n");
692 	exit(1);
693 }
694 
695 #ifndef timespecsub
696 #define timespecsub(vvp, uvp)						\
697 	do {								\
698 		(vvp)->tv_sec -= (uvp)->tv_sec;				\
699 		(vvp)->tv_nsec -= (uvp)->tv_nsec;			\
700 		if ((vvp)->tv_nsec < 0) {				\
701 			(vvp)->tv_sec--;				\
702 			(vvp)->tv_nsec += 1000000000;			\
703 		}							\
704 	} while (0)
705 #endif
706 
707 #define BAT_SYSCTL_TIME_MAX	50000000 /* unit: nanosecond */
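/*
 * If hw.acpi.acline or hw.acpi.battery.life takes longer than this (50ms)
 * to answer, it is considered too expensive to poll periodically.
 */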
708 
709 static int
710 has_battery(void)
711 {
712 	struct timespec s, e;
713 	size_t len;
714 	int val;
715 
716 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
717 	BatLifePrevT = s;
718 
719 	len = sizeof(val);
720 	if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
721 		/* No AC line information */
722 		return 0;
723 	}
724 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
725 
726 	timespecsub(&e, &s);
727 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
728 		/* hw.acpi.acline takes too long to be useful */
729 		syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
730 		return 0;
731 	}
732 
733 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
734 	len = sizeof(val);
735 	if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
736 		/* No battery life */
737 		return 0;
738 	}
739 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
740 
741 	timespecsub(&e, &s);
742 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
743 		/* hw.acpi.battery.life takes too long to be useful */
744 		syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
745 		return 0;
746 	}
747 	return 1;
748 }
749 
750 static void
751 low_battery_alert(int life)
752 {
753 	int fmt, stereo, freq;
754 	int fd;
755 
756 	syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
757 	    life, BatShutdownLingerCnt);
758 	++BatShutdownLingerCnt;
759 
760 	if (!BatShutdownAudioAlert)
761 		return;
762 
763 	fd = open("/dev/dsp", O_WRONLY);
764 	if (fd < 0)
765 		return;
766 
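	/* configure raw playback: signed 16-bit little-endian, mono, 44.1kHz, matching the embedded alert1[] sample */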
767 	fmt = AFMT_S16_LE;
768 	if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
769 		goto done;
770 
771 	stereo = 0;
772 	if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
773 		goto done;
774 
775 	freq = 44100;
776 	if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
777 		goto done;
778 
779 	write(fd, alert1, sizeof(alert1));
780 	write(fd, alert1, sizeof(alert1));
781 
782 done:
783 	close(fd);
784 }
785 
786 static int
787 mon_battery(void)
788 {
789 	struct timespec cur, ts;
790 	int acline, life;
791 	size_t len;
792 
793 	clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
794 	ts = cur;
795 	timespecsub(&ts, &BatLifePrevT);
796 	if (ts.tv_sec < BatLifePollIntvl)
797 		return 1;
798 	BatLifePrevT = cur;
799 
800 	len = sizeof(acline);
801 	if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
802 		return 1;
803 	if (acline) {
804 		BatShutdownLinger = -1;
805 		BatShutdownLingerCnt = 0;
806 		restore_backlight();
807 		return 1;
808 	}
809 
810 	if (!BackLightDown && BackLightPct != 100) {
811 		int backlight_max, backlight;
812 
813 		len = sizeof(backlight_max);
814 		if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
815 		    NULL, 0) < 0) {
816 			/* No more backlight adjustment */
817 			BackLightPct = 100;
818 			goto after_backlight;
819 		}
820 
821 		len = sizeof(OldBackLightLevel);
822 		if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
823 		    NULL, 0) < 0) {
824 			/* No more backlight adjustment */
825 			BackLightPct = 100;
826 			goto after_backlight;
827 		}
828 
829 		backlight = (backlight_max * BackLightPct) / 100;
830 		if (backlight >= OldBackLightLevel) {
831 			/* No more backlight adjustment */
832 			BackLightPct = 100;
833 			goto after_backlight;
834 		}
835 
836 		if (sysctlbyname("hw.backlight_level", NULL, NULL,
837 		    &backlight, sizeof(backlight)) < 0) {
838 			/* No more backlight adjustment */
839 			BackLightPct = 100;
840 			goto after_backlight;
841 		}
842 		BackLightDown = 1;
843 	}
844 after_backlight:
845 
846 	len = sizeof(life);
847 	if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
848 		return 1;
849 
850 	if (BatShutdownLinger > 0) {
851 		ts = cur;
852 		timespecsub(&ts, &BatShutdownStartT);
853 		if (ts.tv_sec > BatShutdownLinger)
854 			BatShutdownLinger = 0;
855 	}
856 
857 	if (life <= BatLifeMin) {
858 		if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
859 			syslog(LOG_ALERT, "low battery life %d%%, "
860 			    "shutting down", life);
861 			if (vfork() == 0)
862 				execlp("poweroff", "poweroff", NULL);
863 			return 0;
864 		} else if (BatShutdownLinger < 0) {
865 			BatShutdownLinger = BatShutdownLingerSet;
866 			BatShutdownStartT = cur;
867 		}
868 		low_battery_alert(life);
869 	}
870 	return 1;
871 }
872 
873 static void
874 get_ncpus(void)
875 {
876 	size_t slen;
877 
878 	slen = sizeof(NCpus);
879 	if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
880 		err(1, "sysctlbyname hw.ncpu failed");
881 	if (DebugOpt)
882 		printf("hw.ncpu %d\n", NCpus);
883 }
884 
885 static void
886 get_uschedcpus(void)
887 {
888 	size_t slen;
889 
890 	slen = sizeof(usched_cpu_used);
891 	if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
892 	    NULL, 0) < 0)
893 		err(1, "sysctlbyname kern.usched_global_cpumask failed");
894 	if (DebugOpt) {
895 		int i;
896 
897 		printf("usched cpumask was: ");
898 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
899 			printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
900 		printf("\n");
901 	}
902 }
903 
904 static void
905 set_uschedcpus(void)
906 {
907 	if (DebugOpt) {
908 		int i;
909 
910 		printf("usched cpumask: ");
911 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
912 			printf("%jx ",
913 			    (uintmax_t)usched_cpu_used.ary[i]);
914 		}
915 		printf("\n");
916 	}
917 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
918 	    &usched_cpu_used, sizeof(usched_cpu_used));
919 }
920 
921 static int
922 has_perfbias(void)
923 {
924 	size_t len;
925 	int hint;
926 
927 	len = sizeof(hint);
928 	if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
929 		return 0;
930 	return 1;
931 }
932 
933 static void
934 set_perfbias(int cpu, int inc)
935 {
936 	int hint = inc ? 0 : 15;
937 	char sysid[64];
938 
939 	if (DebugOpt)
940 		printf("cpu%d set perfbias hint %d\n", cpu, hint);
941 	snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
942 	sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
943 }
944 
945 static void
946 init_perf(void)
947 {
948 	struct cpu_state *state;
949 	int cpu;
950 
951 	/* Get usched cpumask */
952 	get_uschedcpus();
953 
954 	/*
955 	 * Assume everything is in use and maxed out before we
956 	 * start.
957 	 */
958 	CPUMASK_ASSBMASK(cpu_used, NCpus);
959 	cpu_pwrdom_used = cpu_pwrdom_mask;
960 	global_pcpu_limit = NCpus;
961 
962 	for (cpu = 0; cpu < NCpus; ++cpu) {
963 		state = &pcpu_state[cpu];
964 
965 		state->cpu_uavg = 0.0;
966 		state->cpu_davg = 0.0;
967 		state->cpu_limit = 1;
968 		state->cpu_count = 1;
969 		snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
970 		    cpu);
971 	}
972 
973 	state = &global_cpu_state;
974 	state->cpu_uavg = 0.0;
975 	state->cpu_davg = 0.0;
976 	state->cpu_limit = NCpus;
977 	state->cpu_count = NCpus;
978 	strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
979 }
980 
981 static int
982 get_nstate(struct cpu_state *state, double srt)
983 {
984 	int ustate, dstate, nstate;
985 
986 	/* speeding up */
987 	state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
988 	/* slowing down */
989 	state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
990 	if (state->cpu_davg < state->cpu_uavg)
991 		state->cpu_davg = state->cpu_uavg;
992 
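	/*
	 * Convert load into a desired cpu count: one cpu per TriggerUp worth
	 * of load.  If that count is below the current limit, re-evaluate
	 * against the lower TriggerDown threshold so the count only drops
	 * once load falls past the hysteresis band.
	 */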
993 	ustate = state->cpu_uavg / TriggerUp;
994 	if (ustate < state->cpu_limit)
995 		ustate = state->cpu_uavg / TriggerDown;
996 	dstate = state->cpu_davg / TriggerUp;
997 	if (dstate < state->cpu_limit)
998 		dstate = state->cpu_davg / TriggerDown;
999 
1000 	nstate = (ustate > dstate) ? ustate : dstate;
1001 	if (nstate > state->cpu_count)
1002 		nstate = state->cpu_count;
1003 
1004 	if (DebugOpt) {
1005 		printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
1006 		    "%2d ncpus=%d\n", state->cpu_name,
1007 		    state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
1008 		    state->cpu_limit, nstate);
1009 	}
1010 	return nstate;
1011 }
1012 
1013 static void
1014 mon_perf(double srt)
1015 {
1016 	cpumask_t ocpu_used, ocpu_pwrdom_used;
1017 	int pnstate = 0, nstate;
1018 	int cpu;
1019 
1020 	/*
1021 	 * Find cpus requiring performance and their corresponding power
1022 	 * domains.  Save the number of cpus requiring performance in
1023 	 * pnstate.
1024 	 */
1025 	ocpu_used = cpu_used;
1026 	ocpu_pwrdom_used = cpu_pwrdom_used;
1027 
1028 	CPUMASK_ASSZERO(cpu_used);
1029 	CPUMASK_ASSZERO(cpu_pwrdom_used);
1030 
1031 	for (cpu = 0; cpu < NCpus; ++cpu) {
1032 		struct cpu_state *state = &pcpu_state[cpu];
1033 		int s;
1034 
1035 		s = get_nstate(state, srt);
1036 		if (s) {
1037 			CPUMASK_ORBIT(cpu_used, cpu);
1038 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1039 		}
1040 		pnstate += s;
1041 
1042 		state->cpu_limit = s;
1043 	}
1044 
1045 	/*
1046 	 * Calculate nstate, the number of cpus we wish to run at max
1047 	 * performance.
1048 	 */
1049 	nstate = get_nstate(&global_cpu_state, srt);
1050 
1051 	if (nstate == global_cpu_state.cpu_limit &&
1052 	    (NFreqChanged & NFREQ_MONPERF) == 0 &&
1053 	    (pnstate == global_pcpu_limit || nstate > pnstate)) {
1054 		/* Nothing changed; keep the sets */
1055 		cpu_used = ocpu_used;
1056 		cpu_pwrdom_used = ocpu_pwrdom_used;
1057 
1058 		global_pcpu_limit = pnstate;
1059 		return;
1060 	}
1061 	NFreqChanged &= ~NFREQ_MONPERF;
1062 	global_pcpu_limit = pnstate;
1063 
1064 	if (nstate > pnstate) {
1065 		/*
1066 		 * Add spare cpus to meet global performance requirement.
1067 		 */
1068 		add_spare_cpus(ocpu_used, nstate - pnstate);
1069 	}
1070 
1071 	global_cpu_state.cpu_limit = nstate;
1072 
1073 	/*
1074 	 * Adjust cpu and cpu power domain performance
1075 	 */
1076 	adj_perf(ocpu_used, ocpu_pwrdom_used);
1077 }
1078 
1079 static void
1080 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
1081 {
1082 	cpumask_t saved_pwrdom, xcpu_used;
1083 	int done = 0, cpu;
1084 
1085 	/*
1086 	 * Find more cpus in the previous cpu set.
1087 	 */
1088 	xcpu_used = cpu_used;
1089 	CPUMASK_XORMASK(xcpu_used, ocpu_used);
1090 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1091 		cpu = BSFCPUMASK(xcpu_used);
1092 		CPUMASK_NANDBIT(xcpu_used, cpu);
1093 
1094 		if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1095 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1096 			CPUMASK_ORBIT(cpu_used, cpu);
1097 			--ncpu;
1098 			if (ncpu == 0)
1099 				return;
1100 		}
1101 	}
1102 
1103 	/*
1104 	 * Find more cpus in the used cpu power domains.
1105 	 */
1106 	saved_pwrdom = cpu_pwrdom_used;
1107 again:
1108 	while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1109 		cpumask_t unused_cpumask;
1110 		int dom;
1111 
1112 		dom = BSFCPUMASK(saved_pwrdom);
1113 		CPUMASK_NANDBIT(saved_pwrdom, dom);
1114 
1115 		unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1116 		CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1117 
1118 		while (CPUMASK_TESTNZERO(unused_cpumask)) {
1119 			cpu = BSFCPUMASK(unused_cpumask);
1120 			CPUMASK_NANDBIT(unused_cpumask, cpu);
1121 
1122 			CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1123 			CPUMASK_ORBIT(cpu_used, cpu);
1124 			--ncpu;
1125 			if (ncpu == 0)
1126 				return;
1127 		}
1128 	}
1129 	if (!done) {
1130 		done = 1;
1131 		/*
1132 		 * Find more cpus in unused cpu power domains
1133 		 */
1134 		saved_pwrdom = cpu_pwrdom_mask;
1135 		CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1136 		goto again;
1137 	}
1138 	if (DebugOpt)
1139 		printf("%d cpus not found\n", ncpu);
1140 }
1141 
1142 static void
1143 acpi_set_cpufreq(int dom, int inc)
1144 {
1145 	int lowest, highest, desired;
1146 	char sysid[64];
1147 
1148 	acpi_get_cpufreq(dom, &highest, &lowest);
1149 	if (highest == 0 || lowest == 0)
1150 		return;
1151 	desired = inc ? highest : lowest;
1152 
1153 	if (DebugOpt)
1154 		printf("dom%d set frequency %d\n", dom, desired);
1155 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1156 	sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1157 }
1158 
1159 static void
1160 adj_cpu_pwrdom(int dom, int inc)
1161 {
1162 	if (AdjustCpuFreq && (inc == 0 || AdjustCpuFreqOverride == 0))
1163 		acpi_set_cpufreq(dom, inc);
1164 }
1165 
1166 static void
1167 adj_cpu_perf(int cpu, int inc)
1168 {
1169 	if (DebugOpt) {
1170 		if (inc)
1171 			printf("cpu%d increase perf\n", cpu);
1172 		else
1173 			printf("cpu%d decrease perf\n", cpu);
1174 	}
1175 
1176 	if (HasPerfbias)
1177 		set_perfbias(cpu, inc);
1178 	if (AdjustCstate)
1179 		set_cstate(cpu, inc);
1180 }
1181 
1182 static void
1183 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1184 {
1185 	cpumask_t old_usched_used;
1186 	int cpu, inc;
1187 
1188 	/*
1189 	 * Set cpus requiring performance to the userland process
1190 	 * scheduler.  Leave the rest of cpus unmapped.
1191 	 */
1192 	old_usched_used = usched_cpu_used;
1193 	usched_cpu_used = cpu_used;
1194 	if (CPUMASK_TESTZERO(usched_cpu_used))
1195 		CPUMASK_ORBIT(usched_cpu_used, 0);
1196 	if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1197 		set_uschedcpus();
1198 
1199 	/*
1200 	 * Adjust per-cpu performance for any cpus which changed.
1201 	 */
1202 	CPUMASK_XORMASK(xcpu_used, cpu_used);
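	/* a frequency-table change forces the settings to be re-applied on every cpu */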
1203 	if (NFreqChanged & NFREQ_ADJPERF)
1204 		CPUMASK_ASSBMASK(xcpu_used, NCpus);
1205 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1206 		cpu = BSFCPUMASK(xcpu_used);
1207 		CPUMASK_NANDBIT(xcpu_used, cpu);
1208 
1209 		if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1210 			/* Increase cpu performance */
1211 			inc = 1;
1212 		} else {
1213 			/* Decrease cpu performance */
1214 			inc = 0;
1215 		}
1216 		adj_cpu_perf(cpu, inc);
1217 	}
1218 
1219 	/*
1220 	 * Adjust cpu power domain performance.  This could affect
1221 	 * a set of cpus.
1222 	 */
1223 	CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1224 	if (NFreqChanged & NFREQ_ADJPERF)
1225 		CPUMASK_ASSBMASK(xcpu_pwrdom_used, NCpus);
1226 	while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1227 		int dom;
1228 
1229 		dom = BSFCPUMASK(xcpu_pwrdom_used);
1230 		CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1231 
1232 		if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1233 			/* Increase cpu power domain performance */
1234 			inc = 1;
1235 		} else {
1236 			/* Decrease cpu power domain performance */
1237 			inc = 0;
1238 		}
1239 		adj_cpu_pwrdom(dom, inc);
1240 	}
1241 	NFreqChanged &= ~NFREQ_ADJPERF;
1242 }
1243 
1244 static void
1245 restore_perf(void)
1246 {
1247 	cpumask_t ocpu_used, ocpu_pwrdom_used;
1248 
1249 	/* Remove highest cpu frequency limitation */
1250 	HighestCpuFreq = 0;
1251 
1252 	ocpu_used = cpu_used;
1253 	ocpu_pwrdom_used = cpu_pwrdom_used;
1254 
1255 	/* Max out all cpus and cpu power domains performance */
1256 	CPUMASK_ASSBMASK(cpu_used, NCpus);
1257 	cpu_pwrdom_used = cpu_pwrdom_mask;
1258 
1259 	adj_perf(ocpu_used, ocpu_pwrdom_used);
1260 
1261 	if (AdjustCstate) {
1262 		/*
1263 		 * Restore the original mwait C-state
1264 		 */
1265 		if (DebugOpt)
1266 			printf("global set cstate %s\n", orig_global_cx);
1267 		sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1268 		    orig_global_cx, strlen(orig_global_cx) + 1);
1269 	}
1270 }
1271 
1272 static int
1273 probe_cstate(void)
1274 {
1275 	char cx_supported[1024];
1276 	const char *target;
1277 	char *ptr;
1278 	int idle_hlt, deep = 1;
1279 	size_t len;
1280 
1281 	len = sizeof(idle_hlt);
1282 	if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1283 		return 0;
1284 	if (idle_hlt != 1)
1285 		return 0;
1286 
1287 	len = sizeof(cx_supported);
1288 	if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1289 	    NULL, 0) < 0)
1290 		return 0;
1291 
1292 	len = sizeof(orig_global_cx);
1293 	if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1294 	    NULL, 0) < 0)
1295 		return 0;
1296 
1297 	strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1298 	cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1299 	if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1300 	    cpu_perf_cx, cpu_perf_cxlen) < 0) {
1301 		/* AUTODEEP is not supported; try AUTO */
1302 		deep = 0;
1303 		strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1304 		cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1305 		if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1306 		    cpu_perf_cx, cpu_perf_cxlen) < 0)
1307 			return 0;
1308 	}
1309 
1310 	if (!deep)
1311 		target = "C2/0";
1312 	else
1313 		target = NULL;
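	/*
	 * Walk the supported list: with deep C-states usable we keep
	 * overwriting cpu_idle_cx and end up with the last entry listed;
	 * otherwise stop as soon as C2/0 is found.
	 */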
1314 	for (ptr = strtok(cx_supported, " "); ptr != NULL;
1315 	     ptr = strtok(NULL, " ")) {
1316 		if (target == NULL ||
1317 		    (target != NULL && strcmp(ptr, target) == 0)) {
1318 			strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1319 			cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1320 			if (target != NULL)
1321 				break;
1322 		}
1323 	}
1324 	if (cpu_idle_cxlen == 0)
1325 		return 0;
1326 
1327 	if (DebugOpt) {
1328 		printf("cstate orig %s, perf %s, idle %s\n",
1329 		    orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1330 	}
1331 	return 1;
1332 }
1333 
1334 static void
1335 set_cstate(int cpu, int inc)
1336 {
1337 	const char *cst;
1338 	char sysid[64];
1339 	size_t len;
1340 
1341 	if (inc) {
1342 		cst = cpu_perf_cx;
1343 		len = cpu_perf_cxlen;
1344 	} else {
1345 		cst = cpu_idle_cx;
1346 		len = cpu_idle_cxlen;
1347 	}
1348 
1349 	if (DebugOpt)
1350 		printf("cpu%d set cstate %s\n", cpu, cst);
1351 	snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1352 	sysctlbyname(sysid, NULL, NULL, cst, len);
1353 }
1354 
1355 static void
1356 restore_backlight(void)
1357 {
1358 	if (BackLightDown) {
1359 		BackLightDown = 0;
1360 		sysctlbyname("hw.backlight_level", NULL, NULL,
1361 		    &OldBackLightLevel, sizeof(OldBackLightLevel));
1362 	}
1363 }
1364 
1365 /*
1366  * get_cputemp() / mon_cputemp()
1367  *
1368  * This enforces a maximum cpu frequency based on how the temperature
1369  * compares against MinTemp and MaxTemp.
1370  */
1371 static int
1372 get_cputemp(void)
1373 {
1374 	char sysid[64];
1375 	struct sensor sensor;
1376 	size_t sensor_size;
1377 	int t;
1378 	int mt = -1;
1379 	int n;
1380 
1381 	for (n = 0; ; ++n) {
1382 		t = 0;
1383 		snprintf(sysid, sizeof(sysid),
1384 			 "hw.sensors.cpu_node%d.temp0", n);
1385 		sensor_size = sizeof(sensor);
1386 		if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1387 			break;
1388 		t = -1;
1389 		if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1390 			t = (int)((sensor.value - 273150000) / 1000000);
1391 			if (mt < t)
1392 				mt = t;
1393 		}
1394 	}
1395 	if (n)
1396 		return mt;
1397 
1398 	/*
1399 	 * Missing nodeN for some reason, try cpuN.
1400 	 */
1401 	for (n = 0; ; ++n) {
1402 		t = 0;
1403 		snprintf(sysid, sizeof(sysid),
1404 			 "hw.sensors.cpu%d.temp0", n);
1405 		sensor_size = sizeof(sensor);
1406 		if (sysctlbyname(sysid, &sensor, &sensor_size, NULL, 0) < 0)
1407 			break;
1408 		t = -1;
1409 		if ((sensor.flags & (SENSOR_FINVALID | SENSOR_FUNKNOWN)) == 0) {
1410 			t = (int)((sensor.value - 273150000) / 1000000);
1411 			if (mt < t)
1412 				mt = t;
1413 		}
1414 	}
1415 	return mt;
1416 }
1417 
1418 static void
1419 set_global_freq(int freq)
1420 {
1421 	if (freq > 0)
1422 		sysctlbyname("hw.acpi.cpu.px_global",
1423 			     NULL, NULL, &freq, sizeof(freq));
1424 }
1425 
1426 static int
1427 get_global_freq(void)
1428 {
1429 	int freq;
1430 	size_t freq_size;
1431 
1432 	freq = -1;
1433 	freq_size = sizeof(freq);
1434 	sysctlbyname("hw.acpi.cpu.px_global", &freq, &freq_size, NULL, 0);
1435 
1436 	return freq;
1437 }
1438 
1439 static void
1440 mon_cputemp(void)
1441 {
1442 	static int last_temp = -1;
1443 	static int last_idx = -1;
1444 	int temp = get_cputemp();
1445 	int idx;
1446 	int lowest;
1447 	int highest;
1448 	static int CurPXGlobal __unused;
1449 
1450 	/*
1451 	 * Reseed FreqAry; it can change with the AC power state
1452 	 */
1453 	acpi_get_cpufreq(0, &highest, &lowest);
1454 
1455 	/*
1456 	 * Some cpu frequency steps can cause large shifts in cpu temperature,
1457 	 * creating an oscillation that min-maxes the temperature in a way
1458  * that is not desirable.  To deal with this, we impose an exponential
1459 	 * average for any temperature change.
1460 	 *
1461 	 * We have to do this in both directions, otherwise (in particular)
1462 	 * laptop fan responsiveness and temperature sensor response times
1463 	 * can create major frequency oscillations.
1464 	 */
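	/* last_temp holds the smoothed temperature in 24.8 fixed point; the EMA weight is 15/16 per poll */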
1465 	if (last_temp < 0 || (NFreqChanged & NFREQ_CPUTEMP)) {
1466 		NFreqChanged &= ~NFREQ_CPUTEMP;
1467 		last_temp = temp << 8;
1468 	} else if (temp < last_temp) {
1469 		last_temp = (last_temp * 15 + (temp << 8)) / 16;
1470 		if (DebugOpt) {
1471 			printf("Falling temp %d (use %d)\n",
1472 				temp, (last_temp >> 8));
1473 		}
1474 	} else {
1475 		last_temp = (last_temp * 15 + (temp << 8)) / 16;
1476 		if (DebugOpt) {
1477 			printf("Rising temp %d (use %d)\n",
1478 				temp, (last_temp >> 8));
1479 		}
1480 	}
1481 	temp = last_temp >> 8;
1482 
1483 	/*
1484 	 * Bail out if the cpu temperature is not available or the
1485 	 * available frequencies have not been probed yet.
1486 	 */
1487 	if (DebugOpt)
1488 		printf("Temp %d {%d-%d} NFreq=%d)\n",
1489 		       temp, MinTemp, MaxTemp, NFreq);
1490 	if (temp <= 0)
1491 		return;
1492 	if (NFreq == 0)
1493 		return;
1494 
1495 	/*
1496 	 * Return to normal operation if under the minimum
1497 	 */
1498 	if (temp <= MinTemp) {
1499 		if (AdjustCpuFreqOverride) {
1500 			AdjustCpuFreqOverride = 0;
1501 			CurPXGlobal = 0;
1502 			NFreqChanged = NFREQ_ALL;
1503 			last_idx = -1;
1504 			syslog(LOG_ALERT,
1505 			       "Temp below %d, returning to normal operation",
1506 			       MinTemp);
1507 			if (SavedPXGlobal)
1508 				set_global_freq(SavedPXGlobal);
1509 		}
1510 		return;
1511 	}
1512 
1513 	/*
1514 	 * Hysteresis before entering temperature control mode
1515 	 */
1516 	if (AdjustCpuFreqOverride == 0 &&
1517 	    temp <= MinTemp + (MaxTemp - MinTemp) / 10 + 1) {
1518 		return;
1519 	}
1520 
1521 	/*
1522 	 * Override frequency controls (except for idle -> lowest)
1523 	 */
1524 	if (AdjustCpuFreqOverride == 0) {
1525 		AdjustCpuFreqOverride = 1;
1526 		SavedPXGlobal = get_global_freq();
1527 		CurPXGlobal = 0;
1528 		NFreqChanged = NFREQ_ALL;
1529 		last_idx = -1;
1530 		syslog(LOG_ALERT,
1531 		       "Temp %d {%d-%d}, entering temperature control mode",
1532 		       temp, MinTemp, MaxTemp);
1533 	}
1534 	if (temp > MaxTemp + (MaxTemp - MinTemp) / 10 + 1) {
1535 		syslog(LOG_ALERT,
1536 		       "Temp %d {%d-%d}, TOO HOT!!!",
1537 		       temp, MinTemp, MaxTemp);
1538 	}
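	/*
	 * Map the smoothed temperature linearly onto the frequency table;
	 * hotter readings select entries further into FreqAry, and anything
	 * at or beyond MaxTemp clamps to the last entry.
	 */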
1539 	idx = (temp - MinTemp) * NFreq / (MaxTemp - MinTemp);
1540 	if (idx < 0 || idx >= NFreq)	/* overtemp */
1541 		idx = NFreq - 1;
1542 
1543 	/*
1544 	 * Limit frequency shifts to single steps in both directions.
1545 	 * Some fans react very quickly, this will reduce oscillations.
1546 	 */
1547 	if (DebugOpt)
1548 		printf("Temp index %d (use %d)\n", idx, last_idx);
1549 	if (last_idx >= 0 && idx < last_idx)
1550 		idx = last_idx - 1;
1551 	else if (last_idx >= 0 && idx > last_idx)
1552 		idx = last_idx + 1;
1553 	last_idx = idx;
1554 
1555 	/*
1556 	 * One last thing, make sure our frequency adheres to
1557 	 * HighestCpuFreq.  However, override LowestCpuFreq for
1558 	 * temperature control purposes.
1559 	 */
1560 	while (HighestCpuFreq > 0 && idx < NFreq &&
1561 	       FreqAry[idx] > HighestCpuFreq) {
1562 		++idx;
1563 	}
1564 #if 0
1565 	/*
1566 	 * Currently ignore LowestCpuFreq if temp control thinks it
1567 	 * needs to go lower
1568 	 */
1569 	while (LowestCpuFreq > 0 && idx > 0 &&
1570 	       FreqAry[idx] < LowestCpuFreq) {
1571 		--idx;
1572 	}
1573 #endif
1574 
1575 	if (FreqAry[idx] != CurPXGlobal) {
1576 		CurPXGlobal = FreqAry[idx];
1577 
1578 #if 0
1579 		/* this can get noisy so don't log for now */
1580 		syslog(LOG_ALERT,
1581 		       "Temp %d {%d-%d}, set frequency %d",
1582 		       temp, MinTemp, MaxTemp, CurPXGlobal);
1583 #endif
1584 	}
1585 	set_global_freq(CurPXGlobal);
1586 }
1587