xref: /dragonfly/usr.sbin/powerd/powerd.c (revision 0ca59c34)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
 * The powerd daemon:
 * - Monitors the cpu load and adjusts cpu and cpu power domain
 *   performance accordingly.
 * - Monitors battery life.  Raises alerts and shuts down the machine
 *   if battery life gets low.
41  */
42 
43 #define _KERNEL_STRUCTURES
44 #include <sys/types.h>
45 #include <sys/sysctl.h>
46 #include <sys/kinfo.h>
47 #include <sys/file.h>
48 #include <sys/queue.h>
49 #include <sys/soundcard.h>
50 #include <sys/time.h>
51 #include <machine/cpufunc.h>
52 #include <err.h>
53 #include <stdio.h>
54 #include <stdlib.h>
55 #include <unistd.h>
56 #include <string.h>
57 #include <syslog.h>
58 
59 #include "alert1.h"
60 
61 #define MAXDOM		MAXCPU	/* worst case, 1 cpu per domain */
62 
63 #define MAXFREQ		64
64 #define CST_STRLEN	16
65 
/*
 * One ACPI cpu power domain (hw.acpi.cpu.px_dom<N>), as discovered by
 * acpi_get_cpupwrdom().
 */
struct cpu_pwrdom {
	TAILQ_ENTRY(cpu_pwrdom)	dom_link;
	int			dom_id;		/* N in px_dom<N> */
	int			dom_ncpus;	/* # of member cpus */
	cpumask_t		dom_cpumask;	/* member cpus */
};
72 
/*
 * Load tracking state, kept once per cpu (pcpu_state[]) and once for
 * the sum of all cpus (global_cpu_state).
 */
struct cpu_state {
	double			cpu_qavg;	/* load of the last sample */
	double			cpu_uavg;	/* used for speeding up */
	double			cpu_davg;	/* used for slowing down */
	int			cpu_limit;	/* result of previous get_nstate() */
	int			cpu_count;	/* upper bound for get_nstate() */
	char			cpu_name[8];	/* label for debug output */
};
81 
82 static void usage(void);
83 static void get_ncpus(void);
84 
85 /* usched cpumask */
86 static void get_uschedcpus(void);
87 static void set_uschedcpus(void);
88 
89 /* perfbias(4) */
90 static int has_perfbias(void);
91 static void set_perfbias(int, int);
92 
93 /* acpi(4) P-state */
94 static void acpi_getcpufreq_str(int, int *, int *);
95 static int acpi_getcpufreq_bin(int, int *, int *);
96 static void acpi_get_cpufreq(int, int *, int *);
97 static void acpi_set_cpufreq(int, int);
98 static int acpi_get_cpupwrdom(void);
99 
100 /* mwait C-state hint */
101 static int probe_cstate(void);
102 static void set_cstate(int, int);
103 
104 /* Performance monitoring */
105 static void init_perf(void);
106 static void mon_perf(double);
107 static void adj_perf(cpumask_t, cpumask_t);
108 static void adj_cpu_pwrdom(int, int);
109 static void adj_cpu_perf(int, int);
110 static void get_cputime(double);
111 static int get_nstate(struct cpu_state *, double);
112 static void add_spare_cpus(const cpumask_t, int);
113 static void restore_perf(void);
114 
115 /* Battery monitoring */
116 static int has_battery(void);
117 static int mon_battery(void);
118 static void low_battery_alert(int);
119 
120 /* Backlight */
121 static void restore_backlight(void);
122 
/* Runtime states for performance monitoring */
static int global_pcpu_limit;		/* sum of per-cpu states, last pass */
static struct cpu_state pcpu_state[MAXCPU];
static struct cpu_state global_cpu_state;
static cpumask_t cpu_used;		/* cpus w/ high perf */
static cpumask_t cpu_pwrdom_used;	/* cpu power domains w/ high perf */
static cpumask_t usched_cpu_used;	/* cpus for usched */

/* Constants */
static cpumask_t cpu_pwrdom_mask;	/* usable cpu power domains */
static int cpu2pwrdom[MAXCPU];		/* cpu to cpu power domain map */
static struct cpu_pwrdom *cpu_pwrdomain[MAXDOM]; /* indexed by domain id */
static int NCpus;			/* # of cpus */
/* machdep.mwait.CX.idle as read by probe_cstate(); restored on exit */
static char orig_global_cx[CST_STRLEN];
/* C-state hint written when a cpu's performance is increased */
static char cpu_perf_cx[CST_STRLEN];
static int cpu_perf_cxlen;		/* strlen(cpu_perf_cx) + 1 */
/* C-state hint written when a cpu's performance is decreased */
static char cpu_idle_cx[CST_STRLEN];
static int cpu_idle_cxlen;		/* strlen(cpu_idle_cx) + 1 */

static int DebugOpt;		/* -d: stay in foreground, print debug info */
static int TurboOpt = 1;	/* cleared by -t: skip the turbo frequency */
static int PowerFd;		/* fd of /var/run/powerd.pid */
static int Hysteresis = 10;	/* percentage */
static double TriggerUp = 0.25;	/* single-cpu load to force max freq */
static double TriggerDown;	/* load per cpu to force the min freq */
static int HasPerfbias = 0;	/* -e, then result of has_perfbias() */
static int AdjustCpuFreq = 1;	/* cleared by -f */
static int AdjustCstate = 0;	/* -c, then result of probe_cstate() */
static int HighestCpuFreq;	/* -h; <= 0 means no upper limit */
static int LowestCpuFreq;	/* -l; <= 0 means no lower limit */

static volatile int stopped;	/* set by sigintr() to end the main loop */

/* Battery life monitoring */
static int BatLifeMin = 2;	/* shutdown the box, if low on battery life */
static struct timespec BatLifePrevT;	/* time of the last battery poll */
static int BatLifePollIntvl = 5; /* unit: sec */
static struct timespec BatShutdownStartT; /* time the linger period began */
/* -1: not lingering, > 0: linger period running, 0: linger expired */
static int BatShutdownLinger = -1;
static int BatShutdownLingerSet = 60; /* unit: sec */
static int BatShutdownLingerCnt;	/* # of low battery alerts issued */
static int BatShutdownAudioAlert = 1;	/* cleared by -Q */
static int BackLightPct = 100;	/* -b; 100 disables backlight adjustment */
static int OldBackLightLevel;	/* hw.backlight_level before dimming */
static int BackLightDown;	/* nonzero while the backlight is dimmed */
168 
169 static void sigintr(int signo);
170 
171 int
172 main(int ac, char **av)
173 {
174 	double srt;
175 	double pollrate;
176 	int ch;
177 	char buf[64];
178 	int monbat;
179 
180 	srt = 8.0;	/* time for samples - 8 seconds */
181 	pollrate = 1.0;	/* polling rate in seconds */
182 
183 	while ((ch = getopt(ac, av, "b:cdefh:l:p:r:tu:B:L:P:QT:")) != -1) {
184 		switch(ch) {
185 		case 'b':
186 			BackLightPct = strtol(optarg, NULL, 10);
187 			break;
188 		case 'c':
189 			AdjustCstate = 1;
190 			break;
191 		case 'd':
192 			DebugOpt = 1;
193 			break;
194 		case 'e':
195 			HasPerfbias = 1;
196 			break;
197 		case 'f':
198 			AdjustCpuFreq = 0;
199 			break;
200 		case 'h':
201 			HighestCpuFreq = strtol(optarg, NULL, 10);
202 			break;
203 		case 'l':
204 			LowestCpuFreq = strtol(optarg, NULL, 10);
205 			break;
206 		case 'p':
207 			Hysteresis = (int)strtol(optarg, NULL, 10);
208 			break;
209 		case 'r':
210 			pollrate = strtod(optarg, NULL);
211 			break;
212 		case 't':
213 			TurboOpt = 0;
214 			break;
215 		case 'u':
216 			TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
217 			break;
218 		case 'B':
219 			BatLifeMin = strtol(optarg, NULL, 10);
220 			break;
221 		case 'L':
222 			BatShutdownLingerSet = strtol(optarg, NULL, 10);
223 			if (BatShutdownLingerSet < 0)
224 				BatShutdownLingerSet = 0;
225 			break;
226 		case 'P':
227 			BatLifePollIntvl = strtol(optarg, NULL, 10);
228 			break;
229 		case 'Q':
230 			BatShutdownAudioAlert = 0;
231 			break;
232 		case 'T':
233 			srt = strtod(optarg, NULL);
234 			break;
235 		default:
236 			usage();
237 			/* NOT REACHED */
238 		}
239 	}
240 	ac -= optind;
241 	av += optind;
242 
243 	setlinebuf(stdout);
244 
245 	/* Get number of cpus */
246 	get_ncpus();
247 
248 	if (0 > Hysteresis || Hysteresis > 99) {
249 		fprintf(stderr, "Invalid hysteresis value\n");
250 		exit(1);
251 	}
252 
253 	if (0 > TriggerUp || TriggerUp > 1) {
254 		fprintf(stderr, "Invalid load limit value\n");
255 		exit(1);
256 	}
257 
258 	if (BackLightPct > 100 || BackLightPct <= 0) {
259 		fprintf(stderr, "Invalid backlight setting, ignore\n");
260 		BackLightPct = 100;
261 	}
262 
263 	TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
264 
265 	/*
266 	 * Make sure powerd is not already running.
267 	 */
268 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
269 	if (PowerFd < 0) {
270 		fprintf(stderr,
271 			"Cannot create /var/run/powerd.pid, "
272 			"continuing anyway\n");
273 	} else {
274 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
275 			fprintf(stderr, "powerd is already running\n");
276 			exit(1);
277 		}
278 	}
279 
280 	/*
281 	 * Demonize and set pid
282 	 */
283 	if (DebugOpt == 0) {
284 		daemon(0, 0);
285 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
286 	}
287 
288 	if (PowerFd >= 0) {
289 		ftruncate(PowerFd, 0);
290 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
291 		write(PowerFd, buf, strlen(buf));
292 	}
293 
294 	/* Do we need to monitor battery life? */
295 	if (BatLifePollIntvl <= 0)
296 		monbat = 0;
297 	else
298 		monbat = has_battery();
299 
300 	/* Do we have perfbias(4)? */
301 	if (HasPerfbias)
302 		HasPerfbias = has_perfbias();
303 
304 	/* Could we adjust C-state? */
305 	if (AdjustCstate)
306 		AdjustCstate = probe_cstate();
307 
308 	/*
309 	 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel.
310 	 *
311 	 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
312 	 * taskqueue and ACPI taskqueue is shared across various
313 	 * ACPI modules, any delay in other modules may cause
314 	 * hw.acpi.cpu.px_dom* to be created at quite a later time
315 	 * (e.g. cmbat module's task could take quite a lot of time).
316 	 */
317 	for (;;) {
318 		/* Prime delta cputime calculation. */
319 		get_cputime(pollrate);
320 
321 		/* Wait for all cpus to appear */
322 		if (acpi_get_cpupwrdom())
323 			break;
324 		usleep((int)(pollrate * 1000000.0));
325 	}
326 
327 	/*
328 	 * Catch some signals so that max performance could be restored.
329 	 */
330 	signal(SIGINT, sigintr);
331 	signal(SIGTERM, sigintr);
332 
333 	/* Initialize performance states */
334 	init_perf();
335 
336 	srt = srt / pollrate;	/* convert to sample count */
337 	if (DebugOpt)
338 		printf("samples for downgrading: %5.2f\n", srt);
339 
340 	/*
341 	 * Monitoring loop
342 	 */
343 	while (!stopped) {
344 		/*
345 		 * Monitor performance
346 		 */
347 		get_cputime(pollrate);
348 		mon_perf(srt);
349 
350 		/*
351 		 * Monitor battery
352 		 */
353 		if (monbat)
354 			monbat = mon_battery();
355 
356 		usleep((int)(pollrate * 1000000.0));
357 	}
358 
359 	/*
360 	 * Set to maximum performance if killed.
361 	 */
362 	syslog(LOG_INFO, "killed, setting max and exiting");
363 	restore_perf();
364 	restore_backlight();
365 
366 	exit(0);
367 }
368 
/*
 * Handler installed for SIGINT and SIGTERM: flags the monitoring loop
 * in main() to terminate.
 */
static void
sigintr(int signo __unused)
{
	stopped = 1;
}
374 
375 /*
376  * Figure out the cpu power domains.
377  */
378 static int
379 acpi_get_cpupwrdom(void)
380 {
381 	struct cpu_pwrdom *dom;
382 	cpumask_t pwrdom_mask;
383 	char buf[64];
384 	char members[1024];
385 	char *str;
386 	size_t msize;
387 	int n, i, ncpu = 0, dom_id;
388 
389 	memset(cpu2pwrdom, 0, sizeof(cpu2pwrdom));
390 	memset(cpu_pwrdomain, 0, sizeof(cpu_pwrdomain));
391 	CPUMASK_ASSZERO(cpu_pwrdom_mask);
392 
393 	for (i = 0; i < MAXDOM; ++i) {
394 		snprintf(buf, sizeof(buf),
395 			 "hw.acpi.cpu.px_dom%d.available", i);
396 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) < 0)
397 			continue;
398 
399 		dom = calloc(1, sizeof(*dom));
400 		dom->dom_id = i;
401 
402 		if (cpu_pwrdomain[i] != NULL) {
403 			fprintf(stderr, "cpu power domain %d exists\n", i);
404 			exit(1);
405 		}
406 		cpu_pwrdomain[i] = dom;
407 		CPUMASK_ORBIT(cpu_pwrdom_mask, i);
408 	}
409 	pwrdom_mask = cpu_pwrdom_mask;
410 
411 	while (CPUMASK_TESTNZERO(pwrdom_mask)) {
412 		dom_id = BSFCPUMASK(pwrdom_mask);
413 		CPUMASK_NANDBIT(pwrdom_mask, dom_id);
414 		dom = cpu_pwrdomain[dom_id];
415 
416 		CPUMASK_ASSZERO(dom->dom_cpumask);
417 
418 		snprintf(buf, sizeof(buf),
419 			 "hw.acpi.cpu.px_dom%d.members", dom->dom_id);
420 		msize = sizeof(members);
421 		if (sysctlbyname(buf, members, &msize, NULL, 0) < 0) {
422 			cpu_pwrdomain[dom_id] = NULL;
423 			free(dom);
424 			continue;
425 		}
426 
427 		members[msize] = 0;
428 		for (str = strtok(members, " "); str; str = strtok(NULL, " ")) {
429 			n = -1;
430 			sscanf(str, "cpu%d", &n);
431 			if (n >= 0) {
432 				++ncpu;
433 				++dom->dom_ncpus;
434 				CPUMASK_ORBIT(dom->dom_cpumask, n);
435 				cpu2pwrdom[n] = dom->dom_id;
436 			}
437 		}
438 		if (dom->dom_ncpus == 0) {
439 			cpu_pwrdomain[dom_id] = NULL;
440 			free(dom);
441 			continue;
442 		}
443 		if (DebugOpt) {
444 			printf("dom%d cpumask: ", dom->dom_id);
445 			for (i = 0; i < (int)NELEM(dom->dom_cpumask.ary); ++i) {
446 				printf("%jx ",
447 				    (uintmax_t)dom->dom_cpumask.ary[i]);
448 			}
449 			printf("\n");
450 		}
451 	}
452 
453 	if (ncpu != NCpus) {
454 		if (DebugOpt)
455 			printf("Found %d cpus, expecting %d\n", ncpu, NCpus);
456 
457 		pwrdom_mask = cpu_pwrdom_mask;
458 		while (CPUMASK_TESTNZERO(pwrdom_mask)) {
459 			dom_id = BSFCPUMASK(pwrdom_mask);
460 			CPUMASK_NANDBIT(pwrdom_mask, dom_id);
461 			dom = cpu_pwrdomain[dom_id];
462 			if (dom != NULL)
463 				free(dom);
464 		}
465 		return 0;
466 	}
467 	return 1;
468 }
469 
470 /*
471  * Save per-cpu load and sum of per-cpu load.
472  */
473 static void
474 get_cputime(double pollrate)
475 {
476 	static struct kinfo_cputime ocpu_time[MAXCPU];
477 	static struct kinfo_cputime ncpu_time[MAXCPU];
478 	size_t slen;
479 	int ncpu;
480 	int cpu;
481 	uint64_t delta;
482 
483 	bcopy(ncpu_time, ocpu_time, sizeof(struct kinfo_cputime) * NCpus);
484 
485 	slen = sizeof(ncpu_time);
486 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
487 		fprintf(stderr, "kern.cputime sysctl not available\n");
488 		exit(1);
489 	}
490 	ncpu = slen / sizeof(ncpu_time[0]);
491 
492 	delta = 0;
493 	for (cpu = 0; cpu < ncpu; ++cpu) {
494 		uint64_t d;
495 
496 		d = (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
497 		     ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
498 		    (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
499 		     ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
500 		pcpu_state[cpu].cpu_qavg = (double)d / (pollrate * 1000000.0);
501 
502 		delta += d;
503 	}
504 	global_cpu_state.cpu_qavg = (double)delta / (pollrate * 1000000.0);
505 }
506 
507 static void
508 acpi_getcpufreq_str(int dom_id, int *highest0, int *lowest0)
509 {
510 	char buf[256], sysid[64];
511 	size_t buflen;
512 	char *ptr;
513 	int v, highest, lowest;
514 
515 	/*
516 	 * Retrieve availability list
517 	 */
518 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.available",
519 	    dom_id);
520 	buflen = sizeof(buf) - 1;
521 	if (sysctlbyname(sysid, buf, &buflen, NULL, 0) < 0)
522 		return;
523 	buf[buflen] = 0;
524 
525 	/*
526 	 * Parse out the highest and lowest cpu frequencies
527 	 */
528 	ptr = buf;
529 	highest = lowest = 0;
530 	while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
531 		if ((lowest == 0 || lowest > v) &&
532 		    (LowestCpuFreq <= 0 || v >= LowestCpuFreq))
533 			lowest = v;
534 		if ((highest == 0 || highest < v) &&
535 		    (HighestCpuFreq <= 0 || v <= HighestCpuFreq))
536 			highest = v;
537 		/*
538 		 * Detect turbo mode
539 		 */
540 		if (!TurboOpt && highest - v == 1)
541 			highest = v;
542 	}
543 
544 	*highest0 = highest;
545 	*lowest0 = lowest;
546 }
547 
548 static int
549 acpi_getcpufreq_bin(int dom_id, int *highest0, int *lowest0)
550 {
551 	char sysid[64];
552 	int freq[MAXFREQ];
553 	size_t freqlen;
554 	int freqcnt, i;
555 
556 	/*
557 	 * Retrieve availability list
558 	 */
559 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.avail", dom_id);
560 	freqlen = sizeof(freq);
561 	if (sysctlbyname(sysid, freq, &freqlen, NULL, 0) < 0)
562 		return 0;
563 
564 	freqcnt = freqlen / sizeof(freq[0]);
565 	if (freqcnt == 0)
566 		return 0;
567 
568 	for (i = freqcnt - 1; i >= 0; --i) {
569 		*lowest0 = freq[i];
570 		if (LowestCpuFreq <= 0 || *lowest0 >= LowestCpuFreq)
571 			break;
572 	}
573 
574 	i = 0;
575 	*highest0 = freq[0];
576 	if (!TurboOpt && freqcnt > 1 && freq[0] - freq[1] == 1) {
577 		i = 1;
578 		*highest0 = freq[1];
579 	}
580 	for (; i < freqcnt; ++i) {
581 		if (HighestCpuFreq <= 0 || *highest0 <= HighestCpuFreq)
582 			break;
583 		*highest0 = freq[i];
584 	}
585 	return 1;
586 }
587 
/*
 * Look up the highest and lowest usable frequency of a cpu power
 * domain.  Both outputs are 0 if nothing could be determined.
 */
static void
acpi_get_cpufreq(int dom_id, int *highest, int *lowest)
{
	*lowest = 0;
	*highest = 0;

	/* Prefer the binary sysctl; fall back to the string one */
	if (!acpi_getcpufreq_bin(dom_id, highest, lowest))
		acpi_getcpufreq_str(dom_id, highest, lowest);
}
598 
/*
 * Print the command line synopsis to stderr and exit with status 1.
 */
static void
usage(void)
{
	static const char synopsis[] =
	    "usage: powerd [-cdeftQ] [-p hysteresis] "
	    "[-h highest_freq] [-l lowest_freq] "
	    "[-r poll_interval] [-u trigger_up] "
	    "[-B min_battery_life] [-L low_battery_linger] "
	    "[-P battery_poll_interval] [-T sample_interval] "
	    "[-b backlight]\n";

	fputs(synopsis, stderr);
	exit(1);
}
611 
#ifndef timespecsub
/*
 * In-place timespec subtraction: *lhs -= *rhs.  A negative nanosecond
 * result borrows one second so that tv_nsec ends up non-negative.
 */
#define timespecsub(lhs, rhs)						\
	do {								\
		(lhs)->tv_sec -= (rhs)->tv_sec;				\
		(lhs)->tv_nsec -= (rhs)->tv_nsec;			\
		if ((lhs)->tv_nsec < 0) {				\
			(lhs)->tv_nsec += 1000000000;			\
			(lhs)->tv_sec--;				\
		}							\
	} while (0)
#endif
623 
624 #define BAT_SYSCTL_TIME_MAX	50000000 /* unit: nanosecond */
625 
626 static int
627 has_battery(void)
628 {
629 	struct timespec s, e;
630 	size_t len;
631 	int val;
632 
633 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
634 	BatLifePrevT = s;
635 
636 	len = sizeof(val);
637 	if (sysctlbyname("hw.acpi.acline", &val, &len, NULL, 0) < 0) {
638 		/* No AC line information */
639 		return 0;
640 	}
641 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
642 
643 	timespecsub(&e, &s);
644 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
645 		/* hw.acpi.acline takes to long to be useful */
646 		syslog(LOG_NOTICE, "hw.acpi.acline takes too long");
647 		return 0;
648 	}
649 
650 	clock_gettime(CLOCK_MONOTONIC_FAST, &s);
651 	len = sizeof(val);
652 	if (sysctlbyname("hw.acpi.battery.life", &val, &len, NULL, 0) < 0) {
653 		/* No battery life */
654 		return 0;
655 	}
656 	clock_gettime(CLOCK_MONOTONIC_FAST, &e);
657 
658 	timespecsub(&e, &s);
659 	if (e.tv_sec > 0 || e.tv_nsec > BAT_SYSCTL_TIME_MAX) {
660 		/* hw.acpi.battery.life takes to long to be useful */
661 		syslog(LOG_NOTICE, "hw.acpi.battery.life takes too long");
662 		return 0;
663 	}
664 	return 1;
665 }
666 
667 static void
668 low_battery_alert(int life)
669 {
670 	int fmt, stereo, freq;
671 	int fd;
672 
673 	syslog(LOG_ALERT, "low battery life %d%%, please plugin AC line, #%d",
674 	    life, BatShutdownLingerCnt);
675 	++BatShutdownLingerCnt;
676 
677 	if (!BatShutdownAudioAlert)
678 		return;
679 
680 	fd = open("/dev/dsp", O_WRONLY);
681 	if (fd < 0)
682 		return;
683 
684 	fmt = AFMT_S16_LE;
685 	if (ioctl(fd, SNDCTL_DSP_SETFMT, &fmt, sizeof(fmt)) < 0)
686 		goto done;
687 
688 	stereo = 0;
689 	if (ioctl(fd, SNDCTL_DSP_STEREO, &stereo, sizeof(stereo)) < 0)
690 		goto done;
691 
692 	freq = 44100;
693 	if (ioctl(fd, SNDCTL_DSP_SPEED, &freq, sizeof(freq)) < 0)
694 		goto done;
695 
696 	write(fd, alert1, sizeof(alert1));
697 	write(fd, alert1, sizeof(alert1));
698 
699 done:
700 	close(fd);
701 }
702 
703 static int
704 mon_battery(void)
705 {
706 	struct timespec cur, ts;
707 	int acline, life;
708 	size_t len;
709 
710 	clock_gettime(CLOCK_MONOTONIC_FAST, &cur);
711 	ts = cur;
712 	timespecsub(&ts, &BatLifePrevT);
713 	if (ts.tv_sec < BatLifePollIntvl)
714 		return 1;
715 	BatLifePrevT = cur;
716 
717 	len = sizeof(acline);
718 	if (sysctlbyname("hw.acpi.acline", &acline, &len, NULL, 0) < 0)
719 		return 1;
720 	if (acline) {
721 		BatShutdownLinger = -1;
722 		BatShutdownLingerCnt = 0;
723 		restore_backlight();
724 		return 1;
725 	}
726 
727 	if (!BackLightDown && BackLightPct != 100) {
728 		int backlight_max, backlight;
729 
730 		len = sizeof(backlight_max);
731 		if (sysctlbyname("hw.backlight_max", &backlight_max, &len,
732 		    NULL, 0) < 0) {
733 			/* No more backlight adjustment */
734 			BackLightPct = 100;
735 			goto after_backlight;
736 		}
737 
738 		len = sizeof(OldBackLightLevel);
739 		if (sysctlbyname("hw.backlight_level", &OldBackLightLevel, &len,
740 		    NULL, 0) < 0) {
741 			/* No more backlight adjustment */
742 			BackLightPct = 100;
743 			goto after_backlight;
744 		}
745 
746 		backlight = (backlight_max * BackLightPct) / 100;
747 		if (backlight >= OldBackLightLevel) {
748 			/* No more backlight adjustment */
749 			BackLightPct = 100;
750 			goto after_backlight;
751 		}
752 
753 		if (sysctlbyname("hw.backlight_level", NULL, NULL,
754 		    &backlight, sizeof(backlight)) < 0) {
755 			/* No more backlight adjustment */
756 			BackLightPct = 100;
757 			goto after_backlight;
758 		}
759 		BackLightDown = 1;
760 	}
761 after_backlight:
762 
763 	len = sizeof(life);
764 	if (sysctlbyname("hw.acpi.battery.life", &life, &len, NULL, 0) < 0)
765 		return 1;
766 
767 	if (BatShutdownLinger > 0) {
768 		ts = cur;
769 		timespecsub(&ts, &BatShutdownStartT);
770 		if (ts.tv_sec > BatShutdownLinger)
771 			BatShutdownLinger = 0;
772 	}
773 
774 	if (life <= BatLifeMin) {
775 		if (BatShutdownLinger == 0 || BatShutdownLingerSet == 0) {
776 			syslog(LOG_ALERT, "low battery life %d%%, "
777 			    "shutting down", life);
778 			if (vfork() == 0)
779 				execlp("poweroff", "poweroff", NULL);
780 			return 0;
781 		} else if (BatShutdownLinger < 0) {
782 			BatShutdownLinger = BatShutdownLingerSet;
783 			BatShutdownStartT = cur;
784 		}
785 		low_battery_alert(life);
786 	}
787 	return 1;
788 }
789 
790 static void
791 get_ncpus(void)
792 {
793 	size_t slen;
794 
795 	slen = sizeof(NCpus);
796 	if (sysctlbyname("hw.ncpu", &NCpus, &slen, NULL, 0) < 0)
797 		err(1, "sysctlbyname hw.ncpu failed");
798 	if (DebugOpt)
799 		printf("hw.ncpu %d\n", NCpus);
800 }
801 
802 static void
803 get_uschedcpus(void)
804 {
805 	size_t slen;
806 
807 	slen = sizeof(usched_cpu_used);
808 	if (sysctlbyname("kern.usched_global_cpumask", &usched_cpu_used, &slen,
809 	    NULL, 0) < 0)
810 		err(1, "sysctlbyname kern.usched_global_cpumask failed");
811 	if (DebugOpt) {
812 		int i;
813 
814 		printf("usched cpumask was: ");
815 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i)
816 			printf("%jx ", (uintmax_t)usched_cpu_used.ary[i]);
817 		printf("\n");
818 	}
819 }
820 
821 static void
822 set_uschedcpus(void)
823 {
824 	if (DebugOpt) {
825 		int i;
826 
827 		printf("usched cpumask: ");
828 		for (i = 0; i < (int)NELEM(usched_cpu_used.ary); ++i) {
829 			printf("%jx ",
830 			    (uintmax_t)usched_cpu_used.ary[i]);
831 		}
832 		printf("\n");
833 	}
834 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
835 	    &usched_cpu_used, sizeof(usched_cpu_used));
836 }
837 
838 static int
839 has_perfbias(void)
840 {
841 	size_t len;
842 	int hint;
843 
844 	len = sizeof(hint);
845 	if (sysctlbyname("machdep.perfbias0.hint", &hint, &len, NULL, 0) < 0)
846 		return 0;
847 	return 1;
848 }
849 
850 static void
851 set_perfbias(int cpu, int inc)
852 {
853 	int hint = inc ? 0 : 15;
854 	char sysid[64];
855 
856 	if (DebugOpt)
857 		printf("cpu%d set perfbias hint %d\n", cpu, hint);
858 	snprintf(sysid, sizeof(sysid), "machdep.perfbias%d.hint", cpu);
859 	sysctlbyname(sysid, NULL, NULL, &hint, sizeof(hint));
860 }
861 
862 static void
863 init_perf(void)
864 {
865 	struct cpu_state *state;
866 	int cpu;
867 
868 	/* Get usched cpumask */
869 	get_uschedcpus();
870 
871 	/*
872 	 * Assume everything are used and are maxed out, before we
873 	 * start.
874 	 */
875 
876 	CPUMASK_ASSBMASK(cpu_used, NCpus);
877 	cpu_pwrdom_used = cpu_pwrdom_mask;
878 	global_pcpu_limit = NCpus;
879 
880 	for (cpu = 0; cpu < NCpus; ++cpu) {
881 		state = &pcpu_state[cpu];
882 
883 		state->cpu_uavg = 0.0;
884 		state->cpu_davg = 0.0;
885 		state->cpu_limit = 1;
886 		state->cpu_count = 1;
887 		snprintf(state->cpu_name, sizeof(state->cpu_name), "cpu%d",
888 		    cpu);
889 	}
890 
891 	state = &global_cpu_state;
892 	state->cpu_uavg = 0.0;
893 	state->cpu_davg = 0.0;
894 	state->cpu_limit = NCpus;
895 	state->cpu_count = NCpus;
896 	strlcpy(state->cpu_name, "global", sizeof(state->cpu_name));
897 }
898 
899 static int
900 get_nstate(struct cpu_state *state, double srt)
901 {
902 	int ustate, dstate, nstate;
903 
904 	/* speeding up */
905 	state->cpu_uavg = (state->cpu_uavg * 2.0 + state->cpu_qavg) / 3.0;
906 	/* slowing down */
907 	state->cpu_davg = (state->cpu_davg * srt + state->cpu_qavg) / (srt + 1);
908 	if (state->cpu_davg < state->cpu_uavg)
909 		state->cpu_davg = state->cpu_uavg;
910 
911 	ustate = state->cpu_uavg / TriggerUp;
912 	if (ustate < state->cpu_limit)
913 		ustate = state->cpu_uavg / TriggerDown;
914 	dstate = state->cpu_davg / TriggerUp;
915 	if (dstate < state->cpu_limit)
916 		dstate = state->cpu_davg / TriggerDown;
917 
918 	nstate = (ustate > dstate) ? ustate : dstate;
919 	if (nstate > state->cpu_count)
920 		nstate = state->cpu_count;
921 
922 	if (DebugOpt) {
923 		printf("%s qavg=%5.2f uavg=%5.2f davg=%5.2f "
924 		    "%2d ncpus=%d\n", state->cpu_name,
925 		    state->cpu_qavg, state->cpu_uavg, state->cpu_davg,
926 		    state->cpu_limit, nstate);
927 	}
928 	return nstate;
929 }
930 
931 static void
932 mon_perf(double srt)
933 {
934 	cpumask_t ocpu_used, ocpu_pwrdom_used;
935 	int pnstate = 0, nstate;
936 	int cpu;
937 
938 	/*
939 	 * Find cpus requiring performance and their cooresponding power
940 	 * domains.  Save the number of cpus requiring performance in
941 	 * pnstate.
942 	 */
943 	ocpu_used = cpu_used;
944 	ocpu_pwrdom_used = cpu_pwrdom_used;
945 
946 	CPUMASK_ASSZERO(cpu_used);
947 	CPUMASK_ASSZERO(cpu_pwrdom_used);
948 
949 	for (cpu = 0; cpu < NCpus; ++cpu) {
950 		struct cpu_state *state = &pcpu_state[cpu];
951 		int s;
952 
953 		s = get_nstate(state, srt);
954 		if (s) {
955 			CPUMASK_ORBIT(cpu_used, cpu);
956 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
957 		}
958 		pnstate += s;
959 
960 		state->cpu_limit = s;
961 	}
962 
963 	/*
964 	 * Calculate nstate, the number of cpus we wish to run at max
965 	 * performance.
966 	 */
967 	nstate = get_nstate(&global_cpu_state, srt);
968 
969 	if (nstate == global_cpu_state.cpu_limit &&
970 	    (pnstate == global_pcpu_limit || nstate > pnstate)) {
971 		/* Nothing changed; keep the sets */
972 		cpu_used = ocpu_used;
973 		cpu_pwrdom_used = ocpu_pwrdom_used;
974 
975 		global_pcpu_limit = pnstate;
976 		return;
977 	}
978 	global_pcpu_limit = pnstate;
979 
980 	if (nstate > pnstate) {
981 		/*
982 		 * Add spare cpus to meet global performance requirement.
983 		 */
984 		add_spare_cpus(ocpu_used, nstate - pnstate);
985 	}
986 
987 	global_cpu_state.cpu_limit = nstate;
988 
989 	/*
990 	 * Adjust cpu and cpu power domain performance
991 	 */
992 	adj_perf(ocpu_used, ocpu_pwrdom_used);
993 }
994 
995 static void
996 add_spare_cpus(const cpumask_t ocpu_used, int ncpu)
997 {
998 	cpumask_t saved_pwrdom, xcpu_used;
999 	int done = 0, cpu;
1000 
1001 	/*
1002 	 * Find more cpus in the previous cpu set.
1003 	 */
1004 	xcpu_used = cpu_used;
1005 	CPUMASK_XORMASK(xcpu_used, ocpu_used);
1006 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1007 		cpu = BSFCPUMASK(xcpu_used);
1008 		CPUMASK_NANDBIT(xcpu_used, cpu);
1009 
1010 		if (CPUMASK_TESTBIT(ocpu_used, cpu)) {
1011 			CPUMASK_ORBIT(cpu_pwrdom_used, cpu2pwrdom[cpu]);
1012 			CPUMASK_ORBIT(cpu_used, cpu);
1013 			--ncpu;
1014 			if (ncpu == 0)
1015 				return;
1016 		}
1017 	}
1018 
1019 	/*
1020 	 * Find more cpus in the used cpu power domains.
1021 	 */
1022 	saved_pwrdom = cpu_pwrdom_used;
1023 again:
1024 	while (CPUMASK_TESTNZERO(saved_pwrdom)) {
1025 		cpumask_t unused_cpumask;
1026 		int dom;
1027 
1028 		dom = BSFCPUMASK(saved_pwrdom);
1029 		CPUMASK_NANDBIT(saved_pwrdom, dom);
1030 
1031 		unused_cpumask = cpu_pwrdomain[dom]->dom_cpumask;
1032 		CPUMASK_NANDMASK(unused_cpumask, cpu_used);
1033 
1034 		while (CPUMASK_TESTNZERO(unused_cpumask)) {
1035 			cpu = BSFCPUMASK(unused_cpumask);
1036 			CPUMASK_NANDBIT(unused_cpumask, cpu);
1037 
1038 			CPUMASK_ORBIT(cpu_pwrdom_used, dom);
1039 			CPUMASK_ORBIT(cpu_used, cpu);
1040 			--ncpu;
1041 			if (ncpu == 0)
1042 				return;
1043 		}
1044 	}
1045 	if (!done) {
1046 		done = 1;
1047 		/*
1048 		 * Find more cpus in unused cpu power domains
1049 		 */
1050 		saved_pwrdom = cpu_pwrdom_mask;
1051 		CPUMASK_NANDMASK(saved_pwrdom, cpu_pwrdom_used);
1052 		goto again;
1053 	}
1054 	if (DebugOpt)
1055 		printf("%d cpus not found\n", ncpu);
1056 }
1057 
1058 static void
1059 acpi_set_cpufreq(int dom, int inc)
1060 {
1061 	int lowest, highest, desired;
1062 	char sysid[64];
1063 
1064 	acpi_get_cpufreq(dom, &highest, &lowest);
1065 	if (highest == 0 || lowest == 0)
1066 		return;
1067 	desired = inc ? highest : lowest;
1068 
1069 	if (DebugOpt)
1070 		printf("dom%d set frequency %d\n", dom, desired);
1071 	snprintf(sysid, sizeof(sysid), "hw.acpi.cpu.px_dom%d.select", dom);
1072 	sysctlbyname(sysid, NULL, NULL, &desired, sizeof(desired));
1073 }
1074 
1075 static void
1076 adj_cpu_pwrdom(int dom, int inc)
1077 {
1078 	if (AdjustCpuFreq)
1079 		acpi_set_cpufreq(dom, inc);
1080 }
1081 
1082 static void
1083 adj_cpu_perf(int cpu, int inc)
1084 {
1085 	if (DebugOpt) {
1086 		if (inc)
1087 			printf("cpu%d increase perf\n", cpu);
1088 		else
1089 			printf("cpu%d decrease perf\n", cpu);
1090 	}
1091 
1092 	if (HasPerfbias)
1093 		set_perfbias(cpu, inc);
1094 	if (AdjustCstate)
1095 		set_cstate(cpu, inc);
1096 }
1097 
1098 static void
1099 adj_perf(cpumask_t xcpu_used, cpumask_t xcpu_pwrdom_used)
1100 {
1101 	cpumask_t old_usched_used;
1102 	int cpu, inc;
1103 
1104 	/*
1105 	 * Set cpus requiring performance to the userland process
1106 	 * scheduler.  Leave the rest of cpus unmapped.
1107 	 */
1108 	old_usched_used = usched_cpu_used;
1109 	usched_cpu_used = cpu_used;
1110 	if (CPUMASK_TESTZERO(usched_cpu_used))
1111 		CPUMASK_ORBIT(usched_cpu_used, 0);
1112 	if (CPUMASK_CMPMASKNEQ(usched_cpu_used, old_usched_used))
1113 		set_uschedcpus();
1114 
1115 	/*
1116 	 * Adjust per-cpu performance.
1117 	 */
1118 	CPUMASK_XORMASK(xcpu_used, cpu_used);
1119 	while (CPUMASK_TESTNZERO(xcpu_used)) {
1120 		cpu = BSFCPUMASK(xcpu_used);
1121 		CPUMASK_NANDBIT(xcpu_used, cpu);
1122 
1123 		if (CPUMASK_TESTBIT(cpu_used, cpu)) {
1124 			/* Increase cpu performance */
1125 			inc = 1;
1126 		} else {
1127 			/* Decrease cpu performance */
1128 			inc = 0;
1129 		}
1130 		adj_cpu_perf(cpu, inc);
1131 	}
1132 
1133 	/*
1134 	 * Adjust cpu power domain performance.  This could affect
1135 	 * a set of cpus.
1136 	 */
1137 	CPUMASK_XORMASK(xcpu_pwrdom_used, cpu_pwrdom_used);
1138 	while (CPUMASK_TESTNZERO(xcpu_pwrdom_used)) {
1139 		int dom;
1140 
1141 		dom = BSFCPUMASK(xcpu_pwrdom_used);
1142 		CPUMASK_NANDBIT(xcpu_pwrdom_used, dom);
1143 
1144 		if (CPUMASK_TESTBIT(cpu_pwrdom_used, dom)) {
1145 			/* Increase cpu power domain performance */
1146 			inc = 1;
1147 		} else {
1148 			/* Decrease cpu power domain performance */
1149 			inc = 0;
1150 		}
1151 		adj_cpu_pwrdom(dom, inc);
1152 	}
1153 }
1154 
1155 static void
1156 restore_perf(void)
1157 {
1158 	cpumask_t ocpu_used, ocpu_pwrdom_used;
1159 
1160 	/* Remove highest cpu frequency limitation */
1161 	HighestCpuFreq = 0;
1162 
1163 	ocpu_used = cpu_used;
1164 	ocpu_pwrdom_used = cpu_pwrdom_used;
1165 
1166 	/* Max out all cpus and cpu power domains performance */
1167 	CPUMASK_ASSBMASK(cpu_used, NCpus);
1168 	cpu_pwrdom_used = cpu_pwrdom_mask;
1169 
1170 	adj_perf(ocpu_used, ocpu_pwrdom_used);
1171 
1172 	if (AdjustCstate) {
1173 		/*
1174 		 * Restore the original mwait C-state
1175 		 */
1176 		if (DebugOpt)
1177 			printf("global set cstate %s\n", orig_global_cx);
1178 		sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1179 		    orig_global_cx, strlen(orig_global_cx) + 1);
1180 	}
1181 }
1182 
1183 static int
1184 probe_cstate(void)
1185 {
1186 	char cx_supported[1024];
1187 	const char *target;
1188 	char *ptr;
1189 	int idle_hlt, deep = 1;
1190 	size_t len;
1191 
1192 	len = sizeof(idle_hlt);
1193 	if (sysctlbyname("machdep.cpu_idle_hlt", &idle_hlt, &len, NULL, 0) < 0)
1194 		return 0;
1195 	if (idle_hlt != 1)
1196 		return 0;
1197 
1198 	len = sizeof(cx_supported);
1199 	if (sysctlbyname("machdep.mwait.CX.supported", cx_supported, &len,
1200 	    NULL, 0) < 0)
1201 		return 0;
1202 
1203 	len = sizeof(orig_global_cx);
1204 	if (sysctlbyname("machdep.mwait.CX.idle", orig_global_cx, &len,
1205 	    NULL, 0) < 0)
1206 		return 0;
1207 
1208 	strlcpy(cpu_perf_cx, "AUTODEEP", sizeof(cpu_perf_cx));
1209 	cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1210 	if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1211 	    cpu_perf_cx, cpu_perf_cxlen) < 0) {
1212 		/* AUTODEEP is not supported; try AUTO */
1213 		deep = 0;
1214 		strlcpy(cpu_perf_cx, "AUTO", sizeof(cpu_perf_cx));
1215 		cpu_perf_cxlen = strlen(cpu_perf_cx) + 1;
1216 		if (sysctlbyname("machdep.mwait.CX.idle", NULL, NULL,
1217 		    cpu_perf_cx, cpu_perf_cxlen) < 0)
1218 			return 0;
1219 	}
1220 
1221 	if (!deep)
1222 		target = "C2/0";
1223 	else
1224 		target = NULL;
1225 	for (ptr = strtok(cx_supported, " "); ptr != NULL;
1226 	     ptr = strtok(NULL, " ")) {
1227 		if (target == NULL ||
1228 		    (target != NULL && strcmp(ptr, target) == 0)) {
1229 			strlcpy(cpu_idle_cx, ptr, sizeof(cpu_idle_cx));
1230 			cpu_idle_cxlen = strlen(cpu_idle_cx) + 1;
1231 			if (target != NULL)
1232 				break;
1233 		}
1234 	}
1235 	if (cpu_idle_cxlen == 0)
1236 		return 0;
1237 
1238 	if (DebugOpt) {
1239 		printf("cstate orig %s, perf %s, idle %s\n",
1240 		    orig_global_cx, cpu_perf_cx, cpu_idle_cx);
1241 	}
1242 	return 1;
1243 }
1244 
1245 static void
1246 set_cstate(int cpu, int inc)
1247 {
1248 	const char *cst;
1249 	char sysid[64];
1250 	size_t len;
1251 
1252 	if (inc) {
1253 		cst = cpu_perf_cx;
1254 		len = cpu_perf_cxlen;
1255 	} else {
1256 		cst = cpu_idle_cx;
1257 		len = cpu_idle_cxlen;
1258 	}
1259 
1260 	if (DebugOpt)
1261 		printf("cpu%d set cstate %s\n", cpu, cst);
1262 	snprintf(sysid, sizeof(sysid), "machdep.mwait.CX.idle%d", cpu);
1263 	sysctlbyname(sysid, NULL, NULL, cst, len);
1264 }
1265 
1266 static void
1267 restore_backlight(void)
1268 {
1269 	if (BackLightDown) {
1270 		BackLightDown = 0;
1271 		sysctlbyname("hw.backlight_level", NULL, NULL,
1272 		    &OldBackLightLevel, sizeof(OldBackLightLevel));
1273 	}
1274 }
1275