xref: /dragonfly/usr.sbin/powerd/powerd.c (revision 6589c761)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * The powerd daemon monitors the cpu load and adjusts cpu frequencies
37  * via hw.acpi.cpu.px_dom*.
38  */
39 
40 #define _KERNEL_STRUCTURES
41 #include <sys/types.h>
42 #include <sys/sysctl.h>
43 #include <sys/kinfo.h>
44 #include <sys/file.h>
45 #include <stdio.h>
46 #include <stdlib.h>
47 #include <unistd.h>
48 #include <string.h>
49 #include <syslog.h>
50 
/*
 * Forward declarations of the file-local helpers.
 */
static void usage(void);
static double getcputime(void);
static void acpi_setcpufreq(int nstate);
static void setupdominfo(void);
static void sigintr(int signo);

/*
 * Global state shared between main() and the helpers.
 */
int DebugOpt;			/* set by -d: no daemon(), print state */
int TurboOpt = 1;		/* cleared by -t */
int CpuLimit;			/* # of cpus at max frequency */
int DomLimit;			/* # of domains at max frequency */
int PowerFd;			/* /var/run/powerd.pid descriptor, < 0 if none */
int DomBeg;			/* first px_dom index that exists */
int DomEnd;			/* one past the last px_dom index that exists */
int NCpus;			/* cpus counted over all domains */
int CpuCount[256];		/* # of cpus in any given domain */
int CpuToDom[256];		/* domain a particular cpu belongs to */
int Hysteresis = 10;		/* percentage */
double TriggerUp = 0.25;	/* single-cpu load to force max freq */
double TriggerDown;		/* load per cpu to force the min freq */
71 
72 int
73 main(int ac, char **av)
74 {
75 	double qavg;
76 	double uavg;	/* uavg - used for speeding up */
77 	double davg;	/* davg - used for slowing down */
78 	double srt;
79 	double pollrate;
80 	int ch;
81 	int ustate;
82 	int dstate;
83 	int nstate;
84 	char buf[64];
85 
86 	srt = 8.0;	/* time for samples - 8 seconds */
87 	pollrate = 1.0;	/* polling rate in seconds */
88 
89 	while ((ch = getopt(ac, av, "dp:r:tu:T:")) != -1) {
90 		switch(ch) {
91 		case 'd':
92 			DebugOpt = 1;
93 			break;
94 		case 'p':
95 			Hysteresis = (int)strtol(optarg, NULL, 10);
96 			break;
97 		case 't':
98 			TurboOpt = 0;
99 			break;
100 		case 'u':
101 			TriggerUp = (double)strtol(optarg, NULL, 10) / 100;
102 			break;
103 		case 'r':
104 			pollrate = strtod(optarg, NULL);
105 			break;
106 		case 'T':
107 			srt = strtod(optarg, NULL);
108 			break;
109 		default:
110 			usage();
111 			/* NOT REACHED */
112 		}
113 	}
114 	ac -= optind;
115 	av += optind;
116 
117 	if (0 > Hysteresis || Hysteresis > 99) {
118 		fprintf(stderr, "Invalid hysteresis value\n");
119 		exit(1);
120 	}
121 
122 	if (0 > TriggerUp || TriggerUp > 1) {
123 		fprintf(stderr, "Invalid load limit value\n");
124 		exit(1);
125 	}
126 
127 	TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100);
128 
129 	/*
130 	 * Make sure powerd is not already running.
131 	 */
132 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
133 	if (PowerFd < 0) {
134 		fprintf(stderr,
135 			"Cannot create /var/run/powerd.pid, "
136 			"continuing anyway\n");
137 	} else {
138 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
139 			fprintf(stderr, "powerd is already running\n");
140 			exit(1);
141 		}
142 	}
143 
144 	/*
145 	 * Demonize and set pid
146 	 */
147 	if (DebugOpt == 0) {
148 		daemon(0, 0);
149 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
150 	}
151 
152 	if (PowerFd >= 0) {
153 		ftruncate(PowerFd, 0);
154 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
155 		write(PowerFd, buf, strlen(buf));
156 	}
157 
158 	/*
159 	 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel
160 	 *
161 	 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
162 	 * taskqueue and ACPI taskqueue is shared across various
163 	 * ACPI modules, any delay in other modules may cause
164 	 * hw.acpi.cpu.px_dom* to be created at quite a later time
165 	 * (e.g. cmbat module's task could take quite a lot of time).
166 	 */
167 	for (;;) {
168 		/*
169 		 * Prime delta cputime calculation, make sure at least
170 		 * dom0 exists.
171 		 */
172 		getcputime();
173 
174 		setupdominfo();
175 		if (DomBeg >= DomEnd) {
176 			usleep((int)(pollrate * 1000000.0));
177 			continue;
178 		}
179 
180 		DomLimit = DomEnd;
181 		CpuLimit = NCpus;
182 		break;
183 	}
184 
185 	/*
186 	 * Set to maximum performance if killed.
187 	 */
188 	signal(SIGINT, sigintr);
189 	signal(SIGTERM, sigintr);
190 	uavg = 0.0;
191 	davg = 0.0;
192 
193 	srt = srt / pollrate;	/* convert to sample count */
194 
195 	if (DebugOpt)
196 		printf("samples for downgrading: %5.2f\n", srt);
197 
198 	/*
199 	 * Monitoring loop
200 	 *
201 	 * Calculate nstate, the number of cpus we wish to run at max
202 	 * frequency.  All remaining cpus will be set to their lowest
203 	 * frequency and mapped out of the user process scheduler.
204 	 */
205 	for (;;) {
206 		qavg = getcputime();
207 		uavg = (uavg * 2.0 + qavg) / 3.0;	/* speeding up */
208 		davg = (davg * srt + qavg) / (srt + 1);	/* slowing down */
209 		if (davg < uavg)
210 			davg = uavg;
211 
212 		ustate = uavg / TriggerUp;
213 		if (ustate < CpuLimit)
214 			ustate = uavg / TriggerDown;
215 		dstate = davg / TriggerUp;
216 		if (dstate < CpuLimit)
217 			dstate = davg / TriggerDown;
218 
219 		nstate = (ustate > dstate) ? ustate : dstate;
220 		if (nstate > NCpus)
221 			nstate = NCpus;
222 
223 		if (DebugOpt) {
224 			printf("\rqavg=%5.2f uavg=%5.2f davg=%5.2f "
225 			       "%2d/%2d ncpus=%d\r",
226 				qavg, uavg, davg,
227 				CpuLimit, DomLimit, nstate);
228 			fflush(stdout);
229 		}
230 		if (nstate != CpuLimit)
231 			acpi_setcpufreq(nstate);
232 		usleep((int)(pollrate * 1000000.0));
233 	}
234 }
235 
236 static
237 void
238 sigintr(int signo __unused)
239 {
240 	syslog(LOG_INFO, "killed, setting max and exiting");
241 	acpi_setcpufreq(NCpus);
242 	exit(1);
243 }
244 
245 /*
246  * Figure out the domains and calculate the CpuCount[] and CpuToDom[]
247  * arrays.
248  */
249 static
250 void
251 setupdominfo(void)
252 {
253 	char buf[64];
254 	char members[1024];
255 	char *str;
256 	size_t msize;
257 	int i;
258 	int n;
259 
260 	for (i = 0; i < 256; ++i) {
261 		snprintf(buf, sizeof(buf),
262 			 "hw.acpi.cpu.px_dom%d.available", i);
263 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0)
264 			break;
265 	}
266 	DomBeg = i;
267 
268 	for (i = 255; i >= DomBeg; --i) {
269 		snprintf(buf, sizeof(buf),
270 			 "hw.acpi.cpu.px_dom%d.available", i);
271 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0) {
272 			++i;
273 			break;
274 		}
275 	}
276 	DomEnd = i;
277 
278 	for (i = DomBeg; i < DomEnd; ++i) {
279 		snprintf(buf, sizeof(buf),
280 			 "hw.acpi.cpu.px_dom%d.members", i);
281 		msize = sizeof(members);
282 		if (sysctlbyname(buf, members, &msize, NULL, 0) == 0) {
283 			members[msize] = 0;
284 			for (str = strtok(members, " "); str;
285 			     str = strtok(NULL, " ")) {
286 				n = -1;
287 				sscanf(str, "cpu%d", &n);
288 				if (n >= 0) {
289 					++NCpus;
290 					++CpuCount[i];
291 					CpuToDom[n]= i;
292 				}
293 			}
294 		}
295 	}
296 }
297 
298 /*
299  * Return the one-second cpu load.  One cpu at 100% will return a value
300  * of 1.0.  On a SMP system N cpus running at 100% will return a value of N.
301  */
302 static
303 double
304 getcputime(void)
305 {
306 	static struct kinfo_cputime ocpu_time[64];
307 	static struct kinfo_cputime ncpu_time[64];
308 	size_t slen;
309 	int ncpu;
310 	int cpu;
311 	uint64_t delta;
312 
313 	bcopy(ncpu_time, ocpu_time, sizeof(ncpu_time));
314 	slen = sizeof(ncpu_time);
315 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
316 		fprintf(stderr, "kern.cputime sysctl not available\n");
317 		exit(1);
318 	}
319 	ncpu = slen / sizeof(ncpu_time[0]);
320 	delta = 0;
321 
322 	for (cpu = 0; cpu < ncpu; ++cpu) {
323 		delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
324 			  ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
325 			 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
326 			  ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
327 	}
328 	return((double)delta / 1000000.0);
329 }
330 
331 /*
332  * nstate is the requested number of cpus that we wish to run at full
333  * frequency.  We calculate how many domains we have to adjust to reach
334  * this goal.
335  *
336  * This function also sets the user scheduler global cpu mask.
337  */
338 static
339 void
340 acpi_setcpufreq(int nstate)
341 {
342 	int ncpus = 0;
343 	int increasing = (nstate > CpuLimit);
344 	int dom;
345 	int domBeg;
346 	int domEnd;
347 	int lowest;
348 	int highest;
349 	int desired;
350 	int v;
351 	char *sysid;
352 	char *ptr;
353 	char buf[256];
354 	size_t buflen;
355 	cpumask_t global_cpumask;
356 
357 	/*
358 	 * Calculate the ending domain if the number of operating cpus
359 	 * has increased.
360 	 *
361 	 * Calculate the starting domain if the number of operating cpus
362 	 * has decreased.
363 	 */
364 	for (dom = DomBeg; dom < DomEnd; ++dom) {
365 		if (ncpus >= nstate)
366 			break;
367 		ncpus += CpuCount[dom];
368 	}
369 
370 	syslog(LOG_INFO, "using %d cpus", nstate);
371 
372 	/*
373 	 * Set the mask of cpus the userland scheduler is allowed to use.
374 	 */
375 	CPUMASK_ASSBMASK(global_cpumask, nstate);
376 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
377 		     &global_cpumask, sizeof(global_cpumask));
378 
379 	if (increasing) {
380 		domBeg = DomLimit;
381 		domEnd = dom;
382 	} else {
383 		domBeg = dom;
384 		domEnd = DomLimit;
385 	}
386 	DomLimit = dom;
387 	CpuLimit = nstate;
388 
389 	/*
390 	 * Adjust the cpu frequency
391 	 */
392 	if (DebugOpt)
393 		printf("\n");
394 	for (dom = domBeg; dom < domEnd; ++dom) {
395 		/*
396 		 * Retrieve availability list
397 		 */
398 		asprintf(&sysid, "hw.acpi.cpu.px_dom%d.available", dom);
399 		buflen = sizeof(buf) - 1;
400 		v = sysctlbyname(sysid, buf, &buflen, NULL, 0);
401 		free(sysid);
402 		if (v < 0)
403 			continue;
404 		buf[buflen] = 0;
405 
406 		/*
407 		 * Parse out the highest and lowest cpu frequencies
408 		 */
409 		ptr = buf;
410 		highest = lowest = 0;
411 		while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
412 			if (lowest == 0 || lowest > v)
413 				lowest = v;
414 			if (highest == 0 || highest < v)
415 				highest = v;
416 			/*
417 			 * Detect turbo mode
418 			 */
419 			if ((highest - v == 1) && ! TurboOpt)
420 				highest = v;
421 
422 		}
423 
424 		/*
425 		 * Calculate the desired cpu frequency, test, and set.
426 		 */
427 		desired = increasing ? highest : lowest;
428 
429 		asprintf(&sysid, "hw.acpi.cpu.px_dom%d.select", dom);
430 		buflen = sizeof(v);
431 		v = 0;
432 		sysctlbyname(sysid, &v, &buflen, NULL, 0);
433 		{
434 			if (DebugOpt) {
435 				printf("dom%d set frequency %d\n",
436 				       dom, desired);
437 			}
438 			sysctlbyname(sysid, NULL, NULL,
439 				     &desired, sizeof(desired));
440 		}
441 		free(sysid);
442 	}
443 }
444 
/*
 * Print the command line synopsis, covering every option accepted by
 * the getopt() string in main(), to stderr and exit with status 1.
 */
static
void
usage(void)
{
	fprintf(stderr,
		"usage: powerd [-dt] [-p hysteresis] [-r poll_interval] "
		"[-u trigger_up]\n"
		"              [-T sample_interval]\n");
	exit(1);
}
451 }
452