xref: /dragonfly/usr.sbin/powerd/powerd.c (revision f503b4c4)
1 /*
2  * Copyright (c) 2010 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * The powerd daemon monitors the cpu load and adjusts cpu frequencies
37  * via hw.acpi.cpu.px_dom*.
38  */
39 
40 #define _KERNEL_STRUCTURES
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/kinfo.h>
#include <sys/file.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <syslog.h>
50 
/*
 * Forward declarations of the file-local helpers defined below main().
 */
static void usage(void);
static double getcputime(void);
static void acpi_setcpufreq(int nstate);
static void setupdominfo(void);

/*
 * Daemon-wide state.  CpuLimit/DomLimit track what was last programmed by
 * acpi_setcpufreq(); DomBeg/DomEnd/NCpus/CpuCount[]/CpuToDom[] are filled
 * in by setupdominfo().
 */
int DebugOpt;		/* set by -d: do not daemonize, print status */
int CpuLimit;		/* # of cpus at max frequency */
int DomLimit;		/* # of domains at max frequency */
int PowerFd;		/* fd of /var/run/powerd.pid, < 0 if open failed */
int DomBeg;		/* index of the first px_dom sysctl found */
int DomEnd;		/* one past the index of the last px_dom found */
int NCpus;		/* # of cpus listed in the domain member lists */
int CpuCount[256];	/* # of cpus in any given domain */
int CpuToDom[256];	/* domain a particular cpu belongs to */
double Trigger = 0.25;	/* load per cpu to force max freq */
66 
67 static void sigintr(int signo);
68 
69 int
70 main(int ac, char **av)
71 {
72 	double qavg;
73 	double savg;
74 	int ch;
75 	int nstate;
76 	char buf[64];
77 
78 	while ((ch = getopt(ac, av, "d")) != -1) {
79 		switch(ch) {
80 		case 'd':
81 			DebugOpt = 1;
82 			break;
83 		default:
84 			usage();
85 			/* NOT REACHED */
86 		}
87 	}
88 	ac -= optind;
89 	av += optind;
90 
91 	/*
92 	 * Make sure powerd is not already running.
93 	 */
94 	PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644);
95 	if (PowerFd < 0) {
96 		fprintf(stderr,
97 			"Cannot create /var/run/powerd.pid, "
98 			"continuing anyway\n");
99 	} else {
100 		if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) {
101 			fprintf(stderr, "powerd is already running\n");
102 			exit(1);
103 		}
104 	}
105 
106 	/*
107 	 * Demonize and set pid
108 	 */
109 	if (DebugOpt == 0) {
110 		daemon(0, 0);
111 		openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON);
112 	}
113 
114 	if (PowerFd >= 0) {
115 		ftruncate(PowerFd, 0);
116 		snprintf(buf, sizeof(buf), "%d\n", (int)getpid());
117 		write(PowerFd, buf, strlen(buf));
118 	}
119 
120 	/*
121 	 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel
122 	 *
123 	 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI
124 	 * taskqueue and ACPI taskqueue is shared across various
125 	 * ACPI modules, any delay in other modules may cause
126 	 * hw.acpi.cpu.px_dom* to be created at quite a later time
127 	 * (e.g. cmbat module's task could take quite a lot of time).
128 	 */
129 	for (;;) {
130 		/*
131 		 * Prime delta cputime calculation, make sure at least
132 		 * dom0 exists.
133 		 */
134 		getcputime();
135 		savg = 0.0;
136 
137 		setupdominfo();
138 		if (DomBeg >= DomEnd) {
139 			sleep(1);
140 			continue;
141 		}
142 
143 		DomLimit = DomEnd;
144 		CpuLimit = NCpus;
145 		break;
146 	}
147 
148 	/*
149 	 * Set to maximum performance if killed.
150 	 */
151 	signal(SIGINT, sigintr);
152 	signal(SIGTERM, sigintr);
153 
154 	/*
155 	 * Monitoring loop
156 	 *
157 	 * Calculate nstate, the number of cpus we wish to run at max
158 	 * frequency.  All remaining cpus will be set to their lowest
159 	 * frequency and mapped out of the user process scheduler.
160 	 */
161 	for (;;) {
162 		qavg = getcputime();
163 		savg = (savg * 7.0 + qavg) / 8.0;
164 
165 		nstate = savg / Trigger;
166 		if (nstate > NCpus)
167 			nstate = NCpus;
168 		if (DebugOpt) {
169 			printf("\rqavg=%5.2f savg=%5.2f %2d/%2d ncpus=%d\r",
170 				qavg, savg, CpuLimit, DomLimit, nstate);
171 			fflush(stdout);
172 		}
173 		if (nstate != CpuLimit)
174 			acpi_setcpufreq(nstate);
175 		sleep(1);
176 	}
177 }
178 
179 static
180 void
181 sigintr(int signo __unused)
182 {
183 	syslog(LOG_INFO, "killed, setting max and exiting");
184 	acpi_setcpufreq(NCpus);
185 	exit(1);
186 }
187 
188 /*
189  * Figure out the domains and calculate the CpuCount[] and CpuToDom[]
190  * arrays.
191  */
192 static
193 void
194 setupdominfo(void)
195 {
196 	char buf[64];
197 	char members[1024];
198 	char *str;
199 	size_t msize;
200 	int i;
201 	int n;
202 
203 	for (i = 0; i < 256; ++i) {
204 		snprintf(buf, sizeof(buf),
205 			 "hw.acpi.cpu.px_dom%d.available", i);
206 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0)
207 			break;
208 	}
209 	DomBeg = i;
210 
211 	for (i = 255; i >= DomBeg; --i) {
212 		snprintf(buf, sizeof(buf),
213 			 "hw.acpi.cpu.px_dom%d.available", i);
214 		if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0) {
215 			++i;
216 			break;
217 		}
218 	}
219 	DomEnd = i;
220 
221 	for (i = DomBeg; i < DomEnd; ++i) {
222 		snprintf(buf, sizeof(buf),
223 			 "hw.acpi.cpu.px_dom%d.members", i);
224 		msize = sizeof(members);
225 		if (sysctlbyname(buf, members, &msize, NULL, 0) == 0) {
226 			members[msize] = 0;
227 			for (str = strtok(members, " "); str;
228 			     str = strtok(NULL, " ")) {
229 				n = -1;
230 				sscanf(str, "cpu%d", &n);
231 				if (n >= 0) {
232 					++NCpus;
233 					++CpuCount[i];
234 					CpuToDom[n]= i;
235 				}
236 			}
237 		}
238 	}
239 }
240 
241 /*
242  * Return the one-second cpu load.  One cpu at 100% will return a value
243  * of 1.0.  On a SMP system N cpus running at 100% will return a value of N.
244  */
245 static
246 double
247 getcputime(void)
248 {
249 	static struct kinfo_cputime ocpu_time[64];
250 	static struct kinfo_cputime ncpu_time[64];
251 	size_t slen;
252 	int ncpu;
253 	int cpu;
254 	uint64_t delta;
255 
256 	bcopy(ncpu_time, ocpu_time, sizeof(ncpu_time));
257 	slen = sizeof(ncpu_time);
258 	if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) {
259 		fprintf(stderr, "kern.cputime sysctl not available\n");
260 		exit(1);
261 	}
262 	ncpu = slen / sizeof(ncpu_time[0]);
263 	delta = 0;
264 
265 	for (cpu = 0; cpu < ncpu; ++cpu) {
266 		delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys +
267 			  ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) -
268 			 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys +
269 			  ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr);
270 	}
271 	return((double)delta / 1000000.0);
272 }
273 
274 /*
275  * nstate is the requested number of cpus that we wish to run at full
276  * frequency.  We calculate how many domains we have to adjust to reach
277  * this goal.
278  *
279  * This function also sets the user scheduler global cpu mask.
280  */
281 static
282 void
283 acpi_setcpufreq(int nstate)
284 {
285 	int ncpus = 0;
286 	int increasing = (nstate > CpuLimit);
287 	int dom;
288 	int domBeg;
289 	int domEnd;
290 	int lowest;
291 	int highest;
292 	int desired;
293 	int v;
294 	char *sysid;
295 	char *ptr;
296 	char buf[256];
297 	size_t buflen;
298 	cpumask_t global_cpumask;
299 
300 	/*
301 	 * Calculate the ending domain if the number of operating cpus
302 	 * has increased.
303 	 *
304 	 * Calculate the starting domain if the number of operating cpus
305 	 * has decreased.
306 	 */
307 	for (dom = DomBeg; dom < DomEnd; ++dom) {
308 		if (ncpus >= nstate)
309 			break;
310 		ncpus += CpuCount[dom];
311 	}
312 
313 	syslog(LOG_INFO, "using %d cpus", nstate);
314 
315 	/*
316 	 * Set the mask of cpus the userland scheduler is allowed to use.
317 	 */
318 	CPUMASK_ASSBMASK(global_cpumask, nstate);
319 	sysctlbyname("kern.usched_global_cpumask", NULL, 0,
320 		     &global_cpumask, sizeof(global_cpumask));
321 
322 	if (increasing) {
323 		domBeg = DomLimit;
324 		domEnd = dom;
325 	} else {
326 		domBeg = dom;
327 		domEnd = DomLimit;
328 	}
329 	DomLimit = dom;
330 	CpuLimit = nstate;
331 
332 	/*
333 	 * Adjust the cpu frequency
334 	 */
335 	if (DebugOpt)
336 		printf("\n");
337 	for (dom = domBeg; dom < domEnd; ++dom) {
338 		/*
339 		 * Retrieve availability list
340 		 */
341 		asprintf(&sysid, "hw.acpi.cpu.px_dom%d.available", dom);
342 		buflen = sizeof(buf) - 1;
343 		v = sysctlbyname(sysid, buf, &buflen, NULL, 0);
344 		free(sysid);
345 		if (v < 0)
346 			continue;
347 		buf[buflen] = 0;
348 
349 		/*
350 		 * Parse out the highest and lowest cpu frequencies
351 		 */
352 		ptr = buf;
353 		highest = lowest = 0;
354 		while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) {
355 			if (lowest == 0 || lowest > v)
356 				lowest = v;
357 			if (highest == 0 || highest < v)
358 				highest = v;
359 		}
360 
361 		/*
362 		 * Calculate the desired cpu frequency, test, and set.
363 		 */
364 		desired = increasing ? highest : lowest;
365 
366 		asprintf(&sysid, "hw.acpi.cpu.px_dom%d.select", dom);
367 		buflen = sizeof(v);
368 		v = 0;
369 		sysctlbyname(sysid, &v, &buflen, NULL, 0);
370 		{
371 			if (DebugOpt) {
372 				printf("dom%d set frequency %d\n",
373 				       dom, desired);
374 			}
375 			sysctlbyname(sysid, NULL, NULL,
376 				     &desired, sizeof(desired));
377 		}
378 		free(sysid);
379 	}
380 }
381 
/*
 * Print the command line synopsis on stderr and terminate the process
 * with exit status 1.  Does not return.
 */
static
void
usage(void)
{
	fputs("usage: powerd [-d]\n", stderr);
	exit(1);
}
389