/*
 * Copyright (c) 2010 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * The powerd daemon monitors the cpu load and adjusts cpu frequencies
 * via hw.acpi.cpu.px_dom*.
38 */ 39 40 #define _KERNEL_STRUCTURES 41 #include <sys/types.h> 42 #include <sys/sysctl.h> 43 #include <sys/kinfo.h> 44 #include <sys/file.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <unistd.h> 48 #include <string.h> 49 #include <syslog.h> 50 51 static void usage(void); 52 static double getcputime(void); 53 static void acpi_setcpufreq(int nstate); 54 static void setupdominfo(void); 55 56 int DebugOpt; 57 int TurboOpt = 1; 58 int CpuLimit; /* # of cpus at max frequency */ 59 int DomLimit; /* # of domains at max frequency */ 60 int PowerFd; 61 int DomBeg; 62 int DomEnd; 63 int NCpus; 64 int CpuCount[256]; /* # of cpus in any given domain */ 65 int CpuToDom[256]; /* domain a particular cpu belongs to */ 66 int Hysteresis = 10; /* percentage */ 67 double TriggerUp = 0.25;/* single-cpu load to force max freq */ 68 double TriggerDown; /* load per cpu to force the min freq */ 69 70 static void sigintr(int signo); 71 72 int 73 main(int ac, char **av) 74 { 75 double qavg; 76 double uavg; /* uavg - used for speeding up */ 77 double davg; /* davg - used for slowing down */ 78 double srt; 79 double pollrate; 80 int ch; 81 int ustate; 82 int dstate; 83 int nstate; 84 char buf[64]; 85 86 srt = 8.0; /* time for samples - 8 seconds */ 87 pollrate = 1.0; /* polling rate in seconds */ 88 89 while ((ch = getopt(ac, av, "dp:r:tu:T:")) != -1) { 90 switch(ch) { 91 case 'd': 92 DebugOpt = 1; 93 break; 94 case 'p': 95 Hysteresis = (int)strtol(optarg, NULL, 10); 96 break; 97 case 't': 98 TurboOpt = 0; 99 break; 100 case 'u': 101 TriggerUp = (double)strtol(optarg, NULL, 10) / 100; 102 break; 103 case 'r': 104 pollrate = strtod(optarg, NULL); 105 break; 106 case 'T': 107 srt = strtod(optarg, NULL); 108 break; 109 default: 110 usage(); 111 /* NOT REACHED */ 112 } 113 } 114 ac -= optind; 115 av += optind; 116 117 if (0 > Hysteresis || Hysteresis > 99) { 118 fprintf(stderr, "Invalid hysteresis value\n"); 119 exit(1); 120 } 121 122 if (0 > TriggerUp || TriggerUp > 1) { 123 
fprintf(stderr, "Invalid load limit value\n"); 124 exit(1); 125 } 126 127 TriggerDown = TriggerUp - (TriggerUp * (double) Hysteresis / 100); 128 129 /* 130 * Make sure powerd is not already running. 131 */ 132 PowerFd = open("/var/run/powerd.pid", O_CREAT|O_RDWR, 0644); 133 if (PowerFd < 0) { 134 fprintf(stderr, 135 "Cannot create /var/run/powerd.pid, " 136 "continuing anyway\n"); 137 } else { 138 if (flock(PowerFd, LOCK_EX|LOCK_NB) < 0) { 139 fprintf(stderr, "powerd is already running\n"); 140 exit(1); 141 } 142 } 143 144 /* 145 * Demonize and set pid 146 */ 147 if (DebugOpt == 0) { 148 daemon(0, 0); 149 openlog("powerd", LOG_CONS | LOG_PID, LOG_DAEMON); 150 } 151 152 if (PowerFd >= 0) { 153 ftruncate(PowerFd, 0); 154 snprintf(buf, sizeof(buf), "%d\n", (int)getpid()); 155 write(PowerFd, buf, strlen(buf)); 156 } 157 158 /* 159 * Wait hw.acpi.cpu.px_dom* sysctl to be created by kernel 160 * 161 * Since hw.acpi.cpu.px_dom* creation is queued into ACPI 162 * taskqueue and ACPI taskqueue is shared across various 163 * ACPI modules, any delay in other modules may cause 164 * hw.acpi.cpu.px_dom* to be created at quite a later time 165 * (e.g. cmbat module's task could take quite a lot of time). 166 */ 167 for (;;) { 168 /* 169 * Prime delta cputime calculation, make sure at least 170 * dom0 exists. 171 */ 172 getcputime(); 173 174 setupdominfo(); 175 if (DomBeg >= DomEnd) { 176 usleep((int)(pollrate * 1000000.0)); 177 continue; 178 } 179 180 DomLimit = DomEnd; 181 CpuLimit = NCpus; 182 break; 183 } 184 185 /* 186 * Set to maximum performance if killed. 187 */ 188 signal(SIGINT, sigintr); 189 signal(SIGTERM, sigintr); 190 uavg = 0.0; 191 davg = 0.0; 192 193 srt = srt / pollrate; /* convert to sample count */ 194 195 if (DebugOpt) 196 printf("samples for downgrading: %5.2f\n", srt); 197 198 /* 199 * Monitoring loop 200 * 201 * Calculate nstate, the number of cpus we wish to run at max 202 * frequency. 
All remaining cpus will be set to their lowest 203 * frequency and mapped out of the user process scheduler. 204 */ 205 for (;;) { 206 qavg = getcputime(); 207 uavg = (uavg * 2.0 + qavg) / 3.0; /* speeding up */ 208 davg = (davg * srt + qavg) / (srt + 1); /* slowing down */ 209 if (davg < uavg) 210 davg = uavg; 211 212 ustate = uavg / TriggerUp; 213 if (ustate < CpuLimit) 214 ustate = uavg / TriggerDown; 215 dstate = davg / TriggerUp; 216 if (dstate < CpuLimit) 217 dstate = davg / TriggerDown; 218 219 nstate = (ustate > dstate) ? ustate : dstate; 220 if (nstate > NCpus) 221 nstate = NCpus; 222 223 if (DebugOpt) { 224 printf("\rqavg=%5.2f uavg=%5.2f davg=%5.2f " 225 "%2d/%2d ncpus=%d\r", 226 qavg, uavg, davg, 227 CpuLimit, DomLimit, nstate); 228 fflush(stdout); 229 } 230 if (nstate != CpuLimit) 231 acpi_setcpufreq(nstate); 232 usleep((int)(pollrate * 1000000.0)); 233 } 234 } 235 236 static 237 void 238 sigintr(int signo __unused) 239 { 240 syslog(LOG_INFO, "killed, setting max and exiting"); 241 acpi_setcpufreq(NCpus); 242 exit(1); 243 } 244 245 /* 246 * Figure out the domains and calculate the CpuCount[] and CpuToDom[] 247 * arrays. 
248 */ 249 static 250 void 251 setupdominfo(void) 252 { 253 char buf[64]; 254 char members[1024]; 255 char *str; 256 size_t msize; 257 int i; 258 int n; 259 260 for (i = 0; i < 256; ++i) { 261 snprintf(buf, sizeof(buf), 262 "hw.acpi.cpu.px_dom%d.available", i); 263 if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0) 264 break; 265 } 266 DomBeg = i; 267 268 for (i = 255; i >= DomBeg; --i) { 269 snprintf(buf, sizeof(buf), 270 "hw.acpi.cpu.px_dom%d.available", i); 271 if (sysctlbyname(buf, NULL, NULL, NULL, 0) >= 0) { 272 ++i; 273 break; 274 } 275 } 276 DomEnd = i; 277 278 for (i = DomBeg; i < DomEnd; ++i) { 279 snprintf(buf, sizeof(buf), 280 "hw.acpi.cpu.px_dom%d.members", i); 281 msize = sizeof(members); 282 if (sysctlbyname(buf, members, &msize, NULL, 0) == 0) { 283 members[msize] = 0; 284 for (str = strtok(members, " "); str; 285 str = strtok(NULL, " ")) { 286 n = -1; 287 sscanf(str, "cpu%d", &n); 288 if (n >= 0) { 289 ++NCpus; 290 ++CpuCount[i]; 291 CpuToDom[n]= i; 292 } 293 } 294 } 295 } 296 } 297 298 /* 299 * Return the one-second cpu load. One cpu at 100% will return a value 300 * of 1.0. On a SMP system N cpus running at 100% will return a value of N. 
301 */ 302 static 303 double 304 getcputime(void) 305 { 306 static struct kinfo_cputime ocpu_time[64]; 307 static struct kinfo_cputime ncpu_time[64]; 308 size_t slen; 309 int ncpu; 310 int cpu; 311 uint64_t delta; 312 313 bcopy(ncpu_time, ocpu_time, sizeof(ncpu_time)); 314 slen = sizeof(ncpu_time); 315 if (sysctlbyname("kern.cputime", &ncpu_time, &slen, NULL, 0) < 0) { 316 fprintf(stderr, "kern.cputime sysctl not available\n"); 317 exit(1); 318 } 319 ncpu = slen / sizeof(ncpu_time[0]); 320 delta = 0; 321 322 for (cpu = 0; cpu < ncpu; ++cpu) { 323 delta += (ncpu_time[cpu].cp_user + ncpu_time[cpu].cp_sys + 324 ncpu_time[cpu].cp_nice + ncpu_time[cpu].cp_intr) - 325 (ocpu_time[cpu].cp_user + ocpu_time[cpu].cp_sys + 326 ocpu_time[cpu].cp_nice + ocpu_time[cpu].cp_intr); 327 } 328 return((double)delta / 1000000.0); 329 } 330 331 /* 332 * nstate is the requested number of cpus that we wish to run at full 333 * frequency. We calculate how many domains we have to adjust to reach 334 * this goal. 335 * 336 * This function also sets the user scheduler global cpu mask. 337 */ 338 static 339 void 340 acpi_setcpufreq(int nstate) 341 { 342 int ncpus = 0; 343 int increasing = (nstate > CpuLimit); 344 int dom; 345 int domBeg; 346 int domEnd; 347 int lowest; 348 int highest; 349 int desired; 350 int v; 351 char *sysid; 352 char *ptr; 353 char buf[256]; 354 size_t buflen; 355 cpumask_t global_cpumask; 356 357 /* 358 * Calculate the ending domain if the number of operating cpus 359 * has increased. 360 * 361 * Calculate the starting domain if the number of operating cpus 362 * has decreased. 363 */ 364 for (dom = DomBeg; dom < DomEnd; ++dom) { 365 if (ncpus >= nstate) 366 break; 367 ncpus += CpuCount[dom]; 368 } 369 370 syslog(LOG_INFO, "using %d cpus", nstate); 371 372 /* 373 * Set the mask of cpus the userland scheduler is allowed to use. 
374 */ 375 CPUMASK_ASSBMASK(global_cpumask, nstate); 376 sysctlbyname("kern.usched_global_cpumask", NULL, 0, 377 &global_cpumask, sizeof(global_cpumask)); 378 379 if (increasing) { 380 domBeg = DomLimit; 381 domEnd = dom; 382 } else { 383 domBeg = dom; 384 domEnd = DomLimit; 385 } 386 DomLimit = dom; 387 CpuLimit = nstate; 388 389 /* 390 * Adjust the cpu frequency 391 */ 392 if (DebugOpt) 393 printf("\n"); 394 for (dom = domBeg; dom < domEnd; ++dom) { 395 /* 396 * Retrieve availability list 397 */ 398 asprintf(&sysid, "hw.acpi.cpu.px_dom%d.available", dom); 399 buflen = sizeof(buf) - 1; 400 v = sysctlbyname(sysid, buf, &buflen, NULL, 0); 401 free(sysid); 402 if (v < 0) 403 continue; 404 buf[buflen] = 0; 405 406 /* 407 * Parse out the highest and lowest cpu frequencies 408 */ 409 ptr = buf; 410 highest = lowest = 0; 411 while (ptr && (v = strtol(ptr, &ptr, 10)) > 0) { 412 if (lowest == 0 || lowest > v) 413 lowest = v; 414 if (highest == 0 || highest < v) 415 highest = v; 416 /* 417 * Detect turbo mode 418 */ 419 if ((highest - v == 1) && ! TurboOpt) 420 highest = v; 421 422 } 423 424 /* 425 * Calculate the desired cpu frequency, test, and set. 426 */ 427 desired = increasing ? highest : lowest; 428 429 asprintf(&sysid, "hw.acpi.cpu.px_dom%d.select", dom); 430 buflen = sizeof(v); 431 v = 0; 432 sysctlbyname(sysid, &v, &buflen, NULL, 0); 433 { 434 if (DebugOpt) { 435 printf("dom%d set frequency %d\n", 436 dom, desired); 437 } 438 sysctlbyname(sysid, NULL, NULL, 439 &desired, sizeof(desired)); 440 } 441 free(sysid); 442 } 443 } 444 445 static 446 void 447 usage(void) 448 { 449 fprintf(stderr, "usage: powerd [-dt] [-p hysteresis] [-u trigger_up]\n"); 450 exit(1); 451 } 452