1 /* 2 * Copyright (c) 2015 Imre Vadász <imre@vdsz.com> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 23 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* 28 * Device driver for Intel's On Die power usage estimation via MSR. 29 * Supported by Sandy Bridge and later CPUs, and also by Atom CPUs 30 * of the Silvermont and later architectures. 31 */ 32 33 #include <sys/param.h> 34 #include <sys/bus.h> 35 #include <sys/systm.h> 36 #include <sys/module.h> 37 #include <sys/conf.h> 38 #include <sys/cpu_topology.h> 39 #include <sys/kernel.h> 40 #include <sys/sensors.h> 41 #include <sys/bitops.h> 42 43 #include <machine/specialreg.h> 44 #include <machine/cpufunc.h> 45 #include <machine/cputypes.h> 46 #include <machine/md_var.h> 47 48 #include "cpu_if.h" 49 50 #define MSR_RAPL_POWER_UNIT_POWER __BITS64(0, 3) 51 #define MSR_RAPL_POWER_UNIT_ENERGY __BITS64(8, 12) 52 #define MSR_RAPL_POWER_UNIT_TIME __BITS64(16, 19) 53 54 struct corepower_sensor { 55 uint64_t energy; 56 u_int msr; 57 struct ksensor sensor; 58 }; 59 60 struct corepower_softc { 61 device_t sc_dev; 62 63 uint32_t sc_watt_unit; 64 uint32_t sc_joule_unit; 65 uint32_t sc_second_unit; 66 67 int sc_have_sens; 68 int sc_is_atom; 69 70 struct corepower_sensor sc_pkg_sens; 71 struct corepower_sensor sc_dram_sens; 72 struct corepower_sensor sc_pp0_sens; 73 struct corepower_sensor sc_pp1_sens; 74 struct corepower_sensor sc_platform_sens; 75 76 struct ksensordev sc_sensordev; 77 struct sensor_task *sc_senstask; 78 }; 79 80 /* 81 * Device methods. 82 */ 83 static void corepower_identify(driver_t *driver, device_t parent); 84 static int corepower_probe(device_t dev); 85 static int corepower_attach(device_t dev); 86 static int corepower_detach(device_t dev); 87 static uint32_t corepower_energy_to_uwatts(struct corepower_softc *sc, 88 uint32_t units, uint32_t secs); 89 static void corepower_refresh(void *arg); 90 static void corepower_sens_init(struct corepower_sensor *sens, 91 char *desc, u_int msr, int cpu); 92 static void corepower_sens_update(struct corepower_softc *sc, 93 struct corepower_sensor *sens); 94 static int corepower_try(u_int msr, char *name); 95 96 static device_method_t corepower_methods[] = { 97 /* Device interface */ 98 DEVMETHOD(device_identify, corepower_identify), 99 DEVMETHOD(device_probe, corepower_probe), 100 DEVMETHOD(device_attach, corepower_attach), 101 DEVMETHOD(device_detach, corepower_detach), 102 103 DEVMETHOD_END 104 }; 105 106 static driver_t corepower_driver = { 107 "corepower", 108 corepower_methods, 109 sizeof(struct corepower_softc), 110 }; 111 112 static devclass_t corepower_devclass; 113 DRIVER_MODULE(corepower, cpu, corepower_driver, corepower_devclass, NULL, NULL); 114 MODULE_VERSION(corepower, 1); 115 116 static void 117 corepower_identify(driver_t *driver, device_t parent) 118 { 119 device_t child; 120 const struct cpu_node *node; 121 int cpu, master_cpu; 122 123 /* Make sure we're not being doubly invoked. */ 124 if (device_find_child(parent, "corepower", -1) != NULL) 125 return; 126 127 /* Check that the vendor is Intel. */ 128 if (cpu_vendor_id != CPU_VENDOR_INTEL) 129 return; 130 131 /* We only want one child per CPU package */ 132 cpu = device_get_unit(parent); 133 node = get_cpu_node_by_cpuid(cpu); 134 while (node != NULL) { 135 if (node->type == CHIP_LEVEL) { 136 if (node->child_no == 0) 137 node = NULL; 138 break; 139 } 140 node = node->parent_node; 141 } 142 if (node == NULL) 143 return; 144 145 master_cpu = BSRCPUMASK(node->members); 146 if (cpu != master_cpu) 147 return; 148 149 child = device_add_child(parent, "corepower", -1); 150 if (child == NULL) 151 device_printf(parent, "add corepower child failed\n"); 152 } 153 154 static int 155 corepower_probe(device_t dev) 156 { 157 int cpu_family, cpu_model; 158 159 if (resource_disabled("corepower", 0)) 160 return (ENXIO); 161 162 cpu_model = CPUID_TO_MODEL(cpu_id); 163 cpu_family = CPUID_TO_FAMILY(cpu_id); 164 165 if (cpu_family == 0x06) { 166 switch (cpu_model) { 167 /* Core CPUs */ 168 case 0x2a: 169 case 0x3a: 170 /* Xeon CPUs */ 171 case 0x2d: 172 case 0x3e: 173 case 0x3f: 174 case 0x4f: 175 case 0x56: 176 /* Haswell, Broadwell, Skylake, Kaby Lake, Coffee Lake */ 177 case 0x3c: 178 case 0x3d: 179 case 0x45: 180 case 0x46: 181 case 0x47: 182 case 0x4e: 183 case 0x5e: 184 case 0x8e: /* Kaby Lake, Coffee Lake */ 185 case 0x9e: /* dito */ 186 /* Atom CPUs */ 187 case 0x37: 188 case 0x4a: 189 case 0x4c: 190 case 0x4d: 191 case 0x5a: 192 case 0x5d: 193 break; 194 default: 195 return (ENXIO); 196 } 197 } 198 199 if (corepower_try(MSR_RAPL_POWER_UNIT, "MSR_RAPL_POWER_UNIT") == 0) 200 return (ENXIO); 201 202 device_set_desc(dev, "CPU On-Die Power Usage Estimation"); 203 204 return (BUS_PROBE_GENERIC); 205 } 206 207 static int 208 corepower_attach(device_t dev) 209 { 210 struct corepower_softc *sc = device_get_softc(dev); 211 uint64_t val; 212 uint32_t power_units; 213 uint32_t energy_units; 214 uint32_t time_units; 215 int cpu_family, cpu_model; 216 int cpu; 217 218 sc->sc_dev = dev; 219 sc->sc_have_sens = 0; 220 sc->sc_is_atom = 0; 221 222 cpu_family = CPUID_TO_FAMILY(cpu_id); 223 cpu_model = CPUID_TO_MODEL(cpu_id); 224 225 /* Check CPU model */ 226 if (cpu_family == 0x06) { 227 switch (cpu_model) { 228 /* Core CPUs */ 229 case 0x2a: 230 case 0x3a: 231 sc->sc_have_sens = 0xd; 232 break; 233 /* Xeon CPUs */ 234 case 0x2d: /* Only Xeon branded, Core i version should probably be 0x5 */ 235 case 0x3e: 236 case 0x3f: 237 case 0x4f: 238 case 0x56: 239 sc->sc_have_sens = 0x7; 240 break; 241 /* Haswell, Broadwell */ 242 case 0x3c: 243 case 0x3d: 244 case 0x45: 245 case 0x46: 246 case 0x47: 247 /* Check if Core or Xeon (Xeon CPUs might be 0x7) */ 248 sc->sc_have_sens = 0xf; 249 break; 250 /* Skylake, Kaby Lake, Coffee Lake */ 251 case 0x4e: 252 case 0x5e: 253 case 0x8e: /* Kaby Lake, Coffee Lake */ 254 case 0x9e: /* dito */ 255 sc->sc_have_sens = 0x1f; 256 break; 257 /* Atom CPUs */ 258 case 0x37: 259 case 0x4a: 260 case 0x4c: 261 case 0x4d: 262 case 0x5a: 263 case 0x5d: 264 sc->sc_have_sens = 0x5; 265 /* use quirk for Valleyview Atom CPUs */ 266 sc->sc_is_atom = 1; 267 break; 268 default: 269 return (ENXIO); 270 } 271 } 272 273 val = rdmsr(MSR_RAPL_POWER_UNIT); 274 275 power_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_POWER); 276 energy_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_ENERGY); 277 time_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_TIME); 278 279 sc->sc_watt_unit = (1 << power_units); 280 sc->sc_joule_unit = (1 << energy_units); 281 sc->sc_second_unit = (1 << time_units); 282 283 /* 284 * Add hw.sensors.cpu_nodeN MIB. 285 */ 286 cpu = device_get_unit(device_get_parent(dev)); 287 ksnprintf(sc->sc_sensordev.xname, sizeof(sc->sc_sensordev.xname), 288 "cpu_node%d", get_chip_ID(cpu)); 289 if ((sc->sc_have_sens & 1) && 290 corepower_try(MSR_PKG_ENERGY_STATUS, "MSR_PKG_ENERGY_STATUS")) { 291 corepower_sens_init(&sc->sc_pkg_sens, "Package Power", 292 MSR_PKG_ENERGY_STATUS, cpu); 293 sensor_attach(&sc->sc_sensordev, &sc->sc_pkg_sens.sensor); 294 } else { 295 sc->sc_have_sens &= ~1; 296 } 297 if ((sc->sc_have_sens & 2) && 298 corepower_try(MSR_DRAM_ENERGY_STATUS, "MSR_DRAM_ENERGY_STATUS")) { 299 corepower_sens_init(&sc->sc_dram_sens, "DRAM Power", 300 MSR_DRAM_ENERGY_STATUS, cpu); 301 sensor_attach(&sc->sc_sensordev, &sc->sc_dram_sens.sensor); 302 } else { 303 sc->sc_have_sens &= ~2; 304 } 305 if ((sc->sc_have_sens & 4) && 306 corepower_try(MSR_PP0_ENERGY_STATUS, "MSR_PP0_ENERGY_STATUS")) { 307 corepower_sens_init(&sc->sc_pp0_sens, "Cores Power", 308 MSR_PP0_ENERGY_STATUS, cpu); 309 sensor_attach(&sc->sc_sensordev, &sc->sc_pp0_sens.sensor); 310 } else { 311 sc->sc_have_sens &= ~4; 312 } 313 if ((sc->sc_have_sens & 8) && 314 corepower_try(MSR_PP1_ENERGY_STATUS, "MSR_PP1_ENERGY_STATUS")) { 315 corepower_sens_init(&sc->sc_pp1_sens, "Graphics Power", 316 MSR_PP1_ENERGY_STATUS, cpu); 317 sensor_attach(&sc->sc_sensordev, &sc->sc_pp1_sens.sensor); 318 } else { 319 sc->sc_have_sens &= ~8; 320 } 321 if ((sc->sc_have_sens & 0x10) && 322 corepower_try(MSR_PLATFORM_ENERGY_COUNTER, "MSR_PLATFORM_ENERGY_COUNTER") && 323 (rdmsr(MSR_PLATFORM_ENERGY_COUNTER) & 0xffffffffU) != 0) { 324 corepower_sens_init(&sc->sc_platform_sens, "Platform Power", 325 MSR_PLATFORM_ENERGY_COUNTER, cpu); 326 sensor_attach(&sc->sc_sensordev, &sc->sc_platform_sens.sensor); 327 } else { 328 sc->sc_have_sens &= ~0x10; 329 } 330 331 if (sc->sc_have_sens == 0) 332 return (ENXIO); 333 334 sc->sc_senstask = sensor_task_register2(sc, corepower_refresh, 1, cpu); 335 336 sensordev_install(&sc->sc_sensordev); 337 338 return (0); 339 } 340 341 static int 342 corepower_detach(device_t dev) 343 { 344 struct corepower_softc *sc = device_get_softc(dev); 345 346 sensordev_deinstall(&sc->sc_sensordev); 347 sensor_task_unregister2(sc->sc_senstask); 348 349 return (0); 350 } 351 352 static uint32_t 353 corepower_energy_to_uwatts(struct corepower_softc *sc, uint32_t units, 354 uint32_t secs) 355 { 356 uint64_t val; 357 358 if (sc->sc_is_atom) { 359 val = ((uint64_t)units) * sc->sc_joule_unit; 360 } else { 361 val = ((uint64_t)units) * 1000ULL * 1000ULL; 362 val /= sc->sc_joule_unit; 363 } 364 365 return val / secs; 366 } 367 368 static void 369 corepower_refresh(void *arg) 370 { 371 struct corepower_softc *sc = (struct corepower_softc *)arg; 372 373 if (sc->sc_have_sens & 1) 374 corepower_sens_update(sc, &sc->sc_pkg_sens); 375 if (sc->sc_have_sens & 2) 376 corepower_sens_update(sc, &sc->sc_dram_sens); 377 if (sc->sc_have_sens & 4) 378 corepower_sens_update(sc, &sc->sc_pp0_sens); 379 if (sc->sc_have_sens & 8) 380 corepower_sens_update(sc, &sc->sc_pp1_sens); 381 if (sc->sc_have_sens & 0x10) 382 corepower_sens_update(sc, &sc->sc_platform_sens); 383 } 384 385 static void 386 corepower_sens_init(struct corepower_sensor *sens, char *desc, u_int msr, 387 int cpu) 388 { 389 ksnprintf(sens->sensor.desc, sizeof(sens->sensor.desc), "node%d %s", 390 get_chip_ID(cpu), desc); 391 sens->sensor.type = SENSOR_WATTS; 392 sens->msr = msr; 393 sens->energy = rdmsr(sens->msr) & 0xffffffffU; 394 } 395 396 static void 397 corepower_sens_update(struct corepower_softc *sc, 398 struct corepower_sensor *sens) 399 { 400 uint64_t a, res; 401 402 a = rdmsr(sens->msr) & 0xffffffffU; 403 if (sens->energy > a) { 404 res = (0x100000000ULL - sens->energy) + a; 405 } else { 406 res = a - sens->energy; 407 } 408 sens->energy = a; 409 sens->sensor.value = corepower_energy_to_uwatts(sc, res, 1); 410 } 411 412 static int 413 corepower_try(u_int msr, char *name) 414 { 415 uint64_t val; 416 417 if (rdmsr_safe(msr, &val) != 0) { 418 kprintf("msr %s (0x%08x) not available\n", name, msr); 419 return 0; 420 } 421 return 1; 422 } 423