1 /* 2 * Copyright (c) 2015 Imre Vadász <imre@vdsz.com> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 17 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, 18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 20 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN 23 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 24 * POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* 28 * Device driver for Intel's On Die power usage estimation via MSR. 29 * Supported by Sandy Bridge and later CPUs, and also by Atom CPUs 30 * of the Silvermont and later architectures. 31 */ 32 33 #include <sys/param.h> 34 #include <sys/bus.h> 35 #include <sys/systm.h> 36 #include <sys/module.h> 37 #include <sys/conf.h> 38 #include <sys/cpu_topology.h> 39 #include <sys/kernel.h> 40 #include <sys/sensors.h> 41 #include <sys/bitops.h> 42 43 #include <machine/specialreg.h> 44 #include <machine/cpufunc.h> 45 #include <machine/cputypes.h> 46 #include <machine/md_var.h> 47 48 #include "cpu_if.h" 49 50 #define MSR_RAPL_POWER_UNIT_POWER __BITS64(0, 3) 51 #define MSR_RAPL_POWER_UNIT_ENERGY __BITS64(8, 12) 52 #define MSR_RAPL_POWER_UNIT_TIME __BITS64(16, 19) 53 54 struct corepower_sensor { 55 uint64_t energy; 56 u_int msr; 57 struct ksensor sensor; 58 }; 59 60 struct corepower_softc { 61 device_t sc_dev; 62 63 uint32_t sc_watt_divisor; 64 uint32_t sc_joule_divisor; 65 uint32_t sc_second_divisor; 66 67 int sc_have_sens; 68 69 struct corepower_sensor sc_pkg_sens; 70 struct corepower_sensor sc_dram_sens; 71 struct corepower_sensor sc_pp0_sens; 72 struct corepower_sensor sc_pp1_sens; 73 74 struct ksensordev sc_sensordev; 75 struct sensor_task *sc_senstask; 76 }; 77 78 /* 79 * Device methods. 80 */ 81 static void corepower_identify(driver_t *driver, device_t parent); 82 static int corepower_probe(device_t dev); 83 static int corepower_attach(device_t dev); 84 static int corepower_detach(device_t dev); 85 static uint32_t corepower_energy_to_uwatts(struct corepower_softc *sc, 86 uint32_t units, uint32_t secs); 87 static void corepower_refresh(void *arg); 88 static void corepower_sens_init(struct corepower_sensor *sens, 89 char *desc, u_int msr, int cpu); 90 static void corepower_sens_update(struct corepower_softc *sc, 91 struct corepower_sensor *sens); 92 93 static device_method_t corepower_methods[] = { 94 /* Device interface */ 95 DEVMETHOD(device_identify, corepower_identify), 96 DEVMETHOD(device_probe, corepower_probe), 97 DEVMETHOD(device_attach, corepower_attach), 98 DEVMETHOD(device_detach, corepower_detach), 99 100 DEVMETHOD_END 101 }; 102 103 static driver_t corepower_driver = { 104 "corepower", 105 corepower_methods, 106 sizeof(struct corepower_softc), 107 }; 108 109 static devclass_t corepower_devclass; 110 DRIVER_MODULE(corepower, cpu, corepower_driver, corepower_devclass, NULL, NULL); 111 MODULE_VERSION(corepower, 1); 112 113 static void 114 corepower_identify(driver_t *driver, device_t parent) 115 { 116 device_t child; 117 const struct cpu_node *node; 118 int cpu, master_cpu; 119 120 /* Make sure we're not being doubly invoked. */ 121 if (device_find_child(parent, "corepower", -1) != NULL) 122 return; 123 124 /* Check that the vendor is Intel. */ 125 if (cpu_vendor_id != CPU_VENDOR_INTEL) 126 return; 127 128 /* We only want one child per CPU package */ 129 cpu = device_get_unit(parent); 130 node = get_cpu_node_by_cpuid(cpu); 131 while (node != NULL) { 132 if (node->type == PACKAGE_LEVEL) { 133 if (node->child_no == 0) 134 node = NULL; 135 break; 136 } 137 node = node->parent_node; 138 } 139 if (node == NULL) 140 return; 141 142 master_cpu = BSRCPUMASK(node->members); 143 if (cpu != master_cpu) 144 return; 145 146 child = device_add_child(parent, "corepower", -1); 147 if (child == NULL) 148 device_printf(parent, "add corepower child failed\n"); 149 } 150 151 static int 152 corepower_probe(device_t dev) 153 { 154 int cpu_family, cpu_model; 155 156 if (resource_disabled("corepower", 0)) 157 return (ENXIO); 158 159 cpu_model = CPUID_TO_MODEL(cpu_id); 160 cpu_family = CPUID_TO_FAMILY(cpu_id); 161 162 if (cpu_family == 0x06) { 163 switch (cpu_model) { 164 /* Core CPUs */ 165 case 0x2a: 166 case 0x3a: 167 /* Xeon CPUs */ 168 case 0x2d: 169 case 0x3e: 170 case 0x3f: 171 case 0x4f: 172 case 0x56: 173 /* Haswell, Broadwell, Skylake */ 174 case 0x3c: 175 case 0x3d: 176 case 0x45: 177 case 0x46: 178 case 0x47: 179 case 0x4e: 180 case 0x5e: 181 /* Atom CPUs */ 182 case 0x37: 183 case 0x4a: 184 case 0x4c: 185 case 0x5a: 186 case 0x5d: 187 break; 188 default: 189 return (ENXIO); 190 } 191 } 192 193 device_set_desc(dev, "CPU On-Die Power Usage Estimation"); 194 195 return (BUS_PROBE_GENERIC); 196 } 197 198 static int 199 corepower_attach(device_t dev) 200 { 201 struct corepower_softc *sc = device_get_softc(dev); 202 uint64_t val; 203 uint32_t power_units; 204 uint32_t energy_units; 205 uint32_t time_units; 206 int cpu_family, cpu_model; 207 int cpu; 208 209 sc->sc_dev = dev; 210 sc->sc_have_sens = 0; 211 212 cpu_family = CPUID_TO_FAMILY(cpu_id); 213 cpu_model = CPUID_TO_MODEL(cpu_id); 214 215 /* XXX Check CPU version */ 216 if (cpu_family == 0x06) { 217 switch (cpu_model) { 218 /* Core CPUs */ 219 case 0x2a: 220 case 0x3a: 221 sc->sc_have_sens = 0xd; 222 break; 223 /* Xeon CPUs */ 224 case 0x2d: /* Only Xeon branded, Core i version should probably be 0x5 */ 225 case 0x3e: 226 case 0x3f: 227 case 0x4f: 228 case 0x56: 229 sc->sc_have_sens = 0x7; 230 break; 231 /* Haswell, Broadwell, Skylake */ 232 case 0x3c: 233 case 0x3d: 234 case 0x45: 235 case 0x46: 236 case 0x47: 237 case 0x4e: 238 case 0x5e: 239 /* Check if Core or Xeon (Xeon CPUs might be 0x7) */ 240 sc->sc_have_sens = 0xf; 241 break; 242 /* Atom CPUs */ 243 case 0x37: 244 case 0x4a: 245 case 0x4c: 246 case 0x5a: 247 case 0x5d: 248 sc->sc_have_sens = 0x5; 249 break; 250 default: 251 return (ENXIO); 252 } 253 } 254 255 val = rdmsr(MSR_RAPL_POWER_UNIT); 256 257 power_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_POWER); 258 energy_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_ENERGY); 259 time_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_TIME); 260 261 sc->sc_watt_divisor = (1 << power_units); 262 sc->sc_joule_divisor = (1 << energy_units); 263 sc->sc_second_divisor = (1 << time_units); 264 265 /* 266 * Add hw.sensors.cpu_nodeN MIB. 267 */ 268 cpu = device_get_unit(device_get_parent(dev)); 269 ksnprintf(sc->sc_sensordev.xname, sizeof(sc->sc_sensordev.xname), 270 "cpu_node%d", get_chip_ID(cpu)); 271 if (sc->sc_have_sens & 1) { 272 corepower_sens_init(&sc->sc_pkg_sens, "Package Power", 273 MSR_PKG_ENERGY_STATUS, cpu); 274 sensor_attach(&sc->sc_sensordev, &sc->sc_pkg_sens.sensor); 275 } 276 if (sc->sc_have_sens & 2) { 277 corepower_sens_init(&sc->sc_dram_sens, "DRAM Power", 278 MSR_DRAM_ENERGY_STATUS, cpu); 279 sensor_attach(&sc->sc_sensordev, &sc->sc_dram_sens.sensor); 280 } 281 if (sc->sc_have_sens & 4) { 282 corepower_sens_init(&sc->sc_pp0_sens, "Cores Power", 283 MSR_PP0_ENERGY_STATUS, cpu); 284 sensor_attach(&sc->sc_sensordev, &sc->sc_pp0_sens.sensor); 285 } 286 if (sc->sc_have_sens & 8) { 287 corepower_sens_init(&sc->sc_pp1_sens, "Graphics Power", 288 MSR_PP1_ENERGY_STATUS, cpu); 289 sensor_attach(&sc->sc_sensordev, &sc->sc_pp1_sens.sensor); 290 } 291 292 sc->sc_senstask = sensor_task_register2(sc, corepower_refresh, 1, cpu); 293 294 sensordev_install(&sc->sc_sensordev); 295 296 return (0); 297 } 298 299 static int 300 corepower_detach(device_t dev) 301 { 302 struct corepower_softc *sc = device_get_softc(dev); 303 304 sensordev_deinstall(&sc->sc_sensordev); 305 sensor_task_unregister2(sc->sc_senstask); 306 307 return (0); 308 } 309 310 static uint32_t 311 corepower_energy_to_uwatts(struct corepower_softc *sc, uint32_t units, 312 uint32_t secs) 313 { 314 uint64_t val; 315 316 val = ((uint64_t)units) * 1000ULL * 1000ULL; 317 val /= sc->sc_joule_divisor; 318 319 return val / secs; 320 } 321 322 static void 323 corepower_refresh(void *arg) 324 { 325 struct corepower_softc *sc = (struct corepower_softc *)arg; 326 327 if (sc->sc_have_sens & 1) 328 corepower_sens_update(sc, &sc->sc_pkg_sens); 329 if (sc->sc_have_sens & 2) 330 corepower_sens_update(sc, &sc->sc_dram_sens); 331 if (sc->sc_have_sens & 4) 332 corepower_sens_update(sc, &sc->sc_pp0_sens); 333 if (sc->sc_have_sens & 8) 334 corepower_sens_update(sc, &sc->sc_pp1_sens); 335 } 336 337 static void 338 corepower_sens_init(struct corepower_sensor *sens, char *desc, u_int msr, 339 int cpu) 340 { 341 ksnprintf(sens->sensor.desc, sizeof(sens->sensor.desc), "node%d %s", 342 get_chip_ID(cpu), desc); 343 sens->sensor.type = SENSOR_WATTS; 344 sens->msr = msr; 345 sens->energy = rdmsr(sens->msr) & 0xffffffffU; 346 } 347 348 static void 349 corepower_sens_update(struct corepower_softc *sc, 350 struct corepower_sensor *sens) 351 { 352 uint64_t a, res; 353 354 a = rdmsr(sens->msr) & 0xffffffffU; 355 if (sens->energy > a) { 356 res = (0x100000000ULL - sens->energy) + a; 357 } else { 358 res = a - sens->energy; 359 } 360 sens->energy = a; 361 sens->sensor.value = corepower_energy_to_uwatts(sc, res, 1); 362 } 363