1 /*
2 * Copyright (c) 2015 Imre Vadász <imre@vdsz.com>
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
18 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
23 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24 * POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /*
28 * Device driver for Intel's On Die power usage estimation via MSR.
29 * Supported by Sandy Bridge and later CPUs, and also by Atom CPUs
30 * of the Silvermont and later architectures.
31 */
32
33 #include <sys/param.h>
34 #include <sys/bus.h>
35 #include <sys/systm.h>
36 #include <sys/module.h>
37 #include <sys/conf.h>
38 #include <sys/cpu_topology.h>
39 #include <sys/kernel.h>
40 #include <sys/sensors.h>
41 #include <sys/bitops.h>
42
43 #include <machine/specialreg.h>
44 #include <machine/cpufunc.h>
45 #include <machine/cputypes.h>
46 #include <machine/md_var.h>
47
48 #include "cpu_if.h"
49
/*
 * Field masks for the value read from MSR_RAPL_POWER_UNIT.  The attach
 * routine extracts each field with __SHIFTOUT() and uses it as a
 * power-of-two exponent.
 */
#define MSR_RAPL_POWER_UNIT_POWER	__BITS64(0, 3)
#define MSR_RAPL_POWER_UNIT_ENERGY	__BITS64(8, 12)
#define MSR_RAPL_POWER_UNIT_TIME	__BITS64(16, 19)
53
54 struct corepower_sensor {
55 uint64_t energy;
56 u_int msr;
57 struct ksensor sensor;
58 };
59
60 struct corepower_softc {
61 device_t sc_dev;
62
63 uint32_t sc_watt_unit;
64 uint32_t sc_joule_unit;
65 uint32_t sc_second_unit;
66
67 int sc_have_sens;
68 int sc_is_atom;
69
70 struct corepower_sensor sc_pkg_sens;
71 struct corepower_sensor sc_dram_sens;
72 struct corepower_sensor sc_pp0_sens;
73 struct corepower_sensor sc_pp1_sens;
74 struct corepower_sensor sc_platform_sens;
75
76 struct ksensordev sc_sensordev;
77 struct sensor_task *sc_senstask;
78 };
79
80 /*
81 * Device methods.
82 */
83 static void corepower_identify(driver_t *driver, device_t parent);
84 static int corepower_probe(device_t dev);
85 static int corepower_attach(device_t dev);
86 static int corepower_detach(device_t dev);
87 static uint32_t corepower_energy_to_uwatts(struct corepower_softc *sc,
88 uint32_t units, uint32_t secs);
89 static void corepower_refresh(void *arg);
90 static void corepower_sens_init(struct corepower_sensor *sens,
91 char *desc, u_int msr, int cpu);
92 static void corepower_sens_update(struct corepower_softc *sc,
93 struct corepower_sensor *sens);
94 static int corepower_try(u_int msr, char *name);
95
96 static device_method_t corepower_methods[] = {
97 /* Device interface */
98 DEVMETHOD(device_identify, corepower_identify),
99 DEVMETHOD(device_probe, corepower_probe),
100 DEVMETHOD(device_attach, corepower_attach),
101 DEVMETHOD(device_detach, corepower_detach),
102
103 DEVMETHOD_END
104 };
105
106 static driver_t corepower_driver = {
107 "corepower",
108 corepower_methods,
109 sizeof(struct corepower_softc),
110 };
111
112 static devclass_t corepower_devclass;
113 DRIVER_MODULE(corepower, cpu, corepower_driver, corepower_devclass, NULL, NULL);
114 MODULE_VERSION(corepower, 1);
115
116 static void
corepower_identify(driver_t * driver,device_t parent)117 corepower_identify(driver_t *driver, device_t parent)
118 {
119 device_t child;
120 const struct cpu_node *node;
121 int cpu, master_cpu;
122
123 /* Make sure we're not being doubly invoked. */
124 if (device_find_child(parent, "corepower", -1) != NULL)
125 return;
126
127 /* Check that the vendor is Intel. */
128 if (cpu_vendor_id != CPU_VENDOR_INTEL)
129 return;
130
131 /* We only want one child per CPU package */
132 cpu = device_get_unit(parent);
133 node = get_cpu_node_by_cpuid(cpu);
134 while (node != NULL) {
135 if (node->type == CHIP_LEVEL) {
136 if (node->child_no == 0)
137 node = NULL;
138 break;
139 }
140 node = node->parent_node;
141 }
142 if (node == NULL)
143 return;
144
145 master_cpu = BSRCPUMASK(node->members);
146 if (cpu != master_cpu)
147 return;
148
149 child = device_add_child(parent, "corepower", -1);
150 if (child == NULL)
151 device_printf(parent, "add corepower child failed\n");
152 }
153
154 static int
corepower_probe(device_t dev)155 corepower_probe(device_t dev)
156 {
157 int cpu_family, cpu_model;
158
159 if (resource_disabled("corepower", 0))
160 return (ENXIO);
161
162 cpu_model = CPUID_TO_MODEL(cpu_id);
163 cpu_family = CPUID_TO_FAMILY(cpu_id);
164
165 if (cpu_family == 0x06) {
166 switch (cpu_model) {
167 /* Core CPUs */
168 case 0x2a:
169 case 0x3a:
170 /* Xeon CPUs */
171 case 0x2d:
172 case 0x3e:
173 case 0x3f:
174 case 0x4f:
175 case 0x56:
176 /* Haswell, Broadwell, Skylake, Kaby Lake, Coffee Lake */
177 case 0x3c:
178 case 0x3d:
179 case 0x45:
180 case 0x46:
181 case 0x47:
182 case 0x4e:
183 case 0x5e:
184 case 0x8e: /* Kaby Lake, Coffee Lake */
185 case 0x9e: /* dito */
186 /* Atom CPUs */
187 case 0x37:
188 case 0x4a:
189 case 0x4c:
190 case 0x4d:
191 case 0x5a:
192 case 0x5d:
193 break;
194 default:
195 return (ENXIO);
196 }
197 }
198
199 if (corepower_try(MSR_RAPL_POWER_UNIT, "MSR_RAPL_POWER_UNIT") == 0)
200 return (ENXIO);
201
202 device_set_desc(dev, "CPU On-Die Power Usage Estimation");
203
204 return (BUS_PROBE_GENERIC);
205 }
206
207 static int
corepower_attach(device_t dev)208 corepower_attach(device_t dev)
209 {
210 struct corepower_softc *sc = device_get_softc(dev);
211 uint64_t val;
212 uint32_t power_units;
213 uint32_t energy_units;
214 uint32_t time_units;
215 int cpu_family, cpu_model;
216 int cpu;
217
218 sc->sc_dev = dev;
219 sc->sc_have_sens = 0;
220 sc->sc_is_atom = 0;
221
222 cpu_family = CPUID_TO_FAMILY(cpu_id);
223 cpu_model = CPUID_TO_MODEL(cpu_id);
224
225 /* Check CPU model */
226 if (cpu_family == 0x06) {
227 switch (cpu_model) {
228 /* Core CPUs */
229 case 0x2a:
230 case 0x3a:
231 sc->sc_have_sens = 0xd;
232 break;
233 /* Xeon CPUs */
234 case 0x2d: /* Only Xeon branded, Core i version should probably be 0x5 */
235 case 0x3e:
236 case 0x3f:
237 case 0x4f:
238 case 0x56:
239 sc->sc_have_sens = 0x7;
240 break;
241 /* Haswell, Broadwell */
242 case 0x3c:
243 case 0x3d:
244 case 0x45:
245 case 0x46:
246 case 0x47:
247 /* Check if Core or Xeon (Xeon CPUs might be 0x7) */
248 sc->sc_have_sens = 0xf;
249 break;
250 /* Skylake, Kaby Lake, Coffee Lake */
251 case 0x4e:
252 case 0x5e:
253 case 0x8e: /* Kaby Lake, Coffee Lake */
254 case 0x9e: /* dito */
255 sc->sc_have_sens = 0x1f;
256 break;
257 /* Atom CPUs */
258 case 0x37:
259 case 0x4a:
260 case 0x4c:
261 case 0x4d:
262 case 0x5a:
263 case 0x5d:
264 sc->sc_have_sens = 0x5;
265 /* use quirk for Valleyview Atom CPUs */
266 sc->sc_is_atom = 1;
267 break;
268 default:
269 return (ENXIO);
270 }
271 }
272
273 val = rdmsr(MSR_RAPL_POWER_UNIT);
274
275 power_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_POWER);
276 energy_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_ENERGY);
277 time_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_TIME);
278
279 sc->sc_watt_unit = (1 << power_units);
280 sc->sc_joule_unit = (1 << energy_units);
281 sc->sc_second_unit = (1 << time_units);
282
283 /*
284 * Add hw.sensors.cpu_nodeN MIB.
285 */
286 cpu = device_get_unit(device_get_parent(dev));
287 ksnprintf(sc->sc_sensordev.xname, sizeof(sc->sc_sensordev.xname),
288 "cpu_node%d", get_chip_ID(cpu));
289 if ((sc->sc_have_sens & 1) &&
290 corepower_try(MSR_PKG_ENERGY_STATUS, "MSR_PKG_ENERGY_STATUS")) {
291 corepower_sens_init(&sc->sc_pkg_sens, "Package Power",
292 MSR_PKG_ENERGY_STATUS, cpu);
293 sensor_attach(&sc->sc_sensordev, &sc->sc_pkg_sens.sensor);
294 } else {
295 sc->sc_have_sens &= ~1;
296 }
297 if ((sc->sc_have_sens & 2) &&
298 corepower_try(MSR_DRAM_ENERGY_STATUS, "MSR_DRAM_ENERGY_STATUS")) {
299 corepower_sens_init(&sc->sc_dram_sens, "DRAM Power",
300 MSR_DRAM_ENERGY_STATUS, cpu);
301 sensor_attach(&sc->sc_sensordev, &sc->sc_dram_sens.sensor);
302 } else {
303 sc->sc_have_sens &= ~2;
304 }
305 if ((sc->sc_have_sens & 4) &&
306 corepower_try(MSR_PP0_ENERGY_STATUS, "MSR_PP0_ENERGY_STATUS")) {
307 corepower_sens_init(&sc->sc_pp0_sens, "Cores Power",
308 MSR_PP0_ENERGY_STATUS, cpu);
309 sensor_attach(&sc->sc_sensordev, &sc->sc_pp0_sens.sensor);
310 } else {
311 sc->sc_have_sens &= ~4;
312 }
313 if ((sc->sc_have_sens & 8) &&
314 corepower_try(MSR_PP1_ENERGY_STATUS, "MSR_PP1_ENERGY_STATUS")) {
315 corepower_sens_init(&sc->sc_pp1_sens, "Graphics Power",
316 MSR_PP1_ENERGY_STATUS, cpu);
317 sensor_attach(&sc->sc_sensordev, &sc->sc_pp1_sens.sensor);
318 } else {
319 sc->sc_have_sens &= ~8;
320 }
321 if ((sc->sc_have_sens & 0x10) &&
322 corepower_try(MSR_PLATFORM_ENERGY_COUNTER, "MSR_PLATFORM_ENERGY_COUNTER") &&
323 (rdmsr(MSR_PLATFORM_ENERGY_COUNTER) & 0xffffffffU) != 0) {
324 corepower_sens_init(&sc->sc_platform_sens, "Platform Power",
325 MSR_PLATFORM_ENERGY_COUNTER, cpu);
326 sensor_attach(&sc->sc_sensordev, &sc->sc_platform_sens.sensor);
327 } else {
328 sc->sc_have_sens &= ~0x10;
329 }
330
331 if (sc->sc_have_sens == 0)
332 return (ENXIO);
333
334 sc->sc_senstask = sensor_task_register2(sc, corepower_refresh, 1, cpu);
335
336 sensordev_install(&sc->sc_sensordev);
337
338 return (0);
339 }
340
341 static int
corepower_detach(device_t dev)342 corepower_detach(device_t dev)
343 {
344 struct corepower_softc *sc = device_get_softc(dev);
345
346 sensordev_deinstall(&sc->sc_sensordev);
347 sensor_task_unregister2(sc->sc_senstask);
348
349 return (0);
350 }
351
352 static uint32_t
corepower_energy_to_uwatts(struct corepower_softc * sc,uint32_t units,uint32_t secs)353 corepower_energy_to_uwatts(struct corepower_softc *sc, uint32_t units,
354 uint32_t secs)
355 {
356 uint64_t val;
357
358 if (sc->sc_is_atom) {
359 val = ((uint64_t)units) * sc->sc_joule_unit;
360 } else {
361 val = ((uint64_t)units) * 1000ULL * 1000ULL;
362 val /= sc->sc_joule_unit;
363 }
364
365 return val / secs;
366 }
367
368 static void
corepower_refresh(void * arg)369 corepower_refresh(void *arg)
370 {
371 struct corepower_softc *sc = (struct corepower_softc *)arg;
372
373 if (sc->sc_have_sens & 1)
374 corepower_sens_update(sc, &sc->sc_pkg_sens);
375 if (sc->sc_have_sens & 2)
376 corepower_sens_update(sc, &sc->sc_dram_sens);
377 if (sc->sc_have_sens & 4)
378 corepower_sens_update(sc, &sc->sc_pp0_sens);
379 if (sc->sc_have_sens & 8)
380 corepower_sens_update(sc, &sc->sc_pp1_sens);
381 if (sc->sc_have_sens & 0x10)
382 corepower_sens_update(sc, &sc->sc_platform_sens);
383 }
384
385 static void
corepower_sens_init(struct corepower_sensor * sens,char * desc,u_int msr,int cpu)386 corepower_sens_init(struct corepower_sensor *sens, char *desc, u_int msr,
387 int cpu)
388 {
389 ksnprintf(sens->sensor.desc, sizeof(sens->sensor.desc), "node%d %s",
390 get_chip_ID(cpu), desc);
391 sens->sensor.type = SENSOR_WATTS;
392 sens->msr = msr;
393 sens->energy = rdmsr(sens->msr) & 0xffffffffU;
394 }
395
396 static void
corepower_sens_update(struct corepower_softc * sc,struct corepower_sensor * sens)397 corepower_sens_update(struct corepower_softc *sc,
398 struct corepower_sensor *sens)
399 {
400 uint64_t a, res;
401
402 a = rdmsr(sens->msr) & 0xffffffffU;
403 if (sens->energy > a) {
404 res = (0x100000000ULL - sens->energy) + a;
405 } else {
406 res = a - sens->energy;
407 }
408 sens->energy = a;
409 sens->sensor.value = corepower_energy_to_uwatts(sc, res, 1);
410 }
411
412 static int
corepower_try(u_int msr,char * name)413 corepower_try(u_int msr, char *name)
414 {
415 uint64_t val;
416
417 if (rdmsr_safe(msr, &val) != 0) {
418 kprintf("msr %s (0x%08x) not available\n", name, msr);
419 return 0;
420 }
421 return 1;
422 }
423