1 /*
2  * Copyright (c) 2015 Imre Vadász <imre@vdsz.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
23  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Device driver for Intel's On Die power usage estimation via MSR.
29  * Supported by Sandy Bridge and later CPUs, and also by Atom CPUs
30  * of the Silvermont and later architectures.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/bus.h>
35 #include <sys/systm.h>
36 #include <sys/module.h>
37 #include <sys/conf.h>
38 #include <sys/cpu_topology.h>
39 #include <sys/kernel.h>
40 #include <sys/sensors.h>
41 #include <sys/bitops.h>
42 
43 #include <machine/specialreg.h>
44 #include <machine/cpufunc.h>
45 #include <machine/cputypes.h>
46 #include <machine/md_var.h>
47 
48 #include "cpu_if.h"
49 
/*
 * Bit ranges within the 64-bit value read from MSR_RAPL_POWER_UNIT.
 * corepower_attach() extracts each field with __SHIFTOUT() and uses it
 * as a power-of-two exponent (1 << field).
 */
#define MSR_RAPL_POWER_UNIT_POWER	__BITS64(0, 3)
#define MSR_RAPL_POWER_UNIT_ENERGY	__BITS64(8, 12)
#define MSR_RAPL_POWER_UNIT_TIME	__BITS64(16, 19)
53 
/*
 * Per-counter state: one instance exists for every energy status MSR
 * the driver may expose as a sensor.
 */
struct corepower_sensor {
	uint64_t	energy;		/* previous reading (low 32 bits of the MSR) */
	u_int		msr;		/* MSR number this sensor is read from */
	struct ksensor	sensor;		/* sensor entry: desc, type and value */
};
59 
/*
 * Per-device driver state.
 */
struct corepower_softc {
	device_t		sc_dev;		/* our own device handle */

	/* Unit scales, each stored as 1 << (field of MSR_RAPL_POWER_UNIT). */
	uint32_t		sc_watt_unit;
	uint32_t		sc_joule_unit;
	uint32_t		sc_second_unit;

	/*
	 * Bitmask of usable sensors:
	 *   0x01 package, 0x02 DRAM, 0x04 PP0 (cores),
	 *   0x08 PP1 (graphics), 0x10 platform.
	 */
	int			sc_have_sens;
	/* Non-zero selects the alternate conversion in corepower_energy_to_uwatts(). */
	int			sc_is_atom;

	struct corepower_sensor	sc_pkg_sens;
	struct corepower_sensor	sc_dram_sens;
	struct corepower_sensor	sc_pp0_sens;
	struct corepower_sensor	sc_pp1_sens;
	struct corepower_sensor	sc_platform_sens;

	struct ksensordev	sc_sensordev;	/* sensor device the sensors attach to */
	struct sensor_task	*sc_senstask;	/* handle of the periodic refresh task */
};
79 
/*
 * Device methods (referenced from corepower_methods[]).
 */
static void	corepower_identify(driver_t *driver, device_t parent);
static int	corepower_probe(device_t dev);
static int	corepower_attach(device_t dev);
static int	corepower_detach(device_t dev);
/*
 * Internal helpers: unit conversion, periodic refresh, per-sensor
 * setup/update, and a non-faulting MSR availability check.
 */
static uint32_t	corepower_energy_to_uwatts(struct corepower_softc *sc,
					   uint32_t units, uint32_t secs);
static void	corepower_refresh(void *arg);
static void	corepower_sens_init(struct corepower_sensor *sens,
				    char *desc, u_int msr, int cpu);
static void	corepower_sens_update(struct corepower_softc *sc,
				      struct corepower_sensor *sens);
static int	corepower_try(u_int msr, char *name);
95 
/*
 * Method table binding the device interface entry points to this
 * driver's implementations.
 */
static device_method_t corepower_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	corepower_identify),
	DEVMETHOD(device_probe,		corepower_probe),
	DEVMETHOD(device_attach,	corepower_attach),
	DEVMETHOD(device_detach,	corepower_detach),

	DEVMETHOD_END
};
105 
/*
 * Driver description: name, method table, and the size of the
 * per-device softc (struct corepower_softc).
 */
static driver_t corepower_driver = {
	"corepower",
	corepower_methods,
	sizeof(struct corepower_softc),
};
111 
/*
 * Module glue: register the "corepower" driver with "cpu" as its parent
 * and declare module version 1.  No module event handler is supplied.
 */
static devclass_t corepower_devclass;
DRIVER_MODULE(corepower, cpu, corepower_driver, corepower_devclass, NULL, NULL);
MODULE_VERSION(corepower, 1);
115 
116 static void
117 corepower_identify(driver_t *driver, device_t parent)
118 {
119 	device_t child;
120 	const struct cpu_node *node;
121 	int cpu, master_cpu;
122 
123 	/* Make sure we're not being doubly invoked. */
124 	if (device_find_child(parent, "corepower", -1) != NULL)
125 		return;
126 
127 	/* Check that the vendor is Intel. */
128 	if (cpu_vendor_id != CPU_VENDOR_INTEL)
129 		return;
130 
131 	/* We only want one child per CPU package */
132 	cpu = device_get_unit(parent);
133 	node = get_cpu_node_by_cpuid(cpu);
134 	while (node != NULL) {
135 		if (node->type == CHIP_LEVEL) {
136 			if (node->child_no == 0)
137 				node = NULL;
138 			break;
139 		}
140 		node = node->parent_node;
141 	}
142 	if (node == NULL)
143 		return;
144 
145 	master_cpu = BSRCPUMASK(node->members);
146 	if (cpu != master_cpu)
147 		return;
148 
149 	child = device_add_child(parent, "corepower", -1);
150 	if (child == NULL)
151 		device_printf(parent, "add corepower child failed\n");
152 }
153 
154 static int
155 corepower_probe(device_t dev)
156 {
157 	int cpu_family, cpu_model;
158 
159 	if (resource_disabled("corepower", 0))
160 		return (ENXIO);
161 
162 	cpu_model = CPUID_TO_MODEL(cpu_id);
163 	cpu_family = CPUID_TO_FAMILY(cpu_id);
164 
165 	if (cpu_family == 0x06) {
166 		switch (cpu_model) {
167 		/* Core CPUs */
168 		case 0x2a:
169 		case 0x3a:
170 		/* Xeon CPUs */
171 		case 0x2d:
172 		case 0x3e:
173 		case 0x3f:
174 		case 0x4f:
175 		case 0x56:
176 		/* Haswell, Broadwell, Skylake, Kabylake */
177 		case 0x3c:
178 		case 0x3d:
179 		case 0x45:
180 		case 0x46:
181 		case 0x47:
182 		case 0x4e:
183 		case 0x5e:
184 		case 0x8e:	/* Kabylake */
185 		/* Atom CPUs */
186 		case 0x37:
187 		case 0x4a:
188 		case 0x4c:
189 		case 0x4d:
190 		case 0x5a:
191 		case 0x5d:
192 			break;
193 		default:
194 			return (ENXIO);
195 		}
196 	}
197 
198 	if (corepower_try(MSR_RAPL_POWER_UNIT, "MSR_RAPL_POWER_UNIT") == 0)
199 		return (ENXIO);
200 
201 	device_set_desc(dev, "CPU On-Die Power Usage Estimation");
202 
203 	return (BUS_PROBE_GENERIC);
204 }
205 
206 static int
207 corepower_attach(device_t dev)
208 {
209 	struct corepower_softc *sc = device_get_softc(dev);
210 	uint64_t val;
211 	uint32_t power_units;
212 	uint32_t energy_units;
213 	uint32_t time_units;
214 	int cpu_family, cpu_model;
215 	int cpu;
216 
217 	sc->sc_dev = dev;
218 	sc->sc_have_sens = 0;
219 	sc->sc_is_atom = 0;
220 
221 	cpu_family = CPUID_TO_FAMILY(cpu_id);
222 	cpu_model = CPUID_TO_MODEL(cpu_id);
223 
224 	/* Check CPU model */
225 	if (cpu_family == 0x06) {
226 		switch (cpu_model) {
227 		/* Core CPUs */
228 		case 0x2a:
229 		case 0x3a:
230 			sc->sc_have_sens = 0xd;
231 			break;
232 		/* Xeon CPUs */
233 		case 0x2d: /* Only Xeon branded, Core i version should probably be 0x5 */
234 		case 0x3e:
235 		case 0x3f:
236 		case 0x4f:
237 		case 0x56:
238 			sc->sc_have_sens = 0x7;
239 			break;
240 		/* Haswell, Broadwell */
241 		case 0x3c:
242 		case 0x3d:
243 		case 0x45:
244 		case 0x46:
245 		case 0x47:
246 			/* Check if Core or Xeon (Xeon CPUs might be 0x7) */
247 			sc->sc_have_sens = 0xf;
248 			break;
249 		/* Skylake, Kabylake, Coffeelake */
250 		case 0x4e:
251 		case 0x5e:
252 		case 0x8e:	/* Kabylake */
253 			sc->sc_have_sens = 0x1f;
254 			break;
255 		/* Atom CPUs */
256 		case 0x37:
257 		case 0x4a:
258 		case 0x4c:
259 		case 0x4d:
260 		case 0x5a:
261 		case 0x5d:
262 			sc->sc_have_sens = 0x5;
263 			/* use quirk for Valleyview Atom CPUs */
264 			sc->sc_is_atom = 1;
265 			break;
266 		default:
267 			return (ENXIO);
268 		}
269 	}
270 
271 	val = rdmsr(MSR_RAPL_POWER_UNIT);
272 
273 	power_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_POWER);
274 	energy_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_ENERGY);
275 	time_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_TIME);
276 
277 	sc->sc_watt_unit = (1 << power_units);
278 	sc->sc_joule_unit = (1 << energy_units);
279 	sc->sc_second_unit = (1 << time_units);
280 
281 	/*
282 	 * Add hw.sensors.cpu_nodeN MIB.
283 	 */
284 	cpu = device_get_unit(device_get_parent(dev));
285 	ksnprintf(sc->sc_sensordev.xname, sizeof(sc->sc_sensordev.xname),
286 	    "cpu_node%d", get_chip_ID(cpu));
287 	if ((sc->sc_have_sens & 1) &&
288 	    corepower_try(MSR_PKG_ENERGY_STATUS, "MSR_PKG_ENERGY_STATUS")) {
289 		corepower_sens_init(&sc->sc_pkg_sens, "Package Power",
290 		    MSR_PKG_ENERGY_STATUS, cpu);
291 		sensor_attach(&sc->sc_sensordev, &sc->sc_pkg_sens.sensor);
292 	} else {
293 		sc->sc_have_sens &= ~1;
294 	}
295 	if ((sc->sc_have_sens & 2) &&
296 	    corepower_try(MSR_DRAM_ENERGY_STATUS, "MSR_DRAM_ENERGY_STATUS")) {
297 		corepower_sens_init(&sc->sc_dram_sens, "DRAM Power",
298 		    MSR_DRAM_ENERGY_STATUS, cpu);
299 		sensor_attach(&sc->sc_sensordev, &sc->sc_dram_sens.sensor);
300 	} else {
301 		sc->sc_have_sens &= ~2;
302 	}
303 	if ((sc->sc_have_sens & 4) &&
304 	    corepower_try(MSR_PP0_ENERGY_STATUS, "MSR_PP0_ENERGY_STATUS")) {
305 		corepower_sens_init(&sc->sc_pp0_sens, "Cores Power",
306 		    MSR_PP0_ENERGY_STATUS, cpu);
307 		sensor_attach(&sc->sc_sensordev, &sc->sc_pp0_sens.sensor);
308 	} else {
309 		sc->sc_have_sens &= ~4;
310 	}
311 	if ((sc->sc_have_sens & 8) &&
312 	    corepower_try(MSR_PP1_ENERGY_STATUS, "MSR_PP1_ENERGY_STATUS")) {
313 		corepower_sens_init(&sc->sc_pp1_sens, "Graphics Power",
314 		    MSR_PP1_ENERGY_STATUS, cpu);
315 		sensor_attach(&sc->sc_sensordev, &sc->sc_pp1_sens.sensor);
316 	} else {
317 		sc->sc_have_sens &= ~8;
318 	}
319 	if ((sc->sc_have_sens & 0x10) &&
320 	    corepower_try(MSR_PLATFORM_ENERGY_COUNTER, "MSR_PLATFORM_ENERGY_COUNTER") &&
321 	    (rdmsr(MSR_PLATFORM_ENERGY_COUNTER) & 0xffffffffU) != 0) {
322 		corepower_sens_init(&sc->sc_platform_sens, "Platform Power",
323 		    MSR_PLATFORM_ENERGY_COUNTER, cpu);
324 		sensor_attach(&sc->sc_sensordev, &sc->sc_platform_sens.sensor);
325 	} else {
326 		sc->sc_have_sens &= ~0x10;
327 	}
328 
329 	if (sc->sc_have_sens == 0)
330 		return (ENXIO);
331 
332 	sc->sc_senstask = sensor_task_register2(sc, corepower_refresh, 1, cpu);
333 
334 	sensordev_install(&sc->sc_sensordev);
335 
336 	return (0);
337 }
338 
339 static int
340 corepower_detach(device_t dev)
341 {
342 	struct corepower_softc *sc = device_get_softc(dev);
343 
344 	sensordev_deinstall(&sc->sc_sensordev);
345 	sensor_task_unregister2(sc->sc_senstask);
346 
347 	return (0);
348 }
349 
350 static uint32_t
351 corepower_energy_to_uwatts(struct corepower_softc *sc, uint32_t units,
352     uint32_t secs)
353 {
354 	uint64_t val;
355 
356 	if (sc->sc_is_atom) {
357 		val = ((uint64_t)units) * sc->sc_joule_unit;
358 	} else {
359 		val = ((uint64_t)units) * 1000ULL * 1000ULL;
360 		val /= sc->sc_joule_unit;
361 	}
362 
363 	return val / secs;
364 }
365 
366 static void
367 corepower_refresh(void *arg)
368 {
369 	struct corepower_softc *sc = (struct corepower_softc *)arg;
370 
371 	if (sc->sc_have_sens & 1)
372 		corepower_sens_update(sc, &sc->sc_pkg_sens);
373 	if (sc->sc_have_sens & 2)
374 		corepower_sens_update(sc, &sc->sc_dram_sens);
375 	if (sc->sc_have_sens & 4)
376 		corepower_sens_update(sc, &sc->sc_pp0_sens);
377 	if (sc->sc_have_sens & 8)
378 		corepower_sens_update(sc, &sc->sc_pp1_sens);
379 	if (sc->sc_have_sens & 0x10)
380 		corepower_sens_update(sc, &sc->sc_platform_sens);
381 }
382 
383 static void
384 corepower_sens_init(struct corepower_sensor *sens, char *desc, u_int msr,
385     int cpu)
386 {
387 	ksnprintf(sens->sensor.desc, sizeof(sens->sensor.desc), "node%d %s",
388 	    get_chip_ID(cpu), desc);
389 	sens->sensor.type = SENSOR_WATTS;
390 	sens->msr = msr;
391 	sens->energy = rdmsr(sens->msr) & 0xffffffffU;
392 }
393 
394 static void
395 corepower_sens_update(struct corepower_softc *sc,
396     struct corepower_sensor *sens)
397 {
398 	uint64_t a, res;
399 
400 	a = rdmsr(sens->msr) & 0xffffffffU;
401 	if (sens->energy > a) {
402 		res = (0x100000000ULL - sens->energy) + a;
403 	} else {
404 		res = a - sens->energy;
405 	}
406 	sens->energy = a;
407 	sens->sensor.value = corepower_energy_to_uwatts(sc, res, 1);
408 }
409 
410 static int
411 corepower_try(u_int msr, char *name)
412 {
413 	uint64_t val;
414 
415 	if (rdmsr_safe(msr, &val) != 0) {
416 		kprintf("msr %s (0x%08x) not available\n", name, msr);
417 		return 0;
418 	}
419 	return 1;
420 }
421