1 /*
2  * Copyright (c) 2015 Imre Vadász <imre@vdsz.com>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
15  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
16  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
17  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
18  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
19  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
20  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
22  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
23  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
24  * POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /*
28  * Device driver for Intel's On Die power usage estimation via MSR.
29  * Supported by Sandy Bridge and later CPUs, and also by Atom CPUs
30  * of the Silvermont and later architectures.
31  */
32 
33 #include <sys/param.h>
34 #include <sys/bus.h>
35 #include <sys/systm.h>
36 #include <sys/module.h>
37 #include <sys/conf.h>
38 #include <sys/cpu_topology.h>
39 #include <sys/kernel.h>
40 #include <sys/sensors.h>
41 #include <sys/bitops.h>
42 
43 #include <machine/specialreg.h>
44 #include <machine/cpufunc.h>
45 #include <machine/cputypes.h>
46 #include <machine/md_var.h>
47 
48 #include "cpu_if.h"
49 
/*
 * Bit-field masks applied (via __SHIFTOUT) to the 64-bit value read from
 * MSR_RAPL_POWER_UNIT.  Each extracted field is used as a power-of-two
 * exponent for the corresponding divisor stored in the softc.
 */
#define MSR_RAPL_POWER_UNIT_POWER	__BITS64(0, 3)
#define MSR_RAPL_POWER_UNIT_ENERGY	__BITS64(8, 12)
#define MSR_RAPL_POWER_UNIT_TIME	__BITS64(16, 19)
53 
/*
 * State for one energy counter that is exported as a power sensor.
 */
struct corepower_sensor {
	/* Last raw counter reading, masked to the low 32 bits. */
	uint64_t	energy;
	/* Number of the MSR that holds this energy counter. */
	u_int		msr;
	/* Sensor framework object; its value is refreshed periodically. */
	struct ksensor	sensor;
};
59 
/*
 * Per-device driver state.
 */
struct corepower_softc {
	device_t		sc_dev;

	/*
	 * Divisors derived from MSR_RAPL_POWER_UNIT as (1 << field).
	 * Only sc_joule_divisor is consumed by the code in this file.
	 */
	uint32_t		sc_watt_divisor;
	uint32_t		sc_joule_divisor;
	uint32_t		sc_second_divisor;

	/*
	 * Bitmask of available sensors:
	 * 0x1 package, 0x2 DRAM, 0x4 PP0 (cores), 0x8 PP1 (graphics).
	 */
	int			sc_have_sens;

	struct corepower_sensor	sc_pkg_sens;
	struct corepower_sensor	sc_dram_sens;
	struct corepower_sensor	sc_pp0_sens;
	struct corepower_sensor	sc_pp1_sens;

	/* Sensor device the individual sensors are attached to. */
	struct ksensordev	sc_sensordev;
	/* Handle of the periodic refresh task, needed to unregister it. */
	struct sensor_task	*sc_senstask;
};
77 
/*
 * Device methods (identify/probe/attach/detach), followed by the
 * driver-internal helpers used to convert and refresh sensor readings.
 */
static void	corepower_identify(driver_t *driver, device_t parent);
static int	corepower_probe(device_t dev);
static int	corepower_attach(device_t dev);
static int	corepower_detach(device_t dev);
/* Internal helpers. */
static uint32_t	corepower_energy_to_uwatts(struct corepower_softc *sc,
					   uint32_t units, uint32_t secs);
static void	corepower_refresh(void *arg);
static void	corepower_sens_init(struct corepower_sensor *sens,
				    char *desc, u_int msr, int cpu);
static void	corepower_sens_update(struct corepower_softc *sc,
				      struct corepower_sensor *sens);
92 
/*
 * Method table binding the generic device interface entry points to this
 * driver's implementations; terminated by DEVMETHOD_END.
 */
static device_method_t corepower_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	corepower_identify),
	DEVMETHOD(device_probe,		corepower_probe),
	DEVMETHOD(device_attach,	corepower_attach),
	DEVMETHOD(device_detach,	corepower_detach),

	DEVMETHOD_END
};
102 
/*
 * Driver description: device name, method table and the size of the
 * per-device softc.
 */
static driver_t corepower_driver = {
	"corepower",
	corepower_methods,
	sizeof(struct corepower_softc),
};
108 
/*
 * Module registration.  The driver is registered with "cpu" as its parent
 * (corepower_identify() adds the child to a cpu device); no module event
 * handler is supplied.
 */
static devclass_t corepower_devclass;
DRIVER_MODULE(corepower, cpu, corepower_driver, corepower_devclass, NULL, NULL);
MODULE_VERSION(corepower, 1);
112 
113 static void
114 corepower_identify(driver_t *driver, device_t parent)
115 {
116 	device_t child;
117 	const struct cpu_node *node;
118 	int cpu, master_cpu;
119 
120 	/* Make sure we're not being doubly invoked. */
121 	if (device_find_child(parent, "corepower", -1) != NULL)
122 		return;
123 
124 	/* Check that the vendor is Intel. */
125 	if (cpu_vendor_id != CPU_VENDOR_INTEL)
126 		return;
127 
128 	/* We only want one child per CPU package */
129 	cpu = device_get_unit(parent);
130 	node = get_cpu_node_by_cpuid(cpu);
131 	while (node != NULL) {
132 		if (node->type == PACKAGE_LEVEL) {
133 			if (node->child_no == 0)
134 				node = NULL;
135 			break;
136 		}
137 		node = node->parent_node;
138 	}
139 	if (node == NULL)
140 		return;
141 
142 	master_cpu = BSRCPUMASK(node->members);
143 	if (cpu != master_cpu)
144 		return;
145 
146 	child = device_add_child(parent, "corepower", -1);
147 	if (child == NULL)
148 		device_printf(parent, "add corepower child failed\n");
149 }
150 
151 static int
152 corepower_probe(device_t dev)
153 {
154 	int cpu_family, cpu_model;
155 
156 	if (resource_disabled("corepower", 0))
157 		return (ENXIO);
158 
159 	cpu_model = CPUID_TO_MODEL(cpu_id);
160 	cpu_family = CPUID_TO_FAMILY(cpu_id);
161 
162 	if (cpu_family == 0x06) {
163 		switch (cpu_model) {
164 		/* Core CPUs */
165 		case 0x2a:
166 		case 0x3a:
167 		/* Xeon CPUs */
168 		case 0x2d:
169 		case 0x3e:
170 		case 0x3f:
171 		case 0x4f:
172 		case 0x56:
173 		/* Haswell, Broadwell, Skylake */
174 		case 0x3c:
175 		case 0x3d:
176 		case 0x45:
177 		case 0x46:
178 		case 0x47:
179 		case 0x4e:
180 		case 0x5e:
181 		/* Atom CPUs */
182 		case 0x37:
183 		case 0x4a:
184 		case 0x4c:
185 		case 0x5a:
186 		case 0x5d:
187 			break;
188 		default:
189 			return (ENXIO);
190 		}
191 	}
192 
193 	device_set_desc(dev, "CPU On-Die Power Usage Estimation");
194 
195 	return (BUS_PROBE_GENERIC);
196 }
197 
198 static int
199 corepower_attach(device_t dev)
200 {
201 	struct corepower_softc *sc = device_get_softc(dev);
202 	uint64_t val;
203 	uint32_t power_units;
204 	uint32_t energy_units;
205 	uint32_t time_units;
206 	int cpu_family, cpu_model;
207 	int cpu;
208 
209 	sc->sc_dev = dev;
210 	sc->sc_have_sens = 0;
211 
212 	cpu_family = CPUID_TO_FAMILY(cpu_id);
213 	cpu_model = CPUID_TO_MODEL(cpu_id);
214 
215 	/* XXX Check CPU version */
216 	if (cpu_family == 0x06) {
217 		switch (cpu_model) {
218 		/* Core CPUs */
219 		case 0x2a:
220 		case 0x3a:
221 			sc->sc_have_sens = 0xd;
222 			break;
223 		/* Xeon CPUs */
224 		case 0x2d: /* Only Xeon branded, Core i version should probably be 0x5 */
225 		case 0x3e:
226 		case 0x3f:
227 		case 0x4f:
228 		case 0x56:
229 			sc->sc_have_sens = 0x7;
230 			break;
231 		/* Haswell, Broadwell, Skylake */
232 		case 0x3c:
233 		case 0x3d:
234 		case 0x45:
235 		case 0x46:
236 		case 0x47:
237 		case 0x4e:
238 		case 0x5e:
239 			/* Check if Core or Xeon (Xeon CPUs might be 0x7) */
240 			sc->sc_have_sens = 0xf;
241 			break;
242 		/* Atom CPUs */
243 		case 0x37:
244 		case 0x4a:
245 		case 0x4c:
246 		case 0x5a:
247 		case 0x5d:
248 			sc->sc_have_sens = 0x5;
249 			break;
250 		default:
251 			return (ENXIO);
252 		}
253 	}
254 
255 	val = rdmsr(MSR_RAPL_POWER_UNIT);
256 
257 	power_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_POWER);
258 	energy_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_ENERGY);
259 	time_units = __SHIFTOUT(val, MSR_RAPL_POWER_UNIT_TIME);
260 
261 	sc->sc_watt_divisor = (1 << power_units);
262 	sc->sc_joule_divisor = (1 << energy_units);
263 	sc->sc_second_divisor = (1 << time_units);
264 
265 	/*
266 	 * Add hw.sensors.cpu_nodeN MIB.
267 	 */
268 	cpu = device_get_unit(device_get_parent(dev));
269 	ksnprintf(sc->sc_sensordev.xname, sizeof(sc->sc_sensordev.xname),
270 	    "cpu_node%d", get_chip_ID(cpu));
271 	if (sc->sc_have_sens & 1) {
272 		corepower_sens_init(&sc->sc_pkg_sens, "Package Power",
273 		    MSR_PKG_ENERGY_STATUS, cpu);
274 		sensor_attach(&sc->sc_sensordev, &sc->sc_pkg_sens.sensor);
275 	}
276 	if (sc->sc_have_sens & 2) {
277 		corepower_sens_init(&sc->sc_dram_sens, "DRAM Power",
278 		    MSR_DRAM_ENERGY_STATUS, cpu);
279 		sensor_attach(&sc->sc_sensordev, &sc->sc_dram_sens.sensor);
280 	}
281 	if (sc->sc_have_sens & 4) {
282 		corepower_sens_init(&sc->sc_pp0_sens, "Cores Power",
283 		    MSR_PP0_ENERGY_STATUS, cpu);
284 		sensor_attach(&sc->sc_sensordev, &sc->sc_pp0_sens.sensor);
285 	}
286 	if (sc->sc_have_sens & 8) {
287 		corepower_sens_init(&sc->sc_pp1_sens, "Graphics Power",
288 		    MSR_PP1_ENERGY_STATUS, cpu);
289 		sensor_attach(&sc->sc_sensordev, &sc->sc_pp1_sens.sensor);
290 	}
291 
292 	sc->sc_senstask = sensor_task_register2(sc, corepower_refresh, 1, cpu);
293 
294 	sensordev_install(&sc->sc_sensordev);
295 
296 	return (0);
297 }
298 
299 static int
300 corepower_detach(device_t dev)
301 {
302 	struct corepower_softc *sc = device_get_softc(dev);
303 
304 	sensordev_deinstall(&sc->sc_sensordev);
305 	sensor_task_unregister2(sc->sc_senstask);
306 
307 	return (0);
308 }
309 
310 static uint32_t
311 corepower_energy_to_uwatts(struct corepower_softc *sc, uint32_t units,
312     uint32_t secs)
313 {
314 	uint64_t val;
315 
316 	val = ((uint64_t)units) * 1000ULL * 1000ULL;
317 	val /= sc->sc_joule_divisor;
318 
319 	return val / secs;
320 }
321 
322 static void
323 corepower_refresh(void *arg)
324 {
325 	struct corepower_softc *sc = (struct corepower_softc *)arg;
326 
327 	if (sc->sc_have_sens & 1)
328 		corepower_sens_update(sc, &sc->sc_pkg_sens);
329 	if (sc->sc_have_sens & 2)
330 		corepower_sens_update(sc, &sc->sc_dram_sens);
331 	if (sc->sc_have_sens & 4)
332 		corepower_sens_update(sc, &sc->sc_pp0_sens);
333 	if (sc->sc_have_sens & 8)
334 		corepower_sens_update(sc, &sc->sc_pp1_sens);
335 }
336 
337 static void
338 corepower_sens_init(struct corepower_sensor *sens, char *desc, u_int msr,
339     int cpu)
340 {
341 	ksnprintf(sens->sensor.desc, sizeof(sens->sensor.desc), "node%d %s",
342 	    get_chip_ID(cpu), desc);
343 	sens->sensor.type = SENSOR_WATTS;
344 	sens->msr = msr;
345 	sens->energy = rdmsr(sens->msr) & 0xffffffffU;
346 }
347 
348 static void
349 corepower_sens_update(struct corepower_softc *sc,
350     struct corepower_sensor *sens)
351 {
352 	uint64_t a, res;
353 
354 	a = rdmsr(sens->msr) & 0xffffffffU;
355 	if (sens->energy > a) {
356 		res = (0x100000000ULL - sens->energy) + a;
357 	} else {
358 		res = a - sens->energy;
359 	}
360 	sens->energy = a;
361 	sens->sensor.value = corepower_energy_to_uwatts(sc, res, 1);
362 }
363