1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2008, 2009 Rui Paulo <rpaulo@FreeBSD.org>
5 * Copyright (c) 2009 Norikatsu Shigemura <nork@FreeBSD.org>
6 * Copyright (c) 2009-2012 Jung-uk Kim <jkim@FreeBSD.org>
7 * All rights reserved.
8 * Copyright (c) 2017-2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22 * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23 * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 *
31 * $FreeBSD: head/sys/dev/amdtemp/amdtemp.c 366136 2020-09-25 04:16:28Z cem $
32 */
33
34 /*
35 * Driver for the AMD CPU on-die thermal sensors.
36 * Initially based on the k8temp Linux driver.
37 */
38
39 #include <sys/param.h>
40 #include <sys/bus.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/module.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/sensors.h>
48
49 #include <machine/cpufunc.h>
50 #include <machine/md_var.h>
51 #include <machine/specialreg.h>
52
53 #include <bus/pci/pcivar.h>
54 #include <bus/pci/pci_cfgreg.h>
55
56 #include <dev/powermng/amdsmn/amdsmn.h>
57
/*
 * Sensor identifiers.
 *
 * The numeric value of each enumerator is also used as a bit position in
 * the 32-bit sensor masks (sc_probed_regmask and sensorcpu.regmask), so
 * MAXSENSORS must stay at or below 32.
 */
typedef enum {
	CORE0_SENSOR0,
	CORE0_SENSOR1,
	CORE1_SENSOR0,
	CORE1_SENSOR1,
	CORE0,			/* Max of both CORE0 sensors (amdtemp_sysctl). */
	CORE1,			/* Max of both CORE1 sensors (amdtemp_sysctl). */
	CCD1,
	CCD_BASE = CCD1,	/* First per-CCD sensor. */
	CCD2,
	CCD3,
	CCD4,
	CCD5,
	CCD6,
	CCD7,
	CCD8,
	CCD9,
	CCD10,
	CCD11,
	CCD12,
	MAXSENSORS,		/* Number of sensor identifiers above. */

	CCD_MAX = CCD12,	/* Last per-CCD sensor. */
	NUM_CCDS = CCD_MAX - CCD_BASE + 1,
} amdsensor_t;
83
/*
 * Per-device driver state.
 */
struct amdtemp_softc {
	int		sc_ncores;	/* Core count computed in attach. */
	int		sc_ntemps;	/* Sensors per core (2 on 0Fh, else 1). */
	int		sc_flags;	/* AMDTEMP_FLAG_* bits, see below. */
	int		sc_ccd_display;	/* Nonzero: name sensordev "die%d". */
#define	AMDTEMP_FLAG_CS_SWAP	0x01	/* ThermSenseCoreSel is inverted. */
#define	AMDTEMP_FLAG_CT_10BIT	0x02	/* CurTmp is 10-bit wide. */
#define	AMDTEMP_FLAG_ALT_OFFSET	0x04	/* CurTmp starts at -28C. */
	int32_t		sc_offset;	/* User offset, whole degrees. */
	int32_t		sc_ccd_offset;	/* First CCD reg, rel. to CUR_TMP. */
	/* Family-specific reader; returns the temperature in 0.1 K units. */
	int32_t		(*sc_gettemp)(device_t, amdsensor_t);
	/* dev.cpu.N.temperature oids created by amdtemp_intrhook(). */
	struct sysctl_oid *sc_sysctl_cpu[MAXCPU];
	struct intr_config_hook sc_ich;
	device_t	sc_smn;		/* amdsmn sibling, if SMN is needed. */
	uint32_t	sc_probed_regmask; /* 1 << amdsensor_t per sensor. */

	/*
	 * NOTE: The array has sc_ncores + 1 entries.  Entries 0 and 1 carry
	 *	 the per-core sensors of core 0 and core 1; the extra entry
	 *	 at index sc_ncores carries the shared sensors such as the
	 *	 CCDs (see amdtemp_intrhook()).  Per-core entries beyond
	 *	 core 0 are only applicable if ntemps == 2 (with no CCDs).
	 */
	struct sensorcpu {
		device_t dev;			/* Owning amdtemp device. */
		struct amdtemp_softc *sc;	/* Back pointer to the softc. */
		struct ksensordev sensordev;
		struct ksensor *sensors;	/* MAXSENSORS entries. */
		struct sensor_task *senstask;
		uint32_t regmask;		/* Sensors served by this entry. */
	} *sc_sensorcpus;
};
114
/*
 * N.B. The numbers in macro names below are significant and represent CPU
 * family and model numbers.  Do not make up fictitious family or model numbers
 * when adding support for new devices.
 *
 * These are the PCI IDs matched against the parent device by
 * amdtemp_match().
 */
#define	VENDORID_AMD		0x1022

#define	DEVICEID_AMD_MISC0F		0x1103
#define	DEVICEID_AMD_MISC10		0x1203
#define	DEVICEID_AMD_MISC11		0x1303
#define	DEVICEID_AMD_MISC14		0x1703
#define	DEVICEID_AMD_MISC15		0x1603
#define	DEVICEID_AMD_MISC15_M10H	0x1403
#define	DEVICEID_AMD_MISC15_M30H	0x141d
#define	DEVICEID_AMD_MISC15_M60H_ROOT	0x1576
#define	DEVICEID_AMD_MISC16		0x1533
#define	DEVICEID_AMD_MISC16_M30H	0x1583
#define	DEVICEID_AMD_HOSTB17H_ROOT	0x1450
#define	DEVICEID_AMD_HOSTB17H_M10H_ROOT	0x15d0
#define	DEVICEID_AMD_HOSTB17H_M30H_ROOT	0x1480	/* Also M70h. */
#define	DEVICEID_AMD_HOSTB17H_M60H_ROOT	0x1630
#define	DEVICEID_AMD_HOSTB17H_M70H_ROOT	0x1443
#define	DEVICEID_AMD_HOSTB17H_MA0H_ROOT	0x1727

/*
 * Disabled alternative IDs for the family 19h parts.
 * NOTE(review): these look like IDs of a different PCI function than the
 * root complex IDs used below -- confirm before enabling.
 */
#if 0
#define	DEVICEID_AMD_HOSTB19H_M10H_ROOT	0x14b0
#define	DEVICEID_AMD_HOSTB19H_M40H_ROOT	0x167c
#define	DEVICEID_AMD_HOSTB19H_M50H_ROOT	0x166d
#define	DEVICEID_AMD_HOSTB19H_M60H_ROOT	0x14e3
#define	DEVICEID_AMD_HOSTB19H_M70H_ROOT	0x14f3
#endif
#define	DEVICEID_AMD_HOSTB19H_M10H_ROOT	0x14a4
#define	DEVICEID_AMD_HOSTB19H_M60H_ROOT	0x14d8
#define	DEVICEID_AMD_HOSTB19H_M70H_ROOT	0x14e8
149
150
/*
 * Table of supported parent PCI devices, searched linearly by
 * amdtemp_match().
 */
static const struct amdtemp_product {
	uint16_t	amdtemp_vendorid;
	uint16_t	amdtemp_deviceid;
	/*
	 * 0xFC register is only valid on the D18F3 PCI device; SMN temp
	 * drivers do not attach to that device.  When true, attach re-reads
	 * family/model from AMDTEMP_CPUID on the parent device.
	 */
	bool		amdtemp_has_cpuid;
} amdtemp_products[] = {
	/* Families 0Fh-16h. */
	{ VENDORID_AMD,	DEVICEID_AMD_MISC0F, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC10, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC11, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC14, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M10H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16_M30H, true },

	/* Family 17h. */
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M10H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M30H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M70H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_MA0H_ROOT, false },

	/* Family 19h. */
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M10H_ROOT, false },
#if 0
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M40H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M50H_ROOT, false },
#endif
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB19H_M70H_ROOT, false },
};
186
/*
 * Reported Temperature Control Register, family 0Fh-15h (some models), 16h.
 */
#define	AMDTEMP_REPTMP_CTRL	0xa4

/* CurTmp: 11-bit field in bits 31:21; CurTmpTjSel: 2-bit field in 17:16. */
#define	AMDTEMP_REPTMP10H_CURTMP_MASK	0x7ff
#define	AMDTEMP_REPTMP10H_CURTMP_SHIFT	21
#define	AMDTEMP_REPTMP10H_TJSEL_MASK	0x3
#define	AMDTEMP_REPTMP10H_TJSEL_SHIFT	16

/*
 * Reported Temperature, Family 15h, M60+
 *
 * Same register bit definitions as other Family 15h CPUs, but access is
 * indirect via SMN, like Family 17h.
 */
#define	AMDTEMP_15H_M60H_REPTMP_CTRL	0xd8200ca4

/*
 * Reported Temperature, Family 17h
 *
 * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register
 * provide the current temp.  bit 19, when clear, means the temp is reported in
 * a range 0.."225C" (probable typo for 255C), and when set changes the range
 * to -49..206C.
 *
 * Family 17H and 19H
 */
#define	AMDTEMP_17H_CUR_TMP		0x59800
#define	AMDTEMP_17H_CUR_TMP_RANGE_SEL	(1u << 19)
/*
 * Per-CCD temperature registers: a register whose bit 11 is clear is
 * treated as "no sensor present" by the CCD probe routines.
 */
#define	AMDTEMP_17H_CCD_TMP_VALID	(1u << 11)

/*
 * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius).
 */
#define	AMDTEMP_CURTMP_RANGE_ADJUST	490

/*
 * Thermaltrip Status Register (Family 0Fh only)
 */
#define	AMDTEMP_THERMTP_STAT	0xe4
#define	AMDTEMP_TTSR_SELCORE	0x04	/* Core select bit. */
#define	AMDTEMP_TTSR_SELSENSOR	0x40	/* Sensor select bit. */

/*
 * DRAM Configuration High Register
 */
#define	AMDTEMP_DRAM_CONF_HIGH	0x94	/* Function 2 */
#define	AMDTEMP_DRAM_MODE_DDR3	0x0100

/*
 * CPU Family/Model Register
 */
#define	AMDTEMP_CPUID		0xfc
244
/*
 * Device methods.
 */
static void	amdtemp_identify(driver_t *driver, device_t parent);
static int	amdtemp_probe(device_t dev);
static int	amdtemp_attach(device_t dev);
static void	amdtemp_intrhook(void *arg);
static int	amdtemp_detach(device_t dev);
/* Family-specific temperature readers installed in sc_gettemp. */
static int32_t	amdtemp_gettemp0f(device_t dev, amdsensor_t sensor);
static int32_t	amdtemp_gettemp(device_t dev, amdsensor_t sensor);
static int32_t	amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor);
static int32_t	amdtemp_gettemp17to19h(device_t dev, amdsensor_t sensor);
/* Per-CCD sensor discovery for the SMN-based families. */
static void	amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model);
static void	amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model);
/* sysctl handler and periodic sensor-framework update task. */
static int	amdtemp_sysctl(SYSCTL_HANDLER_ARGS);
static void	amdtemp_sensor_task(void *);
261
/* NewBus method table for the amdtemp driver. */
static device_method_t amdtemp_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	amdtemp_identify),
	DEVMETHOD(device_probe,		amdtemp_probe),
	DEVMETHOD(device_attach,	amdtemp_attach),
	DEVMETHOD(device_detach,	amdtemp_detach),

	DEVMETHOD_END
};

static driver_t amdtemp_driver = {
	"amdtemp",
	amdtemp_methods,
	sizeof(struct amdtemp_softc),
};

static devclass_t amdtemp_devclass;
/* The driver hangs off hostb; SMN-based models also depend on amdsmn. */
DRIVER_MODULE_ORDERED(amdtemp, hostb, amdtemp_driver,
    &amdtemp_devclass, NULL, NULL, SI_ORDER_LATER);
MODULE_VERSION(amdtemp, 1);
MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1);
#if !defined(__DragonFly__)
MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products,
    nitems(amdtemp_products));
#endif
287
288 static bool
amdtemp_match(device_t dev,const struct amdtemp_product ** product_out)289 amdtemp_match(device_t dev, const struct amdtemp_product **product_out)
290 {
291 int i;
292 uint16_t vendor, devid;
293
294 vendor = pci_get_vendor(dev);
295 devid = pci_get_device(dev);
296
297 for (i = 0; i < nitems(amdtemp_products); i++) {
298 if (vendor == amdtemp_products[i].amdtemp_vendorid &&
299 devid == amdtemp_products[i].amdtemp_deviceid) {
300 if (product_out != NULL)
301 *product_out = &amdtemp_products[i];
302 return (true);
303 }
304 }
305 return (false);
306 }
307
308 static void
amdtemp_identify(driver_t * driver,device_t parent)309 amdtemp_identify(driver_t *driver, device_t parent)
310 {
311 device_t child;
312
313 /* Make sure we're not being doubly invoked. */
314 if (device_find_child(parent, "amdtemp", -1) != NULL)
315 return;
316
317 if (amdtemp_match(parent, NULL)) {
318 child = device_add_child(parent, "amdtemp", -1);
319 if (child == NULL)
320 device_printf(parent, "add amdtemp child failed\n");
321 }
322 }
323
324 static int
amdtemp_probe(device_t dev)325 amdtemp_probe(device_t dev)
326 {
327 uint32_t family, model;
328
329 if (resource_disabled("amdtemp", 0))
330 return (ENXIO);
331 if (!amdtemp_match(device_get_parent(dev), NULL))
332 return (ENXIO);
333
334 family = CPUID_TO_FAMILY(cpu_id);
335 model = CPUID_TO_MODEL(cpu_id);
336
337 switch (family) {
338 case 0x0f:
339 if ((model == 0x04 && (cpu_id & CPUID_STEPPING) == 0) ||
340 (model == 0x05 && (cpu_id & CPUID_STEPPING) <= 1))
341 return (ENXIO);
342 break;
343 case 0x10:
344 case 0x11:
345 case 0x12:
346 case 0x14:
347 case 0x15:
348 case 0x16:
349 case 0x17:
350 case 0x19:
351 break;
352 default:
353 return (ENXIO);
354 }
355 device_set_desc(dev, "AMD CPU On-Die Thermal Sensors");
356
357 return (BUS_PROBE_GENERIC);
358 }
359
360 static int
amdtemp_attach(device_t dev)361 amdtemp_attach(device_t dev)
362 {
363 char tn[32];
364 u_int regs[4];
365 const struct amdtemp_product *product;
366 struct amdtemp_softc *sc;
367 struct sysctl_ctx_list *sysctlctx;
368 struct sysctl_oid *sysctlnode;
369 uint32_t cpuid, family, model;
370 u_int bid;
371 int erratum319, unit;
372 bool needsmn;
373
374 sc = device_get_softc(dev);
375 erratum319 = 0;
376 needsmn = false;
377
378 if (!amdtemp_match(device_get_parent(dev), &product))
379 return (ENXIO);
380
381 cpuid = cpu_id;
382 family = CPUID_TO_FAMILY(cpuid);
383 model = CPUID_TO_MODEL(cpuid);
384
385 /*
386 * This checks for the byzantine condition of running a heterogenous
387 * revision multi-socket system where the attach thread is potentially
388 * probing a remote socket's PCI device.
389 *
390 * Currently, such scenarios are unsupported on models using the SMN
391 * (because on those models, amdtemp(4) attaches to a different PCI
392 * device than the one that contains AMDTEMP_CPUID).
393 *
394 * The ancient 0x0F family of devices only supports this register from
395 * models 40h+.
396 */
397 if (product->amdtemp_has_cpuid && (family > 0x0f ||
398 (family == 0x0f && model >= 0x40))) {
399 cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID,
400 4);
401 family = CPUID_TO_FAMILY(cpuid);
402 model = CPUID_TO_MODEL(cpuid);
403 }
404
405 switch (family) {
406 case 0x0f:
407 /*
408 * Thermaltrip Status Register
409 *
410 * - ThermSenseCoreSel
411 *
412 * Revision F & G: 0 - Core1, 1 - Core0
413 * Other: 0 - Core0, 1 - Core1
414 *
415 * - CurTmp
416 *
417 * Revision G: bits 23-14
418 * Other: bits 23-16
419 *
420 * XXX According to the BKDG, CurTmp, ThermSenseSel and
421 * ThermSenseCoreSel bits were introduced in Revision F
422 * but CurTmp seems working fine as early as Revision C.
423 * However, it is not clear whether ThermSenseSel and/or
424 * ThermSenseCoreSel work in undocumented cases as well.
425 * In fact, the Linux driver suggests it may not work but
426 * we just assume it does until we find otherwise.
427 *
428 * XXX According to Linux, CurTmp starts at -28C on
429 * Socket AM2 Revision G processors, which is not
430 * documented anywhere.
431 */
432 if (model >= 0x40)
433 sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP;
434 if (model >= 0x60 && model != 0xc1) {
435 do_cpuid(0x80000001, regs);
436 bid = (regs[1] >> 9) & 0x1f;
437 switch (model) {
438 case 0x68: /* Socket S1g1 */
439 case 0x6c:
440 case 0x7c:
441 break;
442 case 0x6b: /* Socket AM2 and ASB1 (2 cores) */
443 if (bid != 0x0b && bid != 0x0c)
444 sc->sc_flags |=
445 AMDTEMP_FLAG_ALT_OFFSET;
446 break;
447 case 0x6f: /* Socket AM2 and ASB1 (1 core) */
448 case 0x7f:
449 if (bid != 0x07 && bid != 0x09 &&
450 bid != 0x0c)
451 sc->sc_flags |=
452 AMDTEMP_FLAG_ALT_OFFSET;
453 break;
454 default:
455 sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET;
456 }
457 sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT;
458 }
459
460 /*
461 * There are two sensors per core.
462 */
463 sc->sc_ntemps = 2;
464 sc->sc_ccd_display = 0;
465
466 sc->sc_gettemp = amdtemp_gettemp0f;
467 break;
468 case 0x10:
469 /*
470 * Erratum 319 Inaccurate Temperature Measurement
471 *
472 * http://support.amd.com/us/Processor_TechDocs/41322.pdf
473 */
474 do_cpuid(0x80000001, regs);
475 switch ((regs[1] >> 28) & 0xf) {
476 case 0: /* Socket F */
477 erratum319 = 1;
478 break;
479 case 1: /* Socket AM2+ or AM3 */
480 if ((pci_cfgregread(pci_get_bus(dev),
481 pci_get_slot(dev), 2, AMDTEMP_DRAM_CONF_HIGH, 2) &
482 AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 ||
483 (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3))
484 break;
485 /* XXX 00100F42h (RB-C2) exists in both formats. */
486 erratum319 = 1;
487 break;
488 }
489 /* FALLTHROUGH */
490 case 0x11:
491 case 0x12:
492 case 0x14:
493 case 0x15:
494 case 0x16:
495 sc->sc_ntemps = 1;
496 sc->sc_ccd_display = 1;
497 /*
498 * Some later (60h+) models of family 15h use a similar SMN
499 * network as family 17h. (However, the register index differs
500 * from 17h and the decoding matches other 10h-15h models,
501 * which differ from 17h.)
502 */
503 if (family == 0x15 && model >= 0x60) {
504 sc->sc_gettemp = amdtemp_gettemp15hm60h;
505 needsmn = true;
506 } else
507 sc->sc_gettemp = amdtemp_gettemp;
508 break;
509 case 0x17:
510 case 0x19:
511 sc->sc_ntemps = 1;
512 sc->sc_ccd_display = 1;
513 sc->sc_gettemp = amdtemp_gettemp17to19h;
514 switch(model) {
515 case 0x10 ... 0x1f:
516 case 0xa0 ... 0xaf:
517 case 0x40 ... 0x4f:
518 sc->sc_ccd_offset = 0x300;
519 break;
520 case 0x60 ... 0x6f:
521 case 0x70 ... 0x7f:
522 sc->sc_ccd_offset = 0x308;
523 break;
524 default:
525 sc->sc_ccd_offset = 0x154;
526 break;
527 }
528 needsmn = true;
529 device_printf(dev, "sc_ccd_offset = %08x\n", sc->sc_ccd_offset);
530 break;
531 default:
532 device_printf(dev, "Bogus family 0x%x\n", family);
533 return (ENXIO);
534 }
535
536 if (needsmn) {
537 sc->sc_smn = device_find_child(
538 device_get_parent(dev), "amdsmn", -1);
539 if (sc->sc_smn == NULL) {
540 device_printf(dev, "No SMN device found\n");
541 return (ENXIO);
542 }
543 }
544
545 /*
546 * Find number of cores per package. XXX this does not work
547 * properly, it appears to be calculating the total number of cores.
548 */
549
550 sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ?
551 (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1;
552 if (sc->sc_ncores > MAXCPU)
553 return (ENXIO);
554
555 if (erratum319)
556 device_printf(dev,
557 "Erratum 319: temperature measurement may be inaccurate\n");
558 if (bootverbose)
559 device_printf(dev, "Found %d cores and %d sensors.\n",
560 sc->sc_ncores,
561 sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1);
562
563 /*
564 * dev.amdtemp.N tree.
565 */
566 unit = device_get_unit(dev);
567 ksnprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit);
568 TUNABLE_INT_FETCH(tn, &sc->sc_offset);
569
570 sysctlctx = device_get_sysctl_ctx(dev);
571 SYSCTL_ADD_INT(sysctlctx,
572 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
573 "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0,
574 "Temperature sensor offset");
575 sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
576 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
577 "core0", CTLFLAG_RD, 0, "Core 0");
578
579 SYSCTL_ADD_PROC(sysctlctx,
580 SYSCTL_CHILDREN(sysctlnode),
581 OID_AUTO, "sensor0",
582 CTLTYPE_INT | CTLFLAG_RD,
583 dev, CORE0_SENSOR0, amdtemp_sysctl, "IK",
584 "Core 0 / Sensor 0 temperature");
585
586 sc->sc_probed_regmask |= 1U << CORE0_SENSOR0;
587
588 if (family == 0x17)
589 amdtemp_probe_ccd_sensors17h(dev, model);
590 else if (family == 0x19)
591 amdtemp_probe_ccd_sensors19h(dev, model);
592 else if (sc->sc_ntemps > 1) {
593 SYSCTL_ADD_PROC(sysctlctx,
594 SYSCTL_CHILDREN(sysctlnode),
595 OID_AUTO, "sensor1",
596 CTLTYPE_INT | CTLFLAG_RD,
597 dev, CORE0_SENSOR1, amdtemp_sysctl, "IK",
598 "Core 0 / Sensor 1 temperature");
599
600 sc->sc_probed_regmask |= 1U << CORE0_SENSOR1;
601
602 if (sc->sc_ncores > 1) {
603 sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
604 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
605 OID_AUTO, "core1", CTLFLAG_RD,
606 0, "Core 1");
607
608 SYSCTL_ADD_PROC(sysctlctx,
609 SYSCTL_CHILDREN(sysctlnode),
610 OID_AUTO, "sensor0",
611 CTLTYPE_INT | CTLFLAG_RD,
612 dev, CORE1_SENSOR0, amdtemp_sysctl, "IK",
613 "Core 1 / Sensor 0 temperature");
614
615 SYSCTL_ADD_PROC(sysctlctx,
616 SYSCTL_CHILDREN(sysctlnode),
617 OID_AUTO, "sensor1",
618 CTLTYPE_INT | CTLFLAG_RD,
619 dev, CORE1_SENSOR1, amdtemp_sysctl, "IK",
620 "Core 1 / Sensor 1 temperature");
621
622 sc->sc_probed_regmask |= 1U << CORE1_SENSOR0;
623 sc->sc_probed_regmask |= 1U << CORE1_SENSOR1;
624 }
625 }
626
627 /*
628 * Try to create dev.cpu sysctl entries and setup intrhook function.
629 * This is needed because the cpu driver may be loaded late on boot,
630 * after us.
631 */
632 amdtemp_intrhook(dev);
633 sc->sc_ich.ich_func = amdtemp_intrhook;
634 sc->sc_ich.ich_arg = dev;
635 if (config_intrhook_establish(&sc->sc_ich) != 0) {
636 device_printf(dev, "config_intrhook_establish failed!\n");
637 return (ENXIO);
638 }
639
640 return (0);
641 }
642
643 void
amdtemp_intrhook(void * arg)644 amdtemp_intrhook(void *arg)
645 {
646 struct amdtemp_softc *sc;
647 struct sysctl_ctx_list *sysctlctx;
648 device_t dev = (device_t)arg;
649 device_t acpi, cpu, nexus;
650 amdsensor_t sensor;
651 int i;
652 int j;
653
654 sc = device_get_softc(dev);
655 if (sc->sc_ich.ich_arg == NULL)
656 return;
657
658 /*
659 * dev.cpu.N.temperature.
660 */
661 nexus = device_find_child(root_bus, "nexus", 0);
662 acpi = device_find_child(nexus, "acpi", 0);
663
664 for (i = 0; i < sc->sc_ncores; i++) {
665 if (sc->sc_sysctl_cpu[i] != NULL)
666 continue;
667 cpu = device_find_child(acpi, "cpu",
668 device_get_unit(dev) * sc->sc_ncores + i);
669 if (cpu != NULL) {
670 sysctlctx = device_get_sysctl_ctx(cpu);
671
672 sensor = sc->sc_ntemps > 1 ?
673 (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0;
674 sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx,
675 SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
676 OID_AUTO, "temperature",
677 CTLTYPE_INT | CTLFLAG_RD,
678 dev, sensor, amdtemp_sysctl, "IK",
679 "Current temparature");
680 }
681 }
682 config_intrhook_disestablish(&sc->sc_ich);
683
684 /*
685 * sensor infrastructure. Use [ncpus] for globally shared sensors
686 */
687 sc->sc_sensorcpus = kmalloc(sizeof(*sc->sc_sensorcpus) *
688 (sc->sc_ncores + 1),
689 M_DEVBUF, M_WAITOK | M_ZERO);
690
691 for (i = 0; i <= sc->sc_ncores; i++) {
692 struct sensorcpu *scpu = &sc->sc_sensorcpus[i];
693
694 if (i == 0)
695 scpu->regmask = sc->sc_probed_regmask & 0x0003U;
696 else if (i == 1)
697 scpu->regmask = sc->sc_probed_regmask & 0x000CU;
698 else if (i != sc->sc_ncores)
699 scpu->regmask = 0;
700 else
701 scpu->regmask = sc->sc_probed_regmask & ~0xFU;
702
703 if (scpu->regmask == 0)
704 continue;
705
706 if (sc->sc_ccd_display) {
707 ksnprintf(scpu->sensordev.xname,
708 sizeof(scpu->sensordev.xname),
709 "die%d", device_get_unit(dev));
710 } else {
711 ksnprintf(scpu->sensordev.xname,
712 sizeof(scpu->sensordev.xname),
713 "cpu%d", i);
714 }
715
716 scpu->dev = dev;
717 scpu->sc = sc;
718 scpu->sensors = kmalloc(sizeof(*scpu->sensors) * MAXSENSORS,
719 M_DEVBUF, M_WAITOK | M_ZERO);
720 for (j = 0; j < MAXSENSORS; ++j) {
721 if ((scpu->regmask & (1U << j)) == 0)
722 continue;
723
724 switch(j) {
725 case CORE0_SENSOR0:
726 case CORE0_SENSOR1:
727 case CORE1_SENSOR0:
728 case CORE1_SENSOR1:
729 if (sc->sc_ccd_display) {
730 ksnprintf(scpu->sensors[j].desc,
731 sizeof(scpu->sensors[0].desc),
732 "high temp");
733 } else {
734 ksnprintf(scpu->sensors[j].desc,
735 sizeof(scpu->sensors[0].desc),
736 "temp%d", j & 1);
737 }
738 break;
739 case CORE0:
740 ksnprintf(scpu->sensors[j].desc,
741 sizeof(scpu->sensors[0].desc),
742 "core0 rollup temp");
743 break;
744 case CORE1:
745 ksnprintf(scpu->sensors[j].desc,
746 sizeof(scpu->sensors[0].desc),
747 "core1 rollup temp");
748 break;
749 case CCD_BASE ... CCD_MAX:
750 ksnprintf(scpu->sensors[j].desc,
751 sizeof(scpu->sensors[0].desc),
752 "ccd%u temp", j - CCD_BASE);
753 break;
754 }
755 scpu->sensors[j].type = SENSOR_TEMP;
756 sensor_set_unknown(&scpu->sensors[j]);
757 sensor_attach(&scpu->sensordev, &scpu->sensors[j]);
758 }
759 scpu->senstask = sensor_task_register2(scpu,
760 amdtemp_sensor_task,
761 2,
762 ((i < sc->sc_ncores) ?
763 i : -1));
764 sensordev_install(&scpu->sensordev);
765 }
766 }
767
768 int
amdtemp_detach(device_t dev)769 amdtemp_detach(device_t dev)
770 {
771 struct amdtemp_softc *sc = device_get_softc(dev);
772 int i;
773 int j;
774
775 for (i = 0; i < sc->sc_ncores; i++) {
776 if (sc->sc_sysctl_cpu[i] != NULL)
777 sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0);
778 }
779
780 if (sc->sc_sensorcpus) {
781 for (i = 0; i <= sc->sc_ncores; i++) {
782 struct sensorcpu *scpu = &sc->sc_sensorcpus[i];
783
784 if (scpu->sensors) {
785 for (j = 0; j < MAXSENSORS; ++j) {
786 if ((scpu->regmask & (1U << j)) == 0)
787 continue;
788 sensor_detach(&scpu->sensordev,
789 &scpu->sensors[j]);
790 }
791 if (scpu->senstask) {
792 sensor_task_unregister2(scpu->senstask);
793 scpu->senstask = NULL;
794 }
795 sensordev_deinstall(&scpu->sensordev);
796 kfree(scpu->sensors, M_DEVBUF);
797 scpu->sensors = NULL;
798 }
799 }
800 kfree(sc->sc_sensorcpus, M_DEVBUF);
801 sc->sc_sensorcpus = NULL;
802 }
803
804 /* NewBus removes the dev.amdtemp.N tree by itself. */
805
806 return (0);
807 }
808
809 static int
amdtemp_sysctl(SYSCTL_HANDLER_ARGS)810 amdtemp_sysctl(SYSCTL_HANDLER_ARGS)
811 {
812 device_t dev = (device_t)arg1;
813 struct amdtemp_softc *sc = device_get_softc(dev);
814 amdsensor_t sensor = (amdsensor_t)arg2;
815 int32_t auxtemp[2], temp;
816 int error;
817
818 switch (sensor) {
819 case CORE0:
820 auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0);
821 auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1);
822 temp = imax(auxtemp[0], auxtemp[1]);
823 break;
824 case CORE1:
825 auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0);
826 auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1);
827 temp = imax(auxtemp[0], auxtemp[1]);
828 break;
829 default:
830 temp = sc->sc_gettemp(dev, sensor);
831 break;
832 }
833 error = sysctl_handle_int(oidp, &temp, 0, req);
834
835 return (error);
836 }
837
838 #define AMDTEMP_ZERO_C_TO_K 2731
839
840 static int32_t
amdtemp_gettemp0f(device_t dev,amdsensor_t sensor)841 amdtemp_gettemp0f(device_t dev, amdsensor_t sensor)
842 {
843 struct amdtemp_softc *sc = device_get_softc(dev);
844 uint32_t mask, offset, temp;
845
846 /* Set Sensor/Core selector. */
847 temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1);
848 temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR);
849 switch (sensor) {
850 case CORE0_SENSOR1:
851 temp |= AMDTEMP_TTSR_SELSENSOR;
852 /* FALLTHROUGH */
853 case CORE0_SENSOR0:
854 case CORE0:
855 if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0)
856 temp |= AMDTEMP_TTSR_SELCORE;
857 break;
858 case CORE1_SENSOR1:
859 temp |= AMDTEMP_TTSR_SELSENSOR;
860 /* FALLTHROUGH */
861 case CORE1_SENSOR0:
862 case CORE1:
863 if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0)
864 temp |= AMDTEMP_TTSR_SELCORE;
865 break;
866 default:
867 __assert_unreachable();
868 }
869 pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1);
870
871 mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc;
872 offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49;
873 temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4);
874 temp = ((temp >> 14) & mask) * 5 / 2;
875 temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10;
876
877 return (temp);
878 }
879
880 static uint32_t
amdtemp_decode_fam10h_to_17h(int32_t sc_offset,uint32_t val,bool minus49)881 amdtemp_decode_fam10h_to_17h(int32_t sc_offset, uint32_t val, bool minus49)
882 {
883 uint32_t temp;
884
885 /* Convert raw register subfield units (0.125C) to units of 0.1C. */
886 temp = (val & AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4;
887
888 if (minus49)
889 temp -= AMDTEMP_CURTMP_RANGE_ADJUST;
890
891 temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10;
892 return (temp);
893 }
894
895 static uint32_t
amdtemp_decode_fam10h_to_16h(int32_t sc_offset,uint32_t val)896 amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val)
897 {
898 bool minus49;
899
900 /*
901 * On Family 15h and higher, if CurTmpTjSel is 11b, the range is
902 * adjusted down by 49.0 degrees Celsius. (This adjustment is not
903 * documented in BKDGs prior to family 15h model 00h.)
904 */
905 minus49 = (CPUID_TO_FAMILY(cpu_id) >= 0x15 &&
906 ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) &
907 AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3);
908
909 return (amdtemp_decode_fam10h_to_17h(sc_offset,
910 val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
911 }
912
913 static uint32_t
amdtemp_decode_fam17h_tctl(int32_t sc_offset,uint32_t val)914 amdtemp_decode_fam17h_tctl(int32_t sc_offset, uint32_t val)
915 {
916 bool minus49;
917
918 minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0);
919 return (amdtemp_decode_fam10h_to_17h(sc_offset,
920 val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
921 }
922
923 static int32_t
amdtemp_gettemp(device_t dev,amdsensor_t sensor)924 amdtemp_gettemp(device_t dev, amdsensor_t sensor)
925 {
926 struct amdtemp_softc *sc = device_get_softc(dev);
927 uint32_t temp;
928
929 temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4);
930 return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp));
931 }
932
933 static int32_t
amdtemp_gettemp15hm60h(device_t dev,amdsensor_t sensor)934 amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor)
935 {
936 struct amdtemp_softc *sc = device_get_softc(dev);
937 uint32_t val;
938 int error;
939
940 error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val);
941 KASSERT(error == 0, ("amdsmn_read"));
942 return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val));
943 }
944
945 static int32_t
amdtemp_gettemp17to19h(device_t dev,amdsensor_t sensor)946 amdtemp_gettemp17to19h(device_t dev, amdsensor_t sensor)
947 {
948 struct amdtemp_softc *sc = device_get_softc(dev);
949 uint32_t val;
950 int error;
951
952 switch (sensor) {
953 case CORE0_SENSOR0:
954 /* Tctl */
955 error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val);
956 KASSERT(error == 0, ("amdsmn_read"));
957 return (amdtemp_decode_fam17h_tctl(sc->sc_offset, val));
958 case CCD_BASE ... CCD_MAX:
959 /* Tccd<N> */
960 error = amdsmn_read(sc->sc_smn,
961 AMDTEMP_17H_CUR_TMP +
962 sc->sc_ccd_offset +
963 (((int)sensor - CCD_BASE) * sizeof(val)),
964 &val);
965 KASSERT(error == 0, ("amdsmn_read2"));
966 KASSERT((val & AMDTEMP_17H_CCD_TMP_VALID) != 0,
967 ("sensor %d: not valid", (int)sensor));
968 return (amdtemp_decode_fam10h_to_17h(sc->sc_offset, val, true));
969 default:
970 __assert_unreachable();
971 }
972 }
973
974 static void
amdtemp_probe_ccd_sensors17h(device_t dev,uint32_t model)975 amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model)
976 {
977 char sensor_name[16], sensor_descr[32];
978 struct amdtemp_softc *sc;
979 uint32_t maxreg, i, val;
980 int error;
981
982 switch (model) {
983 case 0x00 ... 0x1f: /* Zen1, Zen+ */
984 maxreg = 4;
985 break;
986 case 0x30 ... 0x3f: /* Zen2 TR/Epyc */
987 case 0x60: /* Renoir */
988 case 0x68: /* Lucienne */
989 case 0x70 ... 0x7f: /* Zen2 Ryzen */
990 maxreg = 8;
991 _Static_assert((int)NUM_CCDS >= 8, "");
992 break;
993 case 0xa0 ... 0xaf: /* Zen3 ? */
994 maxreg = 8;
995 _Static_assert((int)NUM_CCDS >= 8, "");
996 break;
997 default:
998 device_printf(dev,
999 "Unrecognized Family 17h Model: %02xh\n", model);
1000 return;
1001 }
1002
1003 sc = device_get_softc(dev);
1004 for (i = 0; i < maxreg; i++) {
1005 error = amdsmn_read(sc->sc_smn,
1006 AMDTEMP_17H_CUR_TMP +
1007 sc->sc_ccd_offset +
1008 (i * sizeof(val)),
1009 &val);
1010 if (error != 0)
1011 continue;
1012 if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0)
1013 continue;
1014
1015 ksnprintf(sensor_name, sizeof(sensor_name), "ccd%u", i);
1016 ksnprintf(sensor_descr, sizeof(sensor_descr),
1017 "CCD %u temperature (Tccd%u)", i, i);
1018
1019 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1020 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1021 sensor_name, CTLTYPE_INT | CTLFLAG_RD,
1022 dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr);
1023
1024 sc->sc_probed_regmask |= 1U << (CCD_BASE + i);
1025 }
1026 }
1027
1028 static void
amdtemp_probe_ccd_sensors19h(device_t dev,uint32_t model)1029 amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model)
1030 {
1031 char sensor_name[16], sensor_descr[32];
1032 struct amdtemp_softc *sc;
1033 uint32_t maxreg, i, val;
1034 int error;
1035
1036 device_printf(dev, "probe ccd sensors 19h %02x\n", model);
1037
1038 switch (model) {
1039 case 0x00 ... 0x0f: /* Zen3 EPYC "Milan" */
1040 case 0x20 ... 0x2f: /* Zen3 Ryzen "Vermeer" */
1041 case 0x40 ... 0x4f:
1042 case 0x50 ... 0x5f:
1043 case 0x60 ... 0x6f:
1044 case 0x70 ... 0x7f:
1045 maxreg = 8;
1046 _Static_assert((int)NUM_CCDS >= 8, "");
1047 break;
1048 case 0x10 ... 0x1f:
1049 case 0xa0 ... 0xaf:
1050 maxreg = 12;
1051 _Static_assert((int)NUM_CCDS >= 12, "");
1052 break;
1053 default:
1054 device_printf(dev,
1055 "Unrecognized Family 19h Model: %02xh\n", model);
1056 return;
1057 }
1058
1059 sc = device_get_softc(dev);
1060 for (i = 0; i < maxreg; i++) {
1061 error = amdsmn_read(sc->sc_smn,
1062 AMDTEMP_17H_CUR_TMP +
1063 sc->sc_ccd_offset +
1064 (i * sizeof(val)),
1065 &val);
1066 device_printf(dev, "probe ccd%d error %d val=%08x\n",
1067 i, error, val);
1068 if (error != 0)
1069 continue;
1070 if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0)
1071 continue;
1072
1073 ksnprintf(sensor_name, sizeof(sensor_name), "ccd%u", i);
1074 ksnprintf(sensor_descr, sizeof(sensor_descr),
1075 "CCD %u temperature (Tccd%u)", i, i);
1076
1077 SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1078 SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1079 sensor_name, CTLTYPE_INT | CTLFLAG_RD,
1080 dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr);
1081
1082 sc->sc_probed_regmask |= 1U << (CCD_BASE + i);
1083 }
1084 }
1085
1086 static void
amdtemp_sensor_task(void * sc_arg)1087 amdtemp_sensor_task(void *sc_arg)
1088 {
1089 struct sensorcpu *scpu = sc_arg;
1090 struct amdtemp_softc *sc;
1091 uint32_t mask;
1092 int32_t temp;
1093 int j;
1094
1095 sc = scpu->sc;
1096 if (sc->sc_ich.ich_arg == NULL)
1097 return;
1098 mask = scpu->regmask;
1099
1100 for (j = 0; mask; ++j) {
1101 if ((mask & (1U << j)) == 0)
1102 continue;
1103 temp = sc->sc_gettemp(scpu->dev, j);
1104 sensor_set(&scpu->sensors[j], temp * 100000L, 0);
1105 mask &= ~(1U << j);
1106 }
1107 }
1108