xref: /dragonfly/sys/dev/powermng/amdtemp/amdtemp.c (revision a765cedf)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2008, 2009 Rui Paulo <rpaulo@FreeBSD.org>
5  * Copyright (c) 2009 Norikatsu Shigemura <nork@FreeBSD.org>
6  * Copyright (c) 2009-2012 Jung-uk Kim <jkim@FreeBSD.org>
7  * All rights reserved.
8  * Copyright (c) 2017-2020 Conrad Meyer <cem@FreeBSD.org>. All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
20  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22  * DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
23  * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
27  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
28  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  * $FreeBSD: head/sys/dev/amdtemp/amdtemp.c 366136 2020-09-25 04:16:28Z cem $
32  */
33 
34 /*
35  * Driver for the AMD CPU on-die thermal sensors.
36  * Initially based on the k8temp Linux driver.
37  */
38 
39 #include <sys/param.h>
40 #include <sys/bus.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/module.h>
44 #include <sys/sysctl.h>
45 #include <sys/systm.h>
46 #include <sys/malloc.h>
47 #include <sys/sensors.h>
48 
49 #include <machine/cpufunc.h>
50 #include <machine/md_var.h>
51 #include <machine/specialreg.h>
52 
53 #include <bus/pci/pcivar.h>
54 #include <bus/pci/pci_cfgreg.h>
55 
56 #include <dev/powermng/amdsmn/amdsmn.h>
57 
/*
 * Sensor identifiers.
 *
 * The numeric value of each enumerator is used as a bit position in
 * sc_probed_regmask / sensorcpu.regmask and as the index into the per-cpu
 * ksensor array, so the order of the enumerators is significant.  In
 * particular the four CORE*_SENSOR* values must stay at 0..3; the sensor
 * setup code partitions the mask with the literals 0x3, 0xC and ~0xF.
 */
typedef enum {
	CORE0_SENSOR0,
	CORE0_SENSOR1,
	CORE1_SENSOR0,
	CORE1_SENSOR1,
	CORE0,			/* max of the two core 0 sensors */
	CORE1,			/* max of the two core 1 sensors */
	CCD1,
	CCD_BASE = CCD1,	/* first per-CCD sensor */
	CCD2,
	CCD3,
	CCD4,
	CCD5,
	CCD6,
	CCD7,
	CCD8,
	CCD9,
	CCD10,
	CCD11,
	CCD12,
	MAXSENSORS,		/* number of sensor slots, not a sensor */

	CCD_MAX = CCD12,	/* last per-CCD sensor */
	NUM_CCDS = CCD_MAX - CCD_BASE + 1,
} amdsensor_t;
83 
/*
 * Per-device driver state.
 */
struct amdtemp_softc {
	int		sc_ncores;	/* core count computed at attach */
	int		sc_ntemps;	/* sensors per core (1 or 2) */
	int		sc_flags;	/* AMDTEMP_FLAG_* below */
	int		sc_ccd_display;	/* name sensordevs "dieN", not "cpuN" */
#define	AMDTEMP_FLAG_CS_SWAP	0x01	/* ThermSenseCoreSel is inverted. */
#define	AMDTEMP_FLAG_CT_10BIT	0x02	/* CurTmp is 10-bit wide. */
#define	AMDTEMP_FLAG_ALT_OFFSET	0x04	/* CurTmp starts at -28C. */
	int32_t		sc_offset;	/* sensor_offset tunable, in degrees */
	int32_t		sc_ccd_offset;	/* first Tccd reg, rel. to CUR_TMP */
	int32_t		(*sc_gettemp)(device_t, amdsensor_t); /* per family */
	struct sysctl_oid *sc_sysctl_cpu[MAXCPU]; /* dev.cpu.N.temperature */
	struct intr_config_hook sc_ich;
	device_t	sc_smn;		/* amdsmn device, if SMN is needed */
	uint32_t	sc_probed_regmask; /* bit per amdsensor_t found */

	/*
	 * NOTE: This array is allocated with sc_ncores + 1 entries.  Entries
	 *	 0 and 1 carry the per-core sensors of core 0 and core 1; the
	 *	 last entry ([sc_ncores]) carries every other probed sensor,
	 *	 e.g. the shared CCD sensors.  Remaining cores are only
	 *	 applicable if ntemps == 2 (with no CCDs).  When ntemps == 1
	 *	 the temp sensors are CCD-based and shared.
	 */
	struct sensorcpu {
		device_t dev;
		struct amdtemp_softc *sc;
		struct ksensordev    sensordev;
		struct ksensor	     *sensors;	/* MAXSENSORS entries */
		struct sensor_task   *senstask;
		uint32_t regmask;		/* sensors owned by this entry */
	} *sc_sensorcpus;
};
114 
115 /*
116  * N.B. The numbers in macro names below are significant and represent CPU
117  * family and model numbers.  Do not make up fictitious family or model numbers
118  * when adding support for new devices.
119  */
/* PCI vendor ID shared by every device this driver matches. */
#define	VENDORID_AMD			0x1022

/* PCI device IDs; each one appears in amdtemp_products[]. */
#define	DEVICEID_AMD_MISC0F		0x1103
#define	DEVICEID_AMD_MISC10		0x1203
#define	DEVICEID_AMD_MISC11		0x1303
#define	DEVICEID_AMD_MISC14		0x1703
#define	DEVICEID_AMD_MISC15		0x1603
#define	DEVICEID_AMD_MISC15_M10H	0x1403
#define	DEVICEID_AMD_MISC15_M30H	0x141d
#define	DEVICEID_AMD_MISC15_M60H_ROOT	0x1576
#define	DEVICEID_AMD_MISC16		0x1533
#define	DEVICEID_AMD_MISC16_M30H	0x1583
#define	DEVICEID_AMD_HOSTB17H_ROOT	0x1450
#define	DEVICEID_AMD_HOSTB17H_M10H_ROOT	0x15d0
#define	DEVICEID_AMD_HOSTB17H_M30H_ROOT	0x1480	/* Also M70h. */
#define	DEVICEID_AMD_HOSTB17H_M60H_ROOT	0x1630
#define DEVICEID_AMD_HOSTB17H_M70H_ROOT 0x1443
#define DEVICEID_AMD_HOSTB17H_MA0H_ROOT 0x1727

/*
 * Disabled alternative values.  Several of these names are defined again,
 * with different values, right after this block; those are the ones in use.
 */
#if 0
#define DEVICEID_AMD_HOSTB19H_M10H_ROOT 0x14b0
#define DEVICEID_AMD_HOSTB19H_M40H_ROOT 0x167c
#define DEVICEID_AMD_HOSTB19H_M50H_ROOT 0x166d
#define DEVICEID_AMD_HOSTB19H_M60H_ROOT 0x14e3
#define DEVICEID_AMD_HOSTB19H_M70H_ROOT 0x14f3
#endif
#define DEVICEID_AMD_HOSTB19H_M10H_ROOT 0x14a4
#define DEVICEID_AMD_HOSTB19H_M60H_ROOT 0x14d8
#define DEVICEID_AMD_HOSTB19H_M70H_ROOT 0x14e8
149 
150 
/*
 * Table of PCI vendor/device pairs this driver supports.  amdtemp_match()
 * searches it linearly using the IDs of the parent device.
 */
static const struct amdtemp_product {
	uint16_t	amdtemp_vendorid;
	uint16_t	amdtemp_deviceid;
	/*
	 * 0xFC register is only valid on the D18F3 PCI device; SMN temp
	 * drivers do not attach to that device.
	 */
	bool		amdtemp_has_cpuid;
} amdtemp_products[] = {
	{ VENDORID_AMD,	DEVICEID_AMD_MISC0F, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC10, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC11, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC14, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M10H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M30H, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC15_M60H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16, true },
	{ VENDORID_AMD,	DEVICEID_AMD_MISC16_M30H, true },

	/* Family 17h root devices (temperature is read through SMN). */
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M10H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M30H_ROOT, false },
	{ VENDORID_AMD,	DEVICEID_AMD_HOSTB17H_M60H_ROOT, false },
	{ VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M70H_ROOT, false },
	{ VENDORID_AMD, DEVICEID_AMD_HOSTB17H_MA0H_ROOT, false },

	/* Family 19h root devices. */
	{ VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M10H_ROOT, false },
#if 0
	/* Disabled: entries whose IDs are only defined under "#if 0". */
	{ VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M40H_ROOT, false },
	{ VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M50H_ROOT, false },
#endif
	{ VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M60H_ROOT, false },
	{ VENDORID_AMD, DEVICEID_AMD_HOSTB19H_M70H_ROOT, false },
};
186 
187 /*
188  * Reported Temperature Control Register, family 0Fh-15h (some models), 16h.
189  */
#define	AMDTEMP_REPTMP_CTRL	0xa4

/* CurTmp and CurTmpTjSel fields of the Reported Temperature register. */
#define	AMDTEMP_REPTMP10H_CURTMP_MASK	0x7ff
#define	AMDTEMP_REPTMP10H_CURTMP_SHIFT	21
#define	AMDTEMP_REPTMP10H_TJSEL_MASK	0x3
#define	AMDTEMP_REPTMP10H_TJSEL_SHIFT	16

/*
 * Reported Temperature, Family 15h, M60+
 *
 * Same register bit definitions as other Family 15h CPUs, but access is
 * indirect via SMN, like Family 17h.
 */
#define	AMDTEMP_15H_M60H_REPTMP_CTRL	0xd8200ca4

/*
 * Reported Temperature, Family 17h
 *
 * According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register
 * provide the current temp.  bit 19, when clear, means the temp is reported in
 * a range 0.."225C" (probable typo for 255C), and when set changes the range
 * to -49..206C.
 *
 * Used for both Family 17h and Family 19h.
 */
#define	AMDTEMP_17H_CUR_TMP		0x59800
#define	AMDTEMP_17H_CUR_TMP_RANGE_SEL	(1u << 19)
/*
 * Valid bit of a per-CCD temperature (Tccd) register.  The CCD probe
 * routines skip any register that does not have it set.
 */
#define	AMDTEMP_17H_CCD_TMP_VALID	(1u << 11)

/*
 * AMD temperature range adjustment, in deciKelvins (i.e., 49.0 Celsius).
 */
#define	AMDTEMP_CURTMP_RANGE_ADJUST	490

/*
 * Thermaltrip Status Register (Family 0Fh only)
 */
#define	AMDTEMP_THERMTP_STAT	0xe4
#define	AMDTEMP_TTSR_SELCORE	0x04
#define	AMDTEMP_TTSR_SELSENSOR	0x40

/*
 * DRAM Configuration High Register
 */
#define	AMDTEMP_DRAM_CONF_HIGH	0x94	/* Function 2 */
#define	AMDTEMP_DRAM_MODE_DDR3	0x0100

/*
 * CPU Family/Model Register
 */
#define	AMDTEMP_CPUID		0xfc
244 
245 /*
246  * Device methods.
247  */
static void 	amdtemp_identify(driver_t *driver, device_t parent);
static int	amdtemp_probe(device_t dev);
static int	amdtemp_attach(device_t dev);
static void	amdtemp_intrhook(void *arg);
static int	amdtemp_detach(device_t dev);
/* Temperature readers; attach stores one of them in sc_gettemp. */
static int32_t	amdtemp_gettemp0f(device_t dev, amdsensor_t sensor);
static int32_t	amdtemp_gettemp(device_t dev, amdsensor_t sensor);
static int32_t	amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor);
static int32_t	amdtemp_gettemp17to19h(device_t dev, amdsensor_t sensor);
/* Per-CCD sensor discovery for families 17h and 19h. */
static void	amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model);
static void	amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model);
static int	amdtemp_sysctl(SYSCTL_HANDLER_ARGS);
static void	amdtemp_sensor_task(void *);

static device_method_t amdtemp_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify,	amdtemp_identify),
	DEVMETHOD(device_probe,		amdtemp_probe),
	DEVMETHOD(device_attach,	amdtemp_attach),
	DEVMETHOD(device_detach,	amdtemp_detach),

	DEVMETHOD_END
};

static driver_t amdtemp_driver = {
	"amdtemp",
	amdtemp_methods,
	sizeof(struct amdtemp_softc),
};

/*
 * The driver is registered as a child of "hostb" and declares a module
 * dependency on amdsmn, which attach looks up for SMN register access.
 */
static devclass_t amdtemp_devclass;
DRIVER_MODULE_ORDERED(amdtemp, hostb, amdtemp_driver,
		      &amdtemp_devclass, NULL, NULL, SI_ORDER_LATER);
MODULE_VERSION(amdtemp, 1);
MODULE_DEPEND(amdtemp, amdsmn, 1, 1, 1);
#if !defined(__DragonFly__)
MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdtemp, amdtemp_products,
    nitems(amdtemp_products));
#endif
287 
288 static bool
289 amdtemp_match(device_t dev, const struct amdtemp_product **product_out)
290 {
291 	int i;
292 	uint16_t vendor, devid;
293 
294 	vendor = pci_get_vendor(dev);
295 	devid = pci_get_device(dev);
296 
297 	for (i = 0; i < nitems(amdtemp_products); i++) {
298 		if (vendor == amdtemp_products[i].amdtemp_vendorid &&
299 		    devid == amdtemp_products[i].amdtemp_deviceid) {
300 			if (product_out != NULL)
301 				*product_out = &amdtemp_products[i];
302 			return (true);
303 		}
304 	}
305 	return (false);
306 }
307 
308 static void
309 amdtemp_identify(driver_t *driver, device_t parent)
310 {
311 	device_t child;
312 
313 	/* Make sure we're not being doubly invoked. */
314 	if (device_find_child(parent, "amdtemp", -1) != NULL)
315 		return;
316 
317 	if (amdtemp_match(parent, NULL)) {
318 		child = device_add_child(parent, "amdtemp", -1);
319 		if (child == NULL)
320 			device_printf(parent, "add amdtemp child failed\n");
321 	}
322 }
323 
324 static int
325 amdtemp_probe(device_t dev)
326 {
327 	uint32_t family, model;
328 
329 	if (resource_disabled("amdtemp", 0))
330 		return (ENXIO);
331 	if (!amdtemp_match(device_get_parent(dev), NULL))
332 		return (ENXIO);
333 
334 	family = CPUID_TO_FAMILY(cpu_id);
335 	model = CPUID_TO_MODEL(cpu_id);
336 
337 	switch (family) {
338 	case 0x0f:
339 		if ((model == 0x04 && (cpu_id & CPUID_STEPPING) == 0) ||
340 		    (model == 0x05 && (cpu_id & CPUID_STEPPING) <= 1))
341 			return (ENXIO);
342 		break;
343 	case 0x10:
344 	case 0x11:
345 	case 0x12:
346 	case 0x14:
347 	case 0x15:
348 	case 0x16:
349 	case 0x17:
350 	case 0x19:
351 		break;
352 	default:
353 		return (ENXIO);
354 	}
355 	device_set_desc(dev, "AMD CPU On-Die Thermal Sensors");
356 
357 	return (BUS_PROBE_GENERIC);
358 }
359 
360 static int
361 amdtemp_attach(device_t dev)
362 {
363 	char tn[32];
364 	u_int regs[4];
365 	const struct amdtemp_product *product;
366 	struct amdtemp_softc *sc;
367 	struct sysctl_ctx_list *sysctlctx;
368 	struct sysctl_oid *sysctlnode;
369 	uint32_t cpuid, family, model;
370 	u_int bid;
371 	int erratum319, unit;
372 	bool needsmn;
373 
374 	sc = device_get_softc(dev);
375 	erratum319 = 0;
376 	needsmn = false;
377 
378 	if (!amdtemp_match(device_get_parent(dev), &product))
379 		return (ENXIO);
380 
381 	cpuid = cpu_id;
382 	family = CPUID_TO_FAMILY(cpuid);
383 	model = CPUID_TO_MODEL(cpuid);
384 
385 	/*
386 	 * This checks for the byzantine condition of running a heterogenous
387 	 * revision multi-socket system where the attach thread is potentially
388 	 * probing a remote socket's PCI device.
389 	 *
390 	 * Currently, such scenarios are unsupported on models using the SMN
391 	 * (because on those models, amdtemp(4) attaches to a different PCI
392 	 * device than the one that contains AMDTEMP_CPUID).
393 	 *
394 	 * The ancient 0x0F family of devices only supports this register from
395 	 * models 40h+.
396 	 */
397 	if (product->amdtemp_has_cpuid && (family > 0x0f ||
398 	    (family == 0x0f && model >= 0x40))) {
399 		cpuid = pci_read_config(device_get_parent(dev), AMDTEMP_CPUID,
400 		    4);
401 		family = CPUID_TO_FAMILY(cpuid);
402 		model = CPUID_TO_MODEL(cpuid);
403 	}
404 
405 	switch (family) {
406 	case 0x0f:
407 		/*
408 		 * Thermaltrip Status Register
409 		 *
410 		 * - ThermSenseCoreSel
411 		 *
412 		 * Revision F & G:	0 - Core1, 1 - Core0
413 		 * Other:		0 - Core0, 1 - Core1
414 		 *
415 		 * - CurTmp
416 		 *
417 		 * Revision G:		bits 23-14
418 		 * Other:		bits 23-16
419 		 *
420 		 * XXX According to the BKDG, CurTmp, ThermSenseSel and
421 		 * ThermSenseCoreSel bits were introduced in Revision F
422 		 * but CurTmp seems working fine as early as Revision C.
423 		 * However, it is not clear whether ThermSenseSel and/or
424 		 * ThermSenseCoreSel work in undocumented cases as well.
425 		 * In fact, the Linux driver suggests it may not work but
426 		 * we just assume it does until we find otherwise.
427 		 *
428 		 * XXX According to Linux, CurTmp starts at -28C on
429 		 * Socket AM2 Revision G processors, which is not
430 		 * documented anywhere.
431 		 */
432 		if (model >= 0x40)
433 			sc->sc_flags |= AMDTEMP_FLAG_CS_SWAP;
434 		if (model >= 0x60 && model != 0xc1) {
435 			do_cpuid(0x80000001, regs);
436 			bid = (regs[1] >> 9) & 0x1f;
437 			switch (model) {
438 			case 0x68: /* Socket S1g1 */
439 			case 0x6c:
440 			case 0x7c:
441 				break;
442 			case 0x6b: /* Socket AM2 and ASB1 (2 cores) */
443 				if (bid != 0x0b && bid != 0x0c)
444 					sc->sc_flags |=
445 					    AMDTEMP_FLAG_ALT_OFFSET;
446 				break;
447 			case 0x6f: /* Socket AM2 and ASB1 (1 core) */
448 			case 0x7f:
449 				if (bid != 0x07 && bid != 0x09 &&
450 				    bid != 0x0c)
451 					sc->sc_flags |=
452 					    AMDTEMP_FLAG_ALT_OFFSET;
453 				break;
454 			default:
455 				sc->sc_flags |= AMDTEMP_FLAG_ALT_OFFSET;
456 			}
457 			sc->sc_flags |= AMDTEMP_FLAG_CT_10BIT;
458 		}
459 
460 		/*
461 		 * There are two sensors per core.
462 		 */
463 		sc->sc_ntemps = 2;
464 		sc->sc_ccd_display = 0;
465 
466 		sc->sc_gettemp = amdtemp_gettemp0f;
467 		break;
468 	case 0x10:
469 		/*
470 		 * Erratum 319 Inaccurate Temperature Measurement
471 		 *
472 		 * http://support.amd.com/us/Processor_TechDocs/41322.pdf
473 		 */
474 		do_cpuid(0x80000001, regs);
475 		switch ((regs[1] >> 28) & 0xf) {
476 		case 0:	/* Socket F */
477 			erratum319 = 1;
478 			break;
479 		case 1:	/* Socket AM2+ or AM3 */
480 			if ((pci_cfgregread(pci_get_bus(dev),
481 			    pci_get_slot(dev), 2, AMDTEMP_DRAM_CONF_HIGH, 2) &
482 			    AMDTEMP_DRAM_MODE_DDR3) != 0 || model > 0x04 ||
483 			    (model == 0x04 && (cpuid & CPUID_STEPPING) >= 3))
484 				break;
485 			/* XXX 00100F42h (RB-C2) exists in both formats. */
486 			erratum319 = 1;
487 			break;
488 		}
489 		/* FALLTHROUGH */
490 	case 0x11:
491 	case 0x12:
492 	case 0x14:
493 	case 0x15:
494 	case 0x16:
495 		sc->sc_ntemps = 1;
496 		sc->sc_ccd_display = 1;
497 		/*
498 		 * Some later (60h+) models of family 15h use a similar SMN
499 		 * network as family 17h.  (However, the register index differs
500 		 * from 17h and the decoding matches other 10h-15h models,
501 		 * which differ from 17h.)
502 		 */
503 		if (family == 0x15 && model >= 0x60) {
504 			sc->sc_gettemp = amdtemp_gettemp15hm60h;
505 			needsmn = true;
506 		} else
507 			sc->sc_gettemp = amdtemp_gettemp;
508 		break;
509 	case 0x17:
510 	case 0x19:
511 		sc->sc_ntemps = 1;
512 		sc->sc_ccd_display = 1;
513 		sc->sc_gettemp = amdtemp_gettemp17to19h;
514 		switch(model) {
515 		case 0x10 ... 0x1f:
516 		case 0xa0 ... 0xaf:
517 		case 0x40 ... 0x4f:
518 			sc->sc_ccd_offset = 0x300;
519 			break;
520 		case 0x60 ... 0x6f:
521 		case 0x70 ... 0x7f:
522 			sc->sc_ccd_offset = 0x308;
523 			break;
524 		default:
525 			sc->sc_ccd_offset = 0x154;
526 			break;
527 		}
528 		needsmn = true;
529 		device_printf(dev, "sc_ccd_offset = %08x\n", sc->sc_ccd_offset);
530 		break;
531 	default:
532 		device_printf(dev, "Bogus family 0x%x\n", family);
533 		return (ENXIO);
534 	}
535 
536 	if (needsmn) {
537 		sc->sc_smn = device_find_child(
538 		    device_get_parent(dev), "amdsmn", -1);
539 		if (sc->sc_smn == NULL) {
540 			device_printf(dev, "No SMN device found\n");
541 			return (ENXIO);
542 		}
543 	}
544 
545 	/*
546 	 * Find number of cores per package.  XXX this does not work
547 	 * properly, it appears to be calculating the total number of cores.
548 	 */
549 
550 	sc->sc_ncores = (amd_feature2 & AMDID2_CMP) != 0 ?
551 	    (cpu_procinfo2 & AMDID_CMP_CORES) + 1 : 1;
552 	if (sc->sc_ncores > MAXCPU)
553 		return (ENXIO);
554 
555 	if (erratum319)
556 		device_printf(dev,
557 		    "Erratum 319: temperature measurement may be inaccurate\n");
558 	if (bootverbose)
559 		device_printf(dev, "Found %d cores and %d sensors.\n",
560 		    sc->sc_ncores,
561 		    sc->sc_ntemps > 1 ? sc->sc_ntemps * sc->sc_ncores : 1);
562 
563 	/*
564 	 * dev.amdtemp.N tree.
565 	 */
566 	unit = device_get_unit(dev);
567 	ksnprintf(tn, sizeof(tn), "dev.amdtemp.%d.sensor_offset", unit);
568 	TUNABLE_INT_FETCH(tn, &sc->sc_offset);
569 
570 	sysctlctx = device_get_sysctl_ctx(dev);
571 	SYSCTL_ADD_INT(sysctlctx,
572 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
573 	    "sensor_offset", CTLFLAG_RW, &sc->sc_offset, 0,
574 	    "Temperature sensor offset");
575 	sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
576 	    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
577 	    "core0", CTLFLAG_RD, 0, "Core 0");
578 
579 	SYSCTL_ADD_PROC(sysctlctx,
580 	    SYSCTL_CHILDREN(sysctlnode),
581 	    OID_AUTO, "sensor0",
582 	    CTLTYPE_INT | CTLFLAG_RD,
583 	    dev, CORE0_SENSOR0, amdtemp_sysctl, "IK",
584 	    "Core 0 / Sensor 0 temperature");
585 
586 	sc->sc_probed_regmask |= 1U << CORE0_SENSOR0;
587 
588 	if (family == 0x17)
589 		amdtemp_probe_ccd_sensors17h(dev, model);
590 	else if (family == 0x19)
591 		amdtemp_probe_ccd_sensors19h(dev, model);
592 	else if (sc->sc_ntemps > 1) {
593 		SYSCTL_ADD_PROC(sysctlctx,
594 		    SYSCTL_CHILDREN(sysctlnode),
595 		    OID_AUTO, "sensor1",
596 		    CTLTYPE_INT | CTLFLAG_RD,
597 		    dev, CORE0_SENSOR1, amdtemp_sysctl, "IK",
598 		    "Core 0 / Sensor 1 temperature");
599 
600 		sc->sc_probed_regmask |= 1U << CORE0_SENSOR1;
601 
602 		if (sc->sc_ncores > 1) {
603 			sysctlnode = SYSCTL_ADD_NODE(sysctlctx,
604 			    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
605 			    OID_AUTO, "core1", CTLFLAG_RD,
606 			    0, "Core 1");
607 
608 			SYSCTL_ADD_PROC(sysctlctx,
609 			    SYSCTL_CHILDREN(sysctlnode),
610 			    OID_AUTO, "sensor0",
611 			    CTLTYPE_INT | CTLFLAG_RD,
612 			    dev, CORE1_SENSOR0, amdtemp_sysctl, "IK",
613 			    "Core 1 / Sensor 0 temperature");
614 
615 			SYSCTL_ADD_PROC(sysctlctx,
616 			    SYSCTL_CHILDREN(sysctlnode),
617 			    OID_AUTO, "sensor1",
618 			    CTLTYPE_INT | CTLFLAG_RD,
619 			    dev, CORE1_SENSOR1, amdtemp_sysctl, "IK",
620 			    "Core 1 / Sensor 1 temperature");
621 
622 			sc->sc_probed_regmask |= 1U << CORE1_SENSOR0;
623 			sc->sc_probed_regmask |= 1U << CORE1_SENSOR1;
624 		}
625 	}
626 
627 	/*
628 	 * Try to create dev.cpu sysctl entries and setup intrhook function.
629 	 * This is needed because the cpu driver may be loaded late on boot,
630 	 * after us.
631 	 */
632 	amdtemp_intrhook(dev);
633 	sc->sc_ich.ich_func = amdtemp_intrhook;
634 	sc->sc_ich.ich_arg = dev;
635 	if (config_intrhook_establish(&sc->sc_ich) != 0) {
636 		device_printf(dev, "config_intrhook_establish failed!\n");
637 		return (ENXIO);
638 	}
639 
640 	return (0);
641 }
642 
643 void
644 amdtemp_intrhook(void *arg)
645 {
646 	struct amdtemp_softc *sc;
647 	struct sysctl_ctx_list *sysctlctx;
648 	device_t dev = (device_t)arg;
649 	device_t acpi, cpu, nexus;
650 	amdsensor_t sensor;
651 	int i;
652 	int j;
653 
654 	sc = device_get_softc(dev);
655 	if (sc->sc_ich.ich_arg == NULL)
656 		return;
657 
658 	/*
659 	 * dev.cpu.N.temperature.
660 	 */
661 	nexus = device_find_child(root_bus, "nexus", 0);
662 	acpi = device_find_child(nexus, "acpi", 0);
663 
664 	for (i = 0; i < sc->sc_ncores; i++) {
665 		if (sc->sc_sysctl_cpu[i] != NULL)
666 			continue;
667 		cpu = device_find_child(acpi, "cpu",
668 		    device_get_unit(dev) * sc->sc_ncores + i);
669 		if (cpu != NULL) {
670 			sysctlctx = device_get_sysctl_ctx(cpu);
671 
672 			sensor = sc->sc_ntemps > 1 ?
673 			    (i == 0 ? CORE0 : CORE1) : CORE0_SENSOR0;
674 			sc->sc_sysctl_cpu[i] = SYSCTL_ADD_PROC(sysctlctx,
675 			    SYSCTL_CHILDREN(device_get_sysctl_tree(cpu)),
676 			    OID_AUTO, "temperature",
677 			    CTLTYPE_INT | CTLFLAG_RD,
678 			    dev, sensor, amdtemp_sysctl, "IK",
679 			    "Current temparature");
680 		}
681 	}
682 	config_intrhook_disestablish(&sc->sc_ich);
683 
684 	/*
685 	 * sensor infrastructure.  Use [ncpus] for globally shared sensors
686 	 */
687 	sc->sc_sensorcpus = kmalloc(sizeof(*sc->sc_sensorcpus) *
688 				   (sc->sc_ncores + 1),
689 				   M_DEVBUF, M_WAITOK | M_ZERO);
690 
691 	for (i = 0; i <= sc->sc_ncores; i++) {
692 		struct sensorcpu *scpu = &sc->sc_sensorcpus[i];
693 
694 		if (i == 0)
695 			scpu->regmask = sc->sc_probed_regmask & 0x0003U;
696 		else if (i == 1)
697 			scpu->regmask = sc->sc_probed_regmask & 0x000CU;
698 		else if (i != sc->sc_ncores)
699 			scpu->regmask = 0;
700 		else
701 			scpu->regmask = sc->sc_probed_regmask & ~0xFU;
702 
703 		if (scpu->regmask == 0)
704 			continue;
705 
706 		if (sc->sc_ccd_display) {
707 			ksnprintf(scpu->sensordev.xname,
708 				  sizeof(scpu->sensordev.xname),
709 				  "die%d", device_get_unit(dev));
710 		} else {
711 			ksnprintf(scpu->sensordev.xname,
712 				  sizeof(scpu->sensordev.xname),
713 				  "cpu%d", i);
714 		}
715 
716 		scpu->dev = dev;
717 		scpu->sc = sc;
718 		scpu->sensors = kmalloc(sizeof(*scpu->sensors) * MAXSENSORS,
719 					M_DEVBUF, M_WAITOK | M_ZERO);
720 		for (j = 0; j < MAXSENSORS; ++j) {
721 			if ((scpu->regmask & (1U << j)) == 0)
722 				continue;
723 
724 			switch(j) {
725 			case CORE0_SENSOR0:
726 			case CORE0_SENSOR1:
727 			case CORE1_SENSOR0:
728 			case CORE1_SENSOR1:
729 				if (sc->sc_ccd_display) {
730 					ksnprintf(scpu->sensors[j].desc,
731 						  sizeof(scpu->sensors[0].desc),
732 						  "high temp");
733 				} else {
734 					ksnprintf(scpu->sensors[j].desc,
735 						  sizeof(scpu->sensors[0].desc),
736 						  "temp%d", j & 1);
737 				}
738 				break;
739 			case CORE0:
740 				ksnprintf(scpu->sensors[j].desc,
741 					  sizeof(scpu->sensors[0].desc),
742 					  "core0 rollup temp");
743 				break;
744 			case CORE1:
745 				ksnprintf(scpu->sensors[j].desc,
746 					  sizeof(scpu->sensors[0].desc),
747 					  "core1 rollup temp");
748 				break;
749 			case CCD_BASE ... CCD_MAX:
750 				ksnprintf(scpu->sensors[j].desc,
751 					  sizeof(scpu->sensors[0].desc),
752 					  "ccd%u temp", j - CCD_BASE);
753 				break;
754 			}
755 			scpu->sensors[j].type = SENSOR_TEMP;
756 			sensor_set_unknown(&scpu->sensors[j]);
757 			sensor_attach(&scpu->sensordev, &scpu->sensors[j]);
758 		}
759 		scpu->senstask = sensor_task_register2(scpu,
760 						       amdtemp_sensor_task,
761 						       2,
762 						       ((i < sc->sc_ncores) ?
763 							i : -1));
764 		sensordev_install(&scpu->sensordev);
765 	}
766 }
767 
768 int
769 amdtemp_detach(device_t dev)
770 {
771 	struct amdtemp_softc *sc = device_get_softc(dev);
772 	int i;
773 	int j;
774 
775 	for (i = 0; i < sc->sc_ncores; i++) {
776 		if (sc->sc_sysctl_cpu[i] != NULL)
777 			sysctl_remove_oid(sc->sc_sysctl_cpu[i], 1, 0);
778 	}
779 
780 	if (sc->sc_sensorcpus) {
781 		for (i = 0; i <= sc->sc_ncores; i++) {
782 			struct sensorcpu *scpu = &sc->sc_sensorcpus[i];
783 
784 			if (scpu->sensors) {
785 				for (j = 0; j < MAXSENSORS; ++j) {
786 					if ((scpu->regmask & (1U << j)) == 0)
787 						continue;
788 					sensor_detach(&scpu->sensordev,
789 						      &scpu->sensors[j]);
790 				}
791 				if (scpu->senstask) {
792 					sensor_task_unregister2(scpu->senstask);
793 					scpu->senstask = NULL;
794 				}
795 				sensordev_deinstall(&scpu->sensordev);
796 				kfree(scpu->sensors, M_DEVBUF);
797 				scpu->sensors = NULL;
798 			}
799 		}
800 		kfree(sc->sc_sensorcpus, M_DEVBUF);
801 		sc->sc_sensorcpus = NULL;
802 	}
803 
804 	/* NewBus removes the dev.amdtemp.N tree by itself. */
805 
806 	return (0);
807 }
808 
809 static int
810 amdtemp_sysctl(SYSCTL_HANDLER_ARGS)
811 {
812 	device_t dev = (device_t)arg1;
813 	struct amdtemp_softc *sc = device_get_softc(dev);
814 	amdsensor_t sensor = (amdsensor_t)arg2;
815 	int32_t auxtemp[2], temp;
816 	int error;
817 
818 	switch (sensor) {
819 	case CORE0:
820 		auxtemp[0] = sc->sc_gettemp(dev, CORE0_SENSOR0);
821 		auxtemp[1] = sc->sc_gettemp(dev, CORE0_SENSOR1);
822 		temp = imax(auxtemp[0], auxtemp[1]);
823 		break;
824 	case CORE1:
825 		auxtemp[0] = sc->sc_gettemp(dev, CORE1_SENSOR0);
826 		auxtemp[1] = sc->sc_gettemp(dev, CORE1_SENSOR1);
827 		temp = imax(auxtemp[0], auxtemp[1]);
828 		break;
829 	default:
830 		temp = sc->sc_gettemp(dev, sensor);
831 		break;
832 	}
833 	error = sysctl_handle_int(oidp, &temp, 0, req);
834 
835 	return (error);
836 }
837 
/*
 * Value added to a temperature in tenths of a degree Celsius by the decode
 * routines below to obtain the tenths-of-a-Kelvin value they return.
 */
#define	AMDTEMP_ZERO_C_TO_K	2731
839 
840 static int32_t
841 amdtemp_gettemp0f(device_t dev, amdsensor_t sensor)
842 {
843 	struct amdtemp_softc *sc = device_get_softc(dev);
844 	uint32_t mask, offset, temp;
845 
846 	/* Set Sensor/Core selector. */
847 	temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 1);
848 	temp &= ~(AMDTEMP_TTSR_SELCORE | AMDTEMP_TTSR_SELSENSOR);
849 	switch (sensor) {
850 	case CORE0_SENSOR1:
851 		temp |= AMDTEMP_TTSR_SELSENSOR;
852 		/* FALLTHROUGH */
853 	case CORE0_SENSOR0:
854 	case CORE0:
855 		if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) != 0)
856 			temp |= AMDTEMP_TTSR_SELCORE;
857 		break;
858 	case CORE1_SENSOR1:
859 		temp |= AMDTEMP_TTSR_SELSENSOR;
860 		/* FALLTHROUGH */
861 	case CORE1_SENSOR0:
862 	case CORE1:
863 		if ((sc->sc_flags & AMDTEMP_FLAG_CS_SWAP) == 0)
864 			temp |= AMDTEMP_TTSR_SELCORE;
865 		break;
866 	default:
867 		__assert_unreachable();
868 	}
869 	pci_write_config(dev, AMDTEMP_THERMTP_STAT, temp, 1);
870 
871 	mask = (sc->sc_flags & AMDTEMP_FLAG_CT_10BIT) != 0 ? 0x3ff : 0x3fc;
872 	offset = (sc->sc_flags & AMDTEMP_FLAG_ALT_OFFSET) != 0 ? 28 : 49;
873 	temp = pci_read_config(dev, AMDTEMP_THERMTP_STAT, 4);
874 	temp = ((temp >> 14) & mask) * 5 / 2;
875 	temp += AMDTEMP_ZERO_C_TO_K + (sc->sc_offset - offset) * 10;
876 
877 	return (temp);
878 }
879 
880 static uint32_t
881 amdtemp_decode_fam10h_to_17h(int32_t sc_offset, uint32_t val, bool minus49)
882 {
883 	uint32_t temp;
884 
885 	/* Convert raw register subfield units (0.125C) to units of 0.1C. */
886 	temp = (val & AMDTEMP_REPTMP10H_CURTMP_MASK) * 5 / 4;
887 
888 	if (minus49)
889 		temp -= AMDTEMP_CURTMP_RANGE_ADJUST;
890 
891 	temp += AMDTEMP_ZERO_C_TO_K + sc_offset * 10;
892 	return (temp);
893 }
894 
895 static uint32_t
896 amdtemp_decode_fam10h_to_16h(int32_t sc_offset, uint32_t val)
897 {
898 	bool minus49;
899 
900 	/*
901 	 * On Family 15h and higher, if CurTmpTjSel is 11b, the range is
902 	 * adjusted down by 49.0 degrees Celsius.  (This adjustment is not
903 	 * documented in BKDGs prior to family 15h model 00h.)
904 	 */
905 	minus49 = (CPUID_TO_FAMILY(cpu_id) >= 0x15 &&
906 	    ((val >> AMDTEMP_REPTMP10H_TJSEL_SHIFT) &
907 	    AMDTEMP_REPTMP10H_TJSEL_MASK) == 0x3);
908 
909 	return (amdtemp_decode_fam10h_to_17h(sc_offset,
910 	    val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
911 }
912 
913 static uint32_t
914 amdtemp_decode_fam17h_tctl(int32_t sc_offset, uint32_t val)
915 {
916 	bool minus49;
917 
918 	minus49 = ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0);
919 	return (amdtemp_decode_fam10h_to_17h(sc_offset,
920 	    val >> AMDTEMP_REPTMP10H_CURTMP_SHIFT, minus49));
921 }
922 
923 static int32_t
924 amdtemp_gettemp(device_t dev, amdsensor_t sensor)
925 {
926 	struct amdtemp_softc *sc = device_get_softc(dev);
927 	uint32_t temp;
928 
929 	temp = pci_read_config(dev, AMDTEMP_REPTMP_CTRL, 4);
930 	return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, temp));
931 }
932 
933 static int32_t
934 amdtemp_gettemp15hm60h(device_t dev, amdsensor_t sensor)
935 {
936 	struct amdtemp_softc *sc = device_get_softc(dev);
937 	uint32_t val;
938 	int error;
939 
940 	error = amdsmn_read(sc->sc_smn, AMDTEMP_15H_M60H_REPTMP_CTRL, &val);
941 	KASSERT(error == 0, ("amdsmn_read"));
942 	return (amdtemp_decode_fam10h_to_16h(sc->sc_offset, val));
943 }
944 
945 static int32_t
946 amdtemp_gettemp17to19h(device_t dev, amdsensor_t sensor)
947 {
948 	struct amdtemp_softc *sc = device_get_softc(dev);
949 	uint32_t val;
950 	int error;
951 
952 	switch (sensor) {
953 	case CORE0_SENSOR0:
954 		/* Tctl */
955 		error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val);
956 		KASSERT(error == 0, ("amdsmn_read"));
957 		return (amdtemp_decode_fam17h_tctl(sc->sc_offset, val));
958 	case CCD_BASE ... CCD_MAX:
959 		/* Tccd<N> */
960 		error = amdsmn_read(sc->sc_smn,
961 				    AMDTEMP_17H_CUR_TMP +
962 				    sc->sc_ccd_offset +
963 				    (((int)sensor - CCD_BASE) * sizeof(val)),
964 				    &val);
965 		KASSERT(error == 0, ("amdsmn_read2"));
966 		KASSERT((val & AMDTEMP_17H_CCD_TMP_VALID) != 0,
967 		    ("sensor %d: not valid", (int)sensor));
968 		return (amdtemp_decode_fam10h_to_17h(sc->sc_offset, val, true));
969 	default:
970 		__assert_unreachable();
971 	}
972 }
973 
974 static void
975 amdtemp_probe_ccd_sensors17h(device_t dev, uint32_t model)
976 {
977 	char sensor_name[16], sensor_descr[32];
978 	struct amdtemp_softc *sc;
979 	uint32_t maxreg, i, val;
980 	int error;
981 
982 	switch (model) {
983 	case 0x00 ... 0x1f: /* Zen1, Zen+ */
984 		maxreg = 4;
985 		break;
986 	case 0x30 ... 0x3f: /* Zen2 TR/Epyc */
987 	case 0x60:	    /* Renoir */
988 	case 0x68:	    /* Lucienne */
989 	case 0x70 ... 0x7f: /* Zen2 Ryzen */
990 		maxreg = 8;
991 		_Static_assert((int)NUM_CCDS >= 8, "");
992 		break;
993 	case 0xa0 ... 0xaf: /* Zen3 ? */
994 		maxreg = 8;
995 		_Static_assert((int)NUM_CCDS >= 8, "");
996 		break;
997 	default:
998 		device_printf(dev,
999 		    "Unrecognized Family 17h Model: %02xh\n", model);
1000 		return;
1001 	}
1002 
1003 	sc = device_get_softc(dev);
1004 	for (i = 0; i < maxreg; i++) {
1005 		error = amdsmn_read(sc->sc_smn,
1006 				    AMDTEMP_17H_CUR_TMP +
1007 				    sc->sc_ccd_offset +
1008 				    (i * sizeof(val)),
1009 				    &val);
1010 		if (error != 0)
1011 			continue;
1012 		if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0)
1013 			continue;
1014 
1015 		ksnprintf(sensor_name, sizeof(sensor_name), "ccd%u", i);
1016 		ksnprintf(sensor_descr, sizeof(sensor_descr),
1017 		    "CCD %u temperature (Tccd%u)", i, i);
1018 
1019 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1020 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1021 		    sensor_name, CTLTYPE_INT | CTLFLAG_RD,
1022 		    dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr);
1023 
1024 		sc->sc_probed_regmask |= 1U << (CCD_BASE + i);
1025 	}
1026 }
1027 
1028 static void
1029 amdtemp_probe_ccd_sensors19h(device_t dev, uint32_t model)
1030 {
1031 	char sensor_name[16], sensor_descr[32];
1032 	struct amdtemp_softc *sc;
1033 	uint32_t maxreg, i, val;
1034 	int error;
1035 
1036 	device_printf(dev, "probe ccd sensors 19h %02x\n", model);
1037 
1038         switch (model) {
1039         case 0x00 ... 0x0f: /* Zen3 EPYC "Milan" */
1040         case 0x20 ... 0x2f: /* Zen3 Ryzen "Vermeer" */
1041         case 0x40 ... 0x4f:
1042         case 0x50 ... 0x5f:
1043         case 0x60 ... 0x6f:
1044         case 0x70 ... 0x7f:
1045                 maxreg = 8;
1046                 _Static_assert((int)NUM_CCDS >= 8, "");
1047                 break;
1048         case 0x10 ... 0x1f:
1049         case 0xa0 ... 0xaf:
1050                 maxreg = 12;
1051                 _Static_assert((int)NUM_CCDS >= 12, "");
1052 		break;
1053         default:
1054                 device_printf(dev,
1055                     "Unrecognized Family 19h Model: %02xh\n", model);
1056                 return;
1057         }
1058 
1059 	sc = device_get_softc(dev);
1060 	for (i = 0; i < maxreg; i++) {
1061 		error = amdsmn_read(sc->sc_smn,
1062 				    AMDTEMP_17H_CUR_TMP +
1063 				    sc->sc_ccd_offset +
1064 				    (i * sizeof(val)),
1065 				    &val);
1066 		device_printf(dev, "probe ccd%d error %d val=%08x\n",
1067 			      i, error, val);
1068 		if (error != 0)
1069 			continue;
1070 		if ((val & AMDTEMP_17H_CCD_TMP_VALID) == 0)
1071 			continue;
1072 
1073 		ksnprintf(sensor_name, sizeof(sensor_name), "ccd%u", i);
1074 		ksnprintf(sensor_descr, sizeof(sensor_descr),
1075 		    "CCD %u temperature (Tccd%u)", i, i);
1076 
1077 		SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev),
1078 		    SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
1079 		    sensor_name, CTLTYPE_INT | CTLFLAG_RD,
1080 		    dev, CCD_BASE + i, amdtemp_sysctl, "IK", sensor_descr);
1081 
1082 		sc->sc_probed_regmask |= 1U << (CCD_BASE + i);
1083 	}
1084 }
1085 
1086 static void
1087 amdtemp_sensor_task(void *sc_arg)
1088 {
1089 	struct sensorcpu *scpu = sc_arg;
1090 	struct amdtemp_softc *sc;
1091 	uint32_t mask;
1092 	int32_t temp;
1093 	int j;
1094 
1095 	sc = scpu->sc;
1096 	if (sc->sc_ich.ich_arg == NULL)
1097 		return;
1098 	mask = scpu->regmask;
1099 
1100 	for (j = 0; mask; ++j) {
1101 		if ((mask & (1U << j)) == 0)
1102 			continue;
1103 		temp = sc->sc_gettemp(scpu->dev, j);
1104 		sensor_set(&scpu->sensors[j], temp * 100000L, 0);
1105 		mask &= ~(1U << j);
1106 	}
1107 }
1108