xref: /openbsd/sys/arch/i386/i386/amd64errata.c (revision 4bdff4be)
1 /*	$OpenBSD: amd64errata.c,v 1.16 2022/10/10 03:01:11 jsg Exp $	*/
2 /*	$NetBSD: errata.c,v 1.6 2007/02/05 21:05:45 ad Exp $	*/
3 
4 /*-
5  * Copyright (c) 2007 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 /*
34  * Detect, report on, and work around known errata with AMD amd64 CPUs.
35  *
36  * This is generalised because there are quite a few problems that the
37  * BIOS can patch via MSR, but it is not known if the OS can patch these
38  * yet.  The list is expected to grow over time.
39  *
40  * The data here is from:
41  *
42  * Revision Guide for AMD Athlon 64 and AMD Opteron Processors (0Fh)
43  * Publication #25759, Revision: 3.79, Issue Date: July 2009
44  * BH-E4, CH-CG, CH-D0, DH-CG, DH-D0, DH-E3, DH-E6, JH-E1, JH-E6, SH-B0,
45  * SH-B3, SH-C0, SH-CG, SH-D0, SH-E4, SH-E5
46  *
47  * Revision Guide for AMD Family 10h Processors
48  * Publication #41322, Revision: 3.92, Issue Date: March 2012
49  * BL-C2, BL-C3, DA-C2, DA-C3, DR-B2, DR-B3, DR-BA, HY-D0, HY-D1,
50  * HY-D1-G34R1, PH-E0, RB-C2, RB-C3
51  *
52  * Revision Guide for AMD Family 12h Processors
53  * Publication #44739, Revision: 3.10, Issue Date: March 2012
54  * LN-B0
55  */
56 
57 #include <sys/param.h>
58 
59 #include <sys/systm.h>
60 
61 #include <machine/cpu.h>
62 #include <machine/cpufunc.h>
63 #include <machine/specialreg.h>
64 
65 typedef struct errata {
66 	u_short		e_num;
67 	u_short		e_reported;
68 	u_int		e_data1;
69 	const uint8_t	*e_set;
70 	int		(*e_act)(struct cpu_info *, struct errata *);
71 	uint64_t	e_data2;
72 } errata_t;
73 
/*
 * Internal identifiers for the CPU revisions known to this file.  The
 * values are small enough to be stored in the uint8_t revision sets.
 */
enum cpurev {
	/* Family 0Fh (Athlon 64 / Opteron) */
	BH_E4, CH_CG, CH_D0, DH_CG,
	DH_D0, DH_E3, DH_E6, JH_E1,
	JH_E6, SH_B0, SH_B3, SH_C0,
	SH_CG, SH_D0, SH_E4, SH_E5,

	/* Family 10h */
	DR_BA, DR_B2, DR_B3, RB_C2,
	RB_C3, BL_C2, BL_C3, DA_C2,
	DA_C3, HY_D0, HY_D1, PH_E0,

	/* Family 12h */
	LN_B0,

	/* Terminator for the revision tables; not a real revision */
	OINK
};

typedef enum cpurev cpurev_t;
81 
82 static const u_int cpurevs[] = {
83 	BH_E4, 0x0020fb1, CH_CG, 0x0000f82, CH_CG, 0x0000fb2,
84 	CH_D0, 0x0010f80, CH_D0, 0x0010fb0, DH_CG, 0x0000fc0,
85 	DH_CG, 0x0000fe0, DH_CG, 0x0000ff0, DH_D0, 0x0010fc0,
86 	DH_D0, 0x0010ff0, DH_E3, 0x0020fc0, DH_E3, 0x0020ff0,
87 	DH_E6, 0x0020fc2, DH_E6, 0x0020ff2, JH_E1, 0x0020f10,
88 	JH_E6, 0x0020f12, JH_E6, 0x0020f32, SH_B0, 0x0000f40,
89 	SH_B3, 0x0000f51, SH_C0, 0x0000f48, SH_C0, 0x0000f58,
90 	SH_CG, 0x0000f4a, SH_CG, 0x0000f5a, SH_CG, 0x0000f7a,
91 	SH_D0, 0x0010f40, SH_D0, 0x0010f50, SH_D0, 0x0010f70,
92 	SH_E4, 0x0020f51, SH_E4, 0x0020f71, SH_E5, 0x0020f42,
93 	DR_BA, 0x0100f2a, DR_B2, 0x0100f22, DR_B3, 0x0100f23,
94 	RB_C2, 0x0100f42, RB_C3, 0x0100f43, BL_C2, 0x0100f52,
95 	BL_C3, 0x0100f53, DA_C2, 0x0100f62, DA_C3, 0x0100f63,
96 	HY_D0, 0x0100f80, HY_D1, 0x0100f81, HY_D1, 0x0100f91,
97 	PH_E0, 0x0100fa0, LN_B0, 0x0300f10, SH_B0, 0x0000f50,
98 	OINK
99 };
100 
101 static const uint8_t amd64_errata_set1[] = {
102 	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, OINK
103 };
104 
105 #ifdef MULTIPROCESSOR
106 static const uint8_t amd64_errata_set2[] = {
107 	SH_B3, SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
108 };
109 #endif
110 
111 static const uint8_t amd64_errata_set3[] = {
112 	JH_E1, DH_E3, OINK
113 };
114 
115 #if 0
116 static const uint8_t amd64_errata_set4[] = {
117 	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, JH_E1,
118 	DH_E3, SH_E4, BH_E4, SH_E5, DH_E6, JH_E6, OINK
119 };
120 #endif
121 
122 static const uint8_t amd64_errata_set5[] = {
123 	SH_B3, OINK
124 };
125 
126 static const uint8_t amd64_errata_set6[] = {
127 	SH_C0, SH_CG, DH_CG, CH_CG, OINK
128 };
129 
130 static const uint8_t amd64_errata_set7[] = {
131 	SH_C0, SH_CG, DH_CG, CH_CG, SH_D0, DH_D0, CH_D0, OINK
132 };
133 
134 static const uint8_t amd64_errata_set8[] = {
135 	BH_E4, CH_CG, CH_CG, CH_D0, CH_D0, DH_CG, DH_CG, DH_CG,
136 	DH_D0, DH_D0, DH_E3, DH_E3, DH_E6, DH_E6, JH_E1, JH_E6,
137 	JH_E6, SH_B0, SH_B3, SH_C0, SH_C0, SH_CG, SH_CG, SH_CG,
138 	SH_D0, SH_D0, SH_D0, SH_E4, SH_E4, SH_E5, OINK
139 };
140 
141 static const uint8_t amd64_errata_set9[] = {
142 	DR_BA, DR_B2, DR_B3, RB_C2, RB_C3, BL_C2, BL_C3, DA_C2,
143 	DA_C3, HY_D0, HY_D1, PH_E0, LN_B0, OINK
144 };
145 
146 int amd64_errata_setmsr(struct cpu_info *, errata_t *);
147 int amd64_errata_testmsr(struct cpu_info *, errata_t *);
148 
149 static errata_t errata[] = {
150 	/*
151 	 * 81: Cache Coherency Problem with Hardware Prefetching
152 	 * and Streaming Stores
153 	 */
154 	{
155 		81, 0, MSR_DC_CFG, amd64_errata_set5,
156 		amd64_errata_testmsr, DC_CFG_DIS_SMC_CHK_BUF
157 	},
158 	/*
159 	 * 86: DRAM Data Masking Feature Can Cause ECC Failures
160 	 */
161 	{
162 		86, 0, MSR_NB_CFG, amd64_errata_set1,
163 		amd64_errata_testmsr, NB_CFG_DISDATMSK
164 	},
165 	/*
166 	 * 89: Potential Deadlock With Locked Transactions
167 	 */
168 	{
169 		89, 0, MSR_NB_CFG, amd64_errata_set8,
170 		amd64_errata_testmsr, NB_CFG_DISIOREQLOCK
171 	},
172 	/*
173 	 * 94: Sequential Prefetch Feature May Cause Incorrect
174 	 * Processor Operation
175 	 */
176 	{
177 		94, 0, MSR_IC_CFG, amd64_errata_set1,
178 		amd64_errata_testmsr, IC_CFG_DIS_SEQ_PREFETCH
179 	},
180 	/*
181 	 * 97: 128-Bit Streaming Stores May Cause Coherency
182 	 * Failure
183 	 *
184 	 * XXX "This workaround must not be applied to processors
185 	 * prior to revision C0."  We don't apply it, but if it
186 	 * can't be applied, it shouldn't be reported.
187 	 */
188 	{
189 		97, 0, MSR_DC_CFG, amd64_errata_set6,
190 		amd64_errata_testmsr, DC_CFG_DIS_CNV_WC_SSO
191 	},
192 	/*
193 	 * 104: DRAM Data Masking Feature Causes ChipKill ECC
194 	 * Failures When Enabled With x8/x16 DRAM Devices
195 	 */
196 	{
197 		104, 0, MSR_NB_CFG, amd64_errata_set7,
198 		amd64_errata_testmsr, NB_CFG_DISDATMSK
199 	},
200 	/*
201 	 * 113: Enhanced Write-Combining Feature Causes System Hang
202 	 */
203 	{
204 		113, 0, MSR_BU_CFG, amd64_errata_set3,
205 		amd64_errata_setmsr, BU_CFG_WBENHWSBDIS
206 	},
207 #ifdef MULTIPROCESSOR
208 	/*
209 	 * 69: Multiprocessor Coherency Problem with Hardware
210 	 * Prefetch Mechanism
211 	 */
212 	{
213 		69, 0, MSR_BU_CFG, amd64_errata_set5,
214 		amd64_errata_setmsr, BU_CFG_WBPFSMCCHKDIS
215 	},
216 	/*
217 	 * 101: DRAM Scrubber May Cause Data Corruption When Using
218 	 * Node-Interleaved Memory
219 	 */
220 	{
221 		101, 0, 0, amd64_errata_set2,
222 		NULL, 0
223 	},
224 	/*
225 	 * 106: Potential Deadlock with Tightly Coupled Semaphores
226 	 * in an MP System
227 	 */
228 	{
229 		106, 0, MSR_LS_CFG, amd64_errata_set2,
230 		amd64_errata_testmsr, LS_CFG_DIS_LS2_SQUISH
231 	},
232 	/*
233 	 * 107: Possible Multiprocessor Coherency Problem with
234 	 * Setting Page Table A/D Bits
235 	 */
236 	{
237 		107, 0, MSR_BU_CFG, amd64_errata_set2,
238 		amd64_errata_testmsr, BU_CFG_THRL2IDXCMPDIS
239 	},
240 #if 0
241 	/*
242 	 * 122: TLB Flush Filter May Cause Coherency Problem in
243 	 * Multiprocessor Systems
244 	 */
245 	{
246 		122, 0, MSR_HWCR, amd64_errata_set4,
247 		amd64_errata_setmsr, HWCR_FFDIS
248 	},
249 #endif
250 #endif	/* MULTIPROCESSOR */
251 	/*
252 	 * 721: Processor May Incorrectly Update Stack Pointer
253 	 */
254 	{
255 		721, 0, MSR_DE_CFG, amd64_errata_set9,
256 		amd64_errata_setmsr, DE_CFG_721
257 	},
258 };
259 
260 int
261 amd64_errata_testmsr(struct cpu_info *ci, errata_t *e)
262 {
263 	uint64_t val;
264 
265 	(void)ci;
266 
267 	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
268 	if ((val & e->e_data2) != 0)
269 		return 0;		/* not found */
270 
271 	e->e_reported = 1;
272 	return 1;			/* found */
273 }
274 
275 int
276 amd64_errata_setmsr(struct cpu_info *ci, errata_t *e)
277 {
278 	uint64_t val;
279 
280 	(void)ci;
281 
282 	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
283 	if ((val & e->e_data2) != 0)
284 		return 0;		/* not found */
285 
286 	wrmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE, val | e->e_data2);
287 
288 #ifdef ERRATA_DEBUG
289 	printf("ERRATA: writing a fix\n");
290 	val = rdmsr_locked(e->e_data1, OPTERON_MSR_PASSCODE);
291 	if ((val & e->e_data2) != 0)
292 		printf("ERRATA: fix seems to have worked!\n");
293 #endif
294 
295 	e->e_reported = 1;
296 	return 2;			/* found and fixed */
297 }
298 
299 void
300 amd64_errata(struct cpu_info *ci)
301 {
302 	errata_t *e, *ex;
303 	cpurev_t rev;
304 	int i, j;
305 	int rc;
306 	int found = 0;
307 	int corrected = 0;
308 	u_int32_t regs[4];
309 	static int printed = 0;
310 
311 	cpuid(0x80000001, regs);
312 
313 	for (i = 0; ; i += 2) {
314 		if ((rev = cpurevs[i]) == OINK) {
315 #ifdef ERRATA_DEBUG
316 			printf("ERRATA: this CPU ok\n");
317 #endif
318 			return;
319 		}
320 		if (cpurevs[i + 1] == regs[0]) {
321 #ifdef ERRATA_DEBUG
322 			printf("ERRATA: this CPU has errata\n");
323 #endif
324 			break;
325 		}
326 	}
327 
328 	ex = errata + sizeof(errata) / sizeof(errata[0]);
329 
330 	/* Reset e_reporteds (for multiple CPUs) */
331 	for (e = errata; e < ex; e++)
332 		e->e_reported = 0;
333 
334 	for (e = errata; e < ex; e++) {
335 		if (e->e_reported)
336 			continue;
337 		if (e->e_set != NULL) {
338 			for (j = 0; e->e_set[j] != OINK; j++)
339 				if (e->e_set[j] == rev)
340 					break;
341 			if (e->e_set[j] == OINK)
342 				continue;
343 		}
344 
345 #ifdef ERRATA_DEBUG
346 		printf("%s: testing for erratum %d\n",
347 		    ci->ci_dev->dv_xname, e->e_num);
348 #endif
349 
350 		/*
351 		 * If we have an action routine, call it, otherwise
352 		 * the default is that this erratum is present.
353 		 */
354 		rc = (e->e_act == NULL) ? 1 : (*e->e_act)(ci, e);
355 
356 		if (rc == 0)			/* not found */
357 			continue;
358 		if (rc == 1)
359 			found++;
360 		if (rc == 2)
361 			corrected++;
362 
363 		e->e_reported = rc;
364 
365 #ifdef ERRATA_DEBUG
366 		printf("%s: erratum %d present%s\n",
367 		    ci->ci_dev->dv_xname, e->e_num,
368 		    (rc == 2) ? " and patched" : "");
369 #endif
370 	}
371 
372 #define ERRATA_VERBOSE
373 #ifdef ERRATA_VERBOSE
374 	if (corrected) {
375 		int first = 1;
376 
377 		/* Print out found and corrected */
378 		if (!printed) {
379 			printf("%s: AMD %s", ci->ci_dev->dv_xname,
380 			    (corrected == 1) ? "erratum" : "errata");
381 		}
382 		for (e = errata; e < ex; e++) {
383 			if (e->e_reported == 2) {
384 				if (!printed) {
385 					if (! first)
386 						printf(",");
387 					printf(" %d", e->e_num);
388 				}
389 				first = 0;
390 			}
391 		}
392 		if (!printed)
393 			printf(" detected and fixed\n");
394 	}
395 #endif
396 
397 	if (found) {
398 		int first = 1;
399 
400 		/* Print out found but not corrected */
401 		if (!printed) {
402 			printf("%s: AMD %s", ci->ci_dev->dv_xname,
403 			    (found == 1) ? "erratum" : "errata");
404 		}
405 		for (e = errata; e < ex; e++) {
406 			if (e->e_reported == 1) {
407 				if (!printed) {
408 					if (! first)
409 						printf(",");
410 					printf(" %d", e->e_num);
411 				}
412 				first = 0;
413 			}
414 		}
415 		if (!printed)
416 			printf(" present, BIOS upgrade may be required\n");
417 	}
418 
419 	/* Print only one time for the first CPU */
420 	printed = 1;
421 }
422