xref: /freebsd/sys/dev/amd_ecc_inject/ecc_inject.c (revision e17f5b1d)
1 /*-
2  * Copyright (c) 2017 Andriy Gapon
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/bus.h>
32 #include <sys/kernel.h>
33 #include <sys/conf.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/sysctl.h>
37 #include <sys/types.h>
38 
39 #include <dev/pci/pcivar.h>
40 
41 #include <vm/vm.h>
42 #include <vm/vm_extern.h>
43 #include <vm/vm_kern.h>
44 
45 #include <machine/cputypes.h>
46 #include <machine/md_var.h>
47 
48 
49 /*
50  * See BKDG for AMD Family 15h Models 00h-0Fh Processors
51  * (publication 42301 Rev 3.08 - March 12, 2012):
52  * - 2.13.3.1 DRAM Error Injection
53  * - D18F3xB8 NB Array Address
54  * - D18F3xBC NB Array Data Port
55  * - D18F3xBC_x8 DRAM ECC
56  */
/*
 * Northbridge PCI configuration-space register offsets and the bit fields
 * used within them.  Field macros are indented under the register they
 * belong to.
 */
#define	NB_MCA_CFG		0x44
#define		DRAM_ECC_EN	(1 << 22)	/* checked at load time */
#define	NB_MCA_EXTCFG		0x180
#define		ECC_SYMB_SZ	(1 << 25)
#define	NB_ARRAY_ADDR		0xb8		/* D18F3xB8 NB Array Address */
/*
 * The selector occupies the top nibble of the register, so the constant
 * must be unsigned: 0x8 << 28 does not fit in a signed int and shifting a
 * signed value into the sign bit is undefined behavior.
 */
#define		DRAM_ECC_SEL	(0x8U << 28)
#define		QUADRANT_SHIFT	1
#define		QUADRANT_MASK	0x3
#define	NB_ARRAY_PORT		0xbc		/* D18F3xBC NB Array Data Port */
#define		INJ_WORD_SHIFT	20
#define		INJ_WORD_MASK	0x1ff
#define		DRAM_ERR_EN	(1 << 18)
#define		DRAM_WR_REQ	(1 << 17)
#define		DRAM_RD_REQ	(1 << 16)
#define		INJ_VECTOR_MASK	0xffff
72 
/* Forward declaration: defined below, called from sysctl_proc_inject(). */
static void ecc_ei_inject(int);

/* PCI device used for all config-space accesses; set by ecc_ei_load(). */
static device_t nbdev;
/* Pause between successive injections in milliseconds; 0 disables it. */
static int delay_ms = 0;
/* Injection parameters below are validated by the sysctl handlers. */
static int quadrant = 0;	/* 0 - 3 */
static int word_mask = 0x001;	/* 9 bits: 8 + 1 for ECC */
static int bit_mask = 0x0001;	/* 16 bits */
80 
81 static int
82 sysctl_int_with_max(SYSCTL_HANDLER_ARGS)
83 {
84 	u_int value;
85 	int error;
86 
87 	value = *(u_int *)arg1;
88 	error = sysctl_handle_int(oidp, &value, 0, req);
89 	if (error || req->newptr == NULL)
90 		return (error);
91 	if (value > arg2)
92 		return (EINVAL);
93 	*(u_int *)arg1 = value;
94 	return (0);
95 }
96 
97 static int
98 sysctl_nonzero_int_with_max(SYSCTL_HANDLER_ARGS)
99 {
100 	u_int value;
101 	int error;
102 
103 	value = *(u_int *)arg1;
104 	error = sysctl_int_with_max(oidp, &value, arg2, req);
105 	if (error || req->newptr == NULL)
106 		return (error);
107 	if (value == 0)
108 		return (EINVAL);
109 	*(u_int *)arg1 = value;
110 	return (0);
111 }
112 
113 static int
114 sysctl_proc_inject(SYSCTL_HANDLER_ARGS)
115 {
116 	int error;
117 	int i;
118 
119 	i = 0;
120 	error = sysctl_handle_int(oidp, &i, 0, req);
121 	if (error)
122 		return (error);
123 	if (i != 0)
124 		ecc_ei_inject(i);
125 	return (0);
126 }
127 
/* Parent node for hardware error injection controls, placed under _hw. */
static SYSCTL_NODE(_hw, OID_AUTO, error_injection,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "Hardware error injection");
/* Node holding the DRAM ECC injection knobs declared below. */
static SYSCTL_NODE(_hw_error_injection, OID_AUTO, dram_ecc,
    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
    "DRAM ECC error injection");
/* Plain read/write knob backed by delay_ms; no range validation. */
SYSCTL_UINT(_hw_error_injection_dram_ecc, OID_AUTO, delay,
    CTLTYPE_UINT | CTLFLAG_RW, &delay_ms, 0,
    "Delay in milliseconds between error injections");
/* quadrant: accepted range is 0..QUADRANT_MASK (passed as arg2). */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, quadrant,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &quadrant, QUADRANT_MASK,
    sysctl_int_with_max, "IU",
    "Index of 16-byte quadrant within 64-byte line where errors "
    "should be injected");
/* word_mask: must be non-zero and at most INJ_WORD_MASK. */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, word_mask,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &word_mask, INJ_WORD_MASK,
    sysctl_nonzero_int_with_max, "IU",
    "9-bit mask of words where errors should be injected (8 data + 1 ECC)");
/* bit_mask: must be non-zero and at most INJ_VECTOR_MASK. */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, bit_mask,
    CTLTYPE_UINT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, &bit_mask, INJ_VECTOR_MASK,
    sysctl_nonzero_int_with_max, "IU",
    "16-bit mask of bits within each selected word where errors "
    "should be injected");
/* Writing a non-zero count here runs ecc_ei_inject() with that count. */
SYSCTL_PROC(_hw_error_injection_dram_ecc, OID_AUTO, inject,
    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_proc_inject, "I",
    "Inject a number of errors according to configured parameters");
154 
155 static void
156 ecc_ei_inject_one(void *arg, size_t size)
157 {
158 	volatile uint64_t *memory = arg;
159 	uint32_t val;
160 	int i;
161 
162 	val = DRAM_ECC_SEL | (quadrant << QUADRANT_SHIFT);
163 	pci_write_config(nbdev, NB_ARRAY_ADDR, val, 4);
164 
165 	val = (word_mask << INJ_WORD_SHIFT) | DRAM_WR_REQ | bit_mask;
166 	pci_write_config(nbdev, NB_ARRAY_PORT, val, 4);
167 
168 	for (i = 0; i < size / sizeof(uint64_t); i++) {
169 		memory[i] = 0;
170 		val = pci_read_config(nbdev, NB_ARRAY_PORT, 4);
171 		if ((val & DRAM_WR_REQ) == 0)
172 			break;
173 	}
174 	for (i = 0; i < size / sizeof(uint64_t); i++)
175 		memory[0] = memory[i];
176 }
177 
178 static void
179 ecc_ei_inject(int count)
180 {
181 	vm_offset_t memory;
182 	int injected;
183 
184 	KASSERT((quadrant & ~QUADRANT_MASK) == 0,
185 	    ("quadrant value is outside of range: %u", quadrant));
186 	KASSERT(word_mask != 0 && (word_mask & ~INJ_WORD_MASK) == 0,
187 	    ("word mask value is outside of range: 0x%x", word_mask));
188 	KASSERT(bit_mask != 0 && (bit_mask & ~INJ_VECTOR_MASK) == 0,
189 	    ("bit mask value is outside of range: 0x%x", bit_mask));
190 
191 	memory = kmem_alloc_attr(PAGE_SIZE, M_WAITOK, 0, ~0,
192 	    VM_MEMATTR_UNCACHEABLE);
193 
194 	for (injected = 0; injected < count; injected++) {
195 		ecc_ei_inject_one((void*)memory, PAGE_SIZE);
196 		if (delay_ms != 0 && injected != count - 1)
197 			pause_sbt("ecc_ei_inject", delay_ms * SBT_1MS, 0, 0);
198 	}
199 
200 	kmem_free(memory, PAGE_SIZE);
201 }
202 
203 static int
204 ecc_ei_load(void)
205 {
206 	uint32_t val;
207 
208 	if ((cpu_vendor_id != CPU_VENDOR_AMD || CPUID_TO_FAMILY(cpu_id) < 0x10) &&
209 	    cpu_vendor_id != CPU_VENDOR_HYGON) {
210 		printf("DRAM ECC error injection is not supported\n");
211 		return (ENXIO);
212 	}
213 	nbdev = pci_find_bsf(0, 24, 3);
214 	if (nbdev == NULL) {
215 		printf("Couldn't find NB PCI device\n");
216 		return (ENXIO);
217 	}
218 	val = pci_read_config(nbdev, NB_MCA_CFG, 4);
219 	if ((val & DRAM_ECC_EN) == 0) {
220 		printf("DRAM ECC is not supported or disabled\n");
221 		return (ENXIO);
222 	}
223 	printf("DRAM ECC error injection support loaded\n");
224 	return (0);
225 }
226 
227 static int
228 tsc_modevent(module_t mod __unused, int type, void *data __unused)
229 {
230 	int error;
231 
232 	error = 0;
233 	switch (type) {
234 	case MOD_LOAD:
235 		error = ecc_ei_load();
236 		break;
237 	case MOD_UNLOAD:
238 	case MOD_SHUTDOWN:
239 		break;
240 	default:
241 		return (EOPNOTSUPP);
242 	}
243 	return (0);
244 }
245 
/*
 * Declare the kernel module with tsc_modevent() as its event handler.
 * NOTE(review): the module is named "tsc" although this file implements
 * DRAM ECC error injection; renaming would change the externally visible
 * module name, so it is left as is.
 */
DEV_MODULE(tsc, tsc_modevent, NULL);
247