xref: /dragonfly/sys/dev/misc/ecc/ecc_e3.c (revision b29f78b5)
1 /*
2  * Copyright (c) 2011 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/bitops.h>
41 
42 #include <bus/pci/pcivar.h>
43 #include <bus/pci/pcireg.h>
44 #include <bus/pci/pcibus.h>
45 #include <bus/pci/pci_cfgreg.h>
46 
47 #include <vm/pmap.h>
48 
49 #include "coremctl_if.h"
50 #include "pcib_if.h"
51 
52 #include <dev/misc/coremctl/coremctl_reg.h>
53 
/*
 * Memory controller generations handled by this driver.  One of these
 * values is kept in ecc_e3_type.ver and ecc_e3_softc.ecc_ver.
 */
#define ECC_E3_VER_1	1	/* Sandy Bridge */
#define ECC_E3_VER_2	2	/* Ivy Bridge */
#define ECC_E3_VER_3	3	/* Haswell */
57 
/*
 * One entry of the supported-device table: maps a PCI device ID to the
 * description string and controller generation used by the driver.
 */
struct ecc_e3_type {
	uint16_t	did;	/* PCI device ID to match */
	const char	*desc;	/* device description; NULL ends the table */
	int		ver;	/* ECC_E3_VER_ */
};
63 
64 struct ecc_e3_softc {
65 	device_t	ecc_dev;
66 	device_t	ecc_parent;	/* non-NULL if parent has MCHBAR */
67 	struct callout	ecc_callout;
68 	int		ecc_ver;	/* ECC_E3_VER_ */
69 };
70 
/* device_printf() wrapper which takes the softc instead of the device. */
#define ecc_printf(sc, fmt, ...) \
	device_printf((sc)->ecc_dev, fmt , ##__VA_ARGS__)
73 
74 static int	ecc_e3_probe(device_t);
75 static int	ecc_e3_attach(device_t);
76 static int	ecc_e3_detach(device_t);
77 static void	ecc_e3_shutdown(device_t);
78 
79 static void	ecc_e3_chaninfo(struct ecc_e3_softc *, uint32_t, const char *);
80 static void	ecc_e3_status(struct ecc_e3_softc *);
81 static void	ecc_e3_callout(void *);
82 static void	ecc_e3_errlog(struct ecc_e3_softc *);
83 static void	ecc_e3_errlog_ch(struct ecc_e3_softc *, int, int, const char *);
84 
85 static const struct ecc_e3_type ecc_e3_types[] = {
86 	{ PCI_E3V1_MEMCTL_DID, "Intel E3 ECC", ECC_E3_VER_1 },
87 	{ PCI_E3V2_MEMCTL_DID, "Intel E3 v2 ECC", ECC_E3_VER_2 },
88 	{ PCI_E3V3_MEMCTL_DID, "Intel E3 v3 ECC", ECC_E3_VER_3 },
89 	{ 0, NULL, 0 } /* required last entry */
90 };
91 
92 static device_method_t ecc_e3_methods[] = {
93 	/* Device interface */
94 	DEVMETHOD(device_probe,		ecc_e3_probe),
95 	DEVMETHOD(device_attach,	ecc_e3_attach),
96 	DEVMETHOD(device_detach,	ecc_e3_detach),
97 	DEVMETHOD(device_shutdown,	ecc_e3_shutdown),
98 	DEVMETHOD(device_suspend,	bus_generic_suspend),
99 	DEVMETHOD(device_resume,	bus_generic_resume),
100 	DEVMETHOD_END
101 };
102 
103 static driver_t ecc_e3_driver = {
104 	"ecc",
105 	ecc_e3_methods,
106 	sizeof(struct ecc_e3_softc)
107 };
108 static devclass_t ecc_devclass;
109 DRIVER_MODULE(ecc_e3, coremctl, ecc_e3_driver, ecc_devclass, NULL, NULL);
110 MODULE_DEPEND(ecc_e3, pci, 1, 1, 1);
111 MODULE_DEPEND(ecc_e3, coremctl, 1, 1, 1);
112 
113 static __inline uint32_t
114 CSR_READ_4(struct ecc_e3_softc *sc, int ofs)
115 {
116 	uint32_t val;
117 	int error;
118 
119 	error = COREMCTL_MCH_READ(sc->ecc_parent, ofs, &val);
120 	KASSERT(!error, ("mch read failed"));
121 
122 	return val;
123 }
124 
125 static int
126 ecc_e3_probe(device_t dev)
127 {
128 	const struct ecc_e3_type *t;
129 	uint16_t did;
130 
131 	if (pci_get_vendor(dev) != PCI_CORE_MEMCTL_VID)
132 		return ENXIO;
133 
134 	did = pci_get_device(dev);
135 	for (t = ecc_e3_types; t->desc != NULL; ++t) {
136 		if (t->did == did) {
137 			struct ecc_e3_softc *sc = device_get_softc(dev);
138 
139 			device_set_desc(dev, t->desc);
140 			sc->ecc_ver = t->ver;
141 			return 0;
142 		}
143 	}
144 	return ENXIO;
145 }
146 
147 static int
148 ecc_e3_attach(device_t dev)
149 {
150 	struct ecc_e3_softc *sc = device_get_softc(dev);
151 	uint32_t val;
152 	int error;
153 
154 	callout_init_mp(&sc->ecc_callout);
155 	sc->ecc_dev = dev;
156 
157 	/* Probe the existance of MCHBAR */
158 	error = COREMCTL_MCH_READ(device_get_parent(dev), MCH_CORE_DIMM_CH0,
159 	    &val);
160 	if (!error)
161 		sc->ecc_parent = device_get_parent(dev);
162 
163 	if (sc->ecc_parent != NULL) {
164 		uint32_t dimm_ch0, dimm_ch1;
165 		int ecc_active;
166 
167 		if (bootverbose) {
168 			ecc_printf(sc, "LOG0_C0 %#x\n",
169 			    CSR_READ_4(sc, MCH_E3_ERRLOG0_C0));
170 			ecc_printf(sc, "LOG0_C1 %#x\n",
171 			    CSR_READ_4(sc, MCH_E3_ERRLOG0_C1));
172 		}
173 
174 		dimm_ch0 = CSR_READ_4(sc, MCH_CORE_DIMM_CH0);
175 		dimm_ch1 = CSR_READ_4(sc, MCH_CORE_DIMM_CH1);
176 
177 		if (bootverbose) {
178 			ecc_e3_chaninfo(sc, dimm_ch0, "channel0");
179 			ecc_e3_chaninfo(sc, dimm_ch1, "channel1");
180 		}
181 
182 		ecc_active = 1;
183 		if (sc->ecc_ver == ECC_E3_VER_1 ||
184 		    sc->ecc_ver == ECC_E3_VER_2) {
185 			if (((dimm_ch0 | dimm_ch1) & MCH_E3_DIMM_ECC) ==
186 			    MCH_E3_DIMM_ECC_NONE) {
187 				ecc_active = 0;
188 				ecc_printf(sc, "No ECC active\n");
189 			}
190 		} else { /* v3 */
191 			uint32_t ecc_mode0, ecc_mode1;
192 
193 			ecc_mode0 = __SHIFTOUT(dimm_ch0, MCH_E3_DIMM_ECC);
194 			ecc_mode1 = __SHIFTOUT(dimm_ch1, MCH_E3_DIMM_ECC);
195 
196 			/*
197 			 * Only active ALL/NONE is supported
198 			 */
199 
200 			if (ecc_mode0 != MCH_E3_DIMM_ECC_NONE &&
201 			    ecc_mode0 != MCH_E3_DIMM_ECC_ALL) {
202 				ecc_active = 0;
203 				ecc_printf(sc, "channel0, invalid ECC "
204 				    "active 0x%x\n", ecc_mode0);
205 			}
206 			if (ecc_mode1 != MCH_E3_DIMM_ECC_NONE &&
207 			    ecc_mode1 != MCH_E3_DIMM_ECC_ALL) {
208 				ecc_active = 0;
209 				ecc_printf(sc, "channel1, invalid ECC "
210 				    "active 0x%x\n", ecc_mode1);
211 			}
212 
213 			if (ecc_mode0 == MCH_E3_DIMM_ECC_NONE &&
214 			    ecc_mode1 == MCH_E3_DIMM_ECC_NONE) {
215 				ecc_active = 0;
216 				ecc_printf(sc, "No ECC active\n");
217 			}
218 		}
219 
220 		if (!ecc_active)
221 			return 0;
222 	} else {
223 		ecc_printf(sc, "MCHBAR is not enabled\n");
224 	}
225 
226 	ecc_e3_status(sc);
227 	callout_reset(&sc->ecc_callout, hz, ecc_e3_callout, sc);
228 
229 	return 0;
230 }
231 
232 static void
233 ecc_e3_callout(void *xsc)
234 {
235 	struct ecc_e3_softc *sc = xsc;
236 
237 	ecc_e3_status(sc);
238 	callout_reset(&sc->ecc_callout, hz, ecc_e3_callout, sc);
239 }
240 
241 static void
242 ecc_e3_status(struct ecc_e3_softc *sc)
243 {
244 	device_t dev = sc->ecc_dev;
245 	uint16_t errsts;
246 
247 	errsts = pci_read_config(dev, PCI_E3_ERRSTS, 2);
248 	if (errsts & PCI_E3_ERRSTS_DMERR)
249 		ecc_printf(sc, "Uncorrectable multilple-bit ECC error\n");
250 	else if (errsts & PCI_E3_ERRSTS_DSERR)
251 		ecc_printf(sc, "Correctable single-bit ECC error\n");
252 
253 	if (errsts & (PCI_E3_ERRSTS_DSERR | PCI_E3_ERRSTS_DMERR)) {
254 		if (sc->ecc_parent != NULL)
255 			ecc_e3_errlog(sc);
256 
257 		/* Clear pending errors */
258 		pci_write_config(dev, PCI_E3_ERRSTS, errsts, 2);
259 	}
260 }
261 
262 static void
263 ecc_e3_chaninfo(struct ecc_e3_softc *sc, uint32_t dimm_ch, const char *desc)
264 {
265 	int size_a, size_b, ecc;
266 
267 	size_a = __SHIFTOUT(dimm_ch, MCH_CORE_DIMM_A_SIZE);
268 	size_b = __SHIFTOUT(dimm_ch, MCH_CORE_DIMM_B_SIZE);
269 	if (size_a == 0 && size_b == 0)
270 		return;
271 
272 	ecc = __SHIFTOUT(dimm_ch, MCH_E3_DIMM_ECC);
273 	if (ecc == MCH_E3_DIMM_ECC_NONE) {
274 		ecc_printf(sc, "%s, no ECC active\n", desc);
275 	} else if (ecc == MCH_E3_DIMM_ECC_ALL) {
276 		ecc_printf(sc, "%s, ECC active IO/logic\n", desc);
277 	} else {
278 		if (sc->ecc_ver == ECC_E3_VER_1 ||
279 		    sc->ecc_ver == ECC_E3_VER_2) {
280 			if (ecc == MCH_E3_DIMM_ECC_IO)
281 				ecc_printf(sc, "%s, ECC active IO\n", desc);
282 			else
283 				ecc_printf(sc, "%s, ECC active logic\n", desc);
284 		} else { /* v3 */
285 			ecc_printf(sc, "%s, invalid ECC active 0x%x\n",
286 			    desc, ecc);
287 		}
288 	}
289 }
290 
291 static void
292 ecc_e3_errlog(struct ecc_e3_softc *sc)
293 {
294 	ecc_e3_errlog_ch(sc, MCH_E3_ERRLOG0_C0, MCH_E3_ERRLOG1_C0,
295 	    "channel0");
296 	ecc_e3_errlog_ch(sc, MCH_E3_ERRLOG0_C1, MCH_E3_ERRLOG1_C1,
297 	    "channel1");
298 }
299 
300 static void
301 ecc_e3_errlog_ch(struct ecc_e3_softc *sc, int err0_ofs, int err1_ofs,
302     const char *desc)
303 {
304 	uint32_t err0, err1;
305 
306 	err0 = CSR_READ_4(sc, err0_ofs);
307 	if ((err0 & (MCH_E3_ERRLOG0_CERRSTS | MCH_E3_ERRLOG0_MERRSTS)) == 0)
308 		return;
309 
310 	err1 = CSR_READ_4(sc, err1_ofs);
311 
312 	ecc_printf(sc, "%s error @bank %d, rank %d, chunk %d, syndrome %d, "
313 	    "row %d, col %d\n", desc,
314 	    __SHIFTOUT(err0, MCH_E3_ERRLOG0_ERRBANK),
315 	    __SHIFTOUT(err0, MCH_E3_ERRLOG0_ERRRANK),
316 	    __SHIFTOUT(err0, MCH_E3_ERRLOG0_ERRCHUNK),
317 	    __SHIFTOUT(err0, MCH_E3_ERRLOG0_ERRSYND),
318 	    __SHIFTOUT(err1, MCH_E3_ERRLOG1_ERRROW),
319 	    __SHIFTOUT(err1, MCH_E3_ERRLOG1_ERRCOL));
320 }
321 
322 static int
323 ecc_e3_detach(device_t dev)
324 {
325 	struct ecc_e3_softc *sc = device_get_softc(dev);
326 
327 	callout_stop_sync(&sc->ecc_callout);
328 	return 0;
329 }
330 
331 static void
332 ecc_e3_shutdown(device_t dev)
333 {
334 	struct ecc_e3_softc *sc = device_get_softc(dev);
335 
336 	callout_stop_sync(&sc->ecc_callout);
337 }
338