xref: /dragonfly/sys/dev/misc/ecc/ecc_e5.c (revision 2249b4bc)
1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/bitops.h>
41 
42 #include <bus/pci/pcivar.h>
43 #include <bus/pci/pcireg.h>
44 #include <bus/pci/pcibus.h>
45 #include <bus/pci/pci_cfgreg.h>
46 #include <bus/pci/pcib_private.h>
47 
48 #include "pcib_if.h"
49 
50 #include <dev/misc/ecc/ecc_e5_reg.h>
51 
/*
 * PCI config-space read helpers.  Each accessor reads a register of a
 * device at a fixed (slot, function) on the same PCI bus as 'dev', by
 * going through the parent bridge; 'w' is the access width in bytes.
 */

/* UBOX0: per-socket uncore registers (e.g. CPUNODEID). */
#define UBOX_READ(dev, ofs, w)				\
	pcib_read_config((dev), pci_get_bus((dev)),	\
	    PCISLOT_E5_UBOX0, PCIFUNC_E5_UBOX0, (ofs), w)
#define UBOX_READ_2(dev, ofs)		UBOX_READ((dev), (ofs), 2)
#define UBOX_READ_4(dev, ofs)		UBOX_READ((dev), (ofs), 4)

/* IMC CPGC: integrated memory controller global registers (MCMTR). */
#define IMC_CPGC_READ(dev, ofs, w)			\
	pcib_read_config((dev), pci_get_bus((dev)),	\
	    PCISLOT_E5_IMC_CPGC, PCIFUNC_E5_IMC_CPGC, (ofs), w)
#define IMC_CPGC_READ_2(dev, ofs)	IMC_CPGC_READ((dev), (ofs), 2)
#define IMC_CPGC_READ_4(dev, ofs)	IMC_CPGC_READ((dev), (ofs), 4)

/* IMC CTAD: per-channel 'c' target address decoder (DIMMMTR etc.). */
#define IMC_CTAD_READ(dev, c, ofs, w)			\
	pcib_read_config((dev), pci_get_bus((dev)),	\
	    PCISLOT_E5_IMC_CTAD, PCIFUNC_E5_IMC_CTAD((c)), (ofs), w)
#define IMC_CTAD_READ_2(dev, c, ofs)	IMC_CTAD_READ((dev), (c), (ofs), 2)
#define IMC_CTAD_READ_4(dev, c, ofs)	IMC_CTAD_READ((dev), (c), (ofs), 4)
69 
/*
 * Probe-table entry: identifies one IMC error device (one memory
 * channel) by its PCI device id, slot and function.
 */
struct ecc_e5_type {
	uint16_t	did;	/* PCI device id to match */
	int		slot;	/* PCI slot to match */
	int		func;	/* PCI function to match */
	int		chan;	/* memory channel this entry represents */
	const char	*desc;	/* description; NULL terminates the table */
};
77 
/* Maps a flat rank index (as used by the error registers) to a DIMM. */
struct ecc_e5_rank {
	int		rank_dimm;	/* owner dimm */
	int		rank_dimm_rank;	/* rank within the owner dimm */
};
82 
/* Per-device (i.e. per memory channel) driver state. */
struct ecc_e5_softc {
	device_t		ecc_dev;
	int			ecc_chan;	/* memory channel */
	int			ecc_node;	/* CPU node id (from UBOX) */
	int			ecc_rank_cnt;	/* valid entries in ecc_rank[] */
	struct ecc_e5_rank	ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX];
	struct callout		ecc_callout;	/* periodic error polling */
};
91 
/* Convenience wrapper: device_printf() against this softc's device. */
#define ecc_printf(sc, fmt, arg...) \
	device_printf((sc)->ecc_dev, fmt , ##arg)
94 
/* Device interface methods. */
static int	ecc_e5_probe(device_t);
static int	ecc_e5_attach(device_t);
static int	ecc_e5_detach(device_t);
static void	ecc_e5_shutdown(device_t);

/* Periodic corrected-error polling routine. */
static void	ecc_e5_callout(void *);
101 
/*
 * Build one probe-table entry for channel 'c' on Intel E5 v2; the PCI
 * device id and function are derived from the channel number by token
 * pasting.
 */
#define ECC_E5_TYPE_V2(c) \
{ \
	.did	= PCI_E5_IMC_ERROR_CHN##c##_DID_ID, \
	.slot	= PCISLOT_E5_IMC_ERROR, \
	.func	= PCIFUNC_E5_IMC_ERROR_CHN##c, \
	.chan	= c, \
	.desc	= "Intel E5 v2 ECC" \
}

/* Sentinel entry; desc == NULL terminates the table. */
#define ECC_E5_TYPE_END		{ 0, 0, 0, 0, NULL }

static const struct ecc_e5_type ecc_types[] = {
	ECC_E5_TYPE_V2(0),
	ECC_E5_TYPE_V2(1),
	ECC_E5_TYPE_V2(2),
	ECC_E5_TYPE_V2(3),

	ECC_E5_TYPE_END
};

#undef ECC_E5_TYPE_V2
#undef ECC_E5_TYPE_END
124 
static device_method_t ecc_e5_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		ecc_e5_probe),
	DEVMETHOD(device_attach,	ecc_e5_attach),
	DEVMETHOD(device_detach,	ecc_e5_detach),
	DEVMETHOD(device_shutdown,	ecc_e5_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),
	DEVMETHOD_END
};

static driver_t ecc_e5_driver = {
	"ecc",
	ecc_e5_methods,
	sizeof(struct ecc_e5_softc)
};
static devclass_t ecc_devclass;
/* Attach to the PCI bus; this driver has no children of its own. */
DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL);
MODULE_DEPEND(ecc_e5, pci, 1, 1, 1);
144 
145 static int
146 ecc_e5_probe(device_t dev)
147 {
148 	const struct ecc_e5_type *t;
149 	uint16_t vid, did;
150 	int slot, func;
151 
152 	vid = pci_get_vendor(dev);
153 	if (vid != PCI_E5_VID_ID)
154 		return ENXIO;
155 
156 	did = pci_get_device(dev);
157 	slot = pci_get_slot(dev);
158 	func = pci_get_function(dev);
159 
160 	for (t = ecc_types; t->desc != NULL; ++t) {
161 		if (t->did == did && t->slot == slot && t->func == func) {
162 			struct ecc_e5_softc *sc = device_get_softc(dev);
163 			char desc[32];
164 			uint32_t val;
165 			int node, dimm;
166 
167 			/* Check CPGC vid/did */
168 			if (IMC_CPGC_READ_2(dev, PCIR_VENDOR) !=
169 			    PCI_E5_VID_ID ||
170 			    IMC_CPGC_READ_2(dev, PCIR_DEVICE) !=
171 			    PCI_E5_IMC_CPGC_DID_ID)
172 				break;
173 
174 			/* Is this channel disabled */
175 			val = IMC_CPGC_READ_4(dev, PCI_E5_IMC_CPGC_MCMTR);
176 			if (val & PCI_E5_IMC_CPGC_MCMTR_CHN_DISABLE(t->chan))
177 				break;
178 
179 			/* Check CTAD vid/did */
180 			if (IMC_CTAD_READ_2(dev, t->chan, PCIR_VENDOR) !=
181 			    PCI_E5_VID_ID ||
182 			    IMC_CTAD_READ_2(dev, t->chan, PCIR_DEVICE) !=
183 			    PCI_E5_IMC_CTAD_DID_ID(t->chan))
184 				break;
185 
186 			/* Are there any DIMMs populated? */
187 			for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
188 				val = IMC_CTAD_READ_4(dev, t->chan,
189 				    PCI_E5_IMC_CTAD_DIMMMTR(dimm));
190 				if (val & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP)
191 					break;
192 			}
193 			if (dimm == PCI_E5_IMC_DIMM_MAX)
194 				break;
195 
196 			/* Check UBOX vid/did */
197 			if (UBOX_READ_2(dev, PCIR_VENDOR) != PCI_E5_VID_ID ||
198 			    UBOX_READ_2(dev, PCIR_DEVICE) !=
199 			    PCI_E5_UBOX0_DID_ID)
200 				break;
201 
202 			val = UBOX_READ_4(dev, PCI_E5_UBOX0_CPUNODEID);
203 			node = __SHIFTOUT(val,
204 			    PCI_E5_UBOX0_CPUNODEID_LCLNODEID);
205 
206 			ksnprintf(desc, sizeof(desc), "%s node%d channel%d",
207 			    t->desc, node, t->chan);
208 			device_set_desc_copy(dev, desc);
209 
210 			sc->ecc_chan = t->chan;
211 			sc->ecc_node = node;
212 			return 0;
213 		}
214 	}
215 	return ENXIO;
216 }
217 
218 static int
219 ecc_e5_attach(device_t dev)
220 {
221 	struct ecc_e5_softc *sc = device_get_softc(dev);
222 	uint32_t mcmtr;
223 	int dimm, rank;
224 
225 	callout_init_mp(&sc->ecc_callout);
226 	sc->ecc_dev = dev;
227 
228 	mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, PCI_E5_IMC_CPGC_MCMTR);
229 	if (bootverbose) {
230 		if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) ==
231 		    PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3)
232 			ecc_printf(sc, "native DDR3\n");
233 	}
234 
235 	rank = 0;
236 	for (dimm = 0; dimm < PCI_E5_IMC_DIMM_MAX; ++dimm) {
237 		const char *width;
238 		uint32_t dimmmtr;
239 		int rank_cnt, r;
240 		int density;
241 		int val;
242 
243 		dimmmtr = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan,
244 		    PCI_E5_IMC_CTAD_DIMMMTR(dimm));
245 
246 		if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0)
247 			continue;
248 
249 		val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT);
250 		switch (val) {
251 		case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR:
252 			rank_cnt = 1;
253 			break;
254 		case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR:
255 			rank_cnt = 2;
256 			break;
257 		case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR:
258 			rank_cnt = 4;
259 			break;
260 		default:
261 			ecc_printf(sc, "unknown rank count 0x%x\n", val);
262 			return ENXIO;
263 		}
264 
265 		val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH);
266 		switch (val) {
267 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4:
268 			width = "x4";
269 			break;
270 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8:
271 			width = "x8";
272 			break;
273 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16:
274 			width = "x16";
275 			break;
276 		default:
277 			ecc_printf(sc, "unknown ddr3 width 0x%x\n", val);
278 			return ENXIO;
279 		}
280 
281 		val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY);
282 		switch (val) {
283 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G:
284 			density = 1;
285 			break;
286 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G:
287 			density = 2;
288 			break;
289 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G:
290 			density = 4;
291 			break;
292 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G:
293 			density = 8;
294 			break;
295 		default:
296 			ecc_printf(sc, "unknown ddr3 density 0x%x\n", val);
297 			return ENXIO;
298 		}
299 
300 		if (bootverbose) {
301 			ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n",
302 			    dimm, density * rank_cnt * 2,
303 			    rank_cnt, width, density);
304 		}
305 
306 		for (r = 0; r < rank_cnt; ++r) {
307 			struct ecc_e5_rank *rk;
308 
309 			if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) {
310 				ecc_printf(sc, "too many ranks\n");
311 				return ENXIO;
312 			}
313 			rk = &sc->ecc_rank[rank];
314 
315 			rk->rank_dimm = dimm;
316 			rk->rank_dimm_rank = r;
317 
318 			++rank;
319 		}
320 	}
321 	sc->ecc_rank_cnt = rank;
322 
323 	if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) {
324 		ecc_printf(sc, "ECC is not enabled\n");
325 		return 0;
326 	}
327 
328 	if (bootverbose) {
329 		for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) {
330 			const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
331 			uint32_t thr, mask;
332 			int ofs;
333 
334 			ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2);
335 			if (rank & 1)
336 				mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
337 			else
338 				mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
339 
340 			thr = pci_read_config(sc->ecc_dev, ofs, 4);
341 			ecc_printf(sc, "DIMM%d rank%d, "
342 			    "corrected error threshold %d\n",
343 			    rk->rank_dimm, rk->rank_dimm_rank,
344 			    __SHIFTOUT(thr, mask));
345 		}
346 	}
347 
348 	callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
349 	return 0;
350 }
351 
352 static void
353 ecc_e5_callout(void *xsc)
354 {
355 	struct ecc_e5_softc *sc = xsc;
356 	uint32_t err_ranks, val;
357 
358 	val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4);
359 
360 	err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS);
361 	while (err_ranks != 0) {
362 		int rank;
363 
364 		rank = ffs(err_ranks) - 1;
365 		err_ranks &= ~(1 << rank);
366 
367 		if (rank < sc->ecc_rank_cnt) {
368 			const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
369 			uint32_t err, mask;
370 			int ofs;
371 
372 			ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2);
373 			if (rank & 1)
374 				mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI;
375 			else
376 				mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO;
377 
378 			err = pci_read_config(sc->ecc_dev, ofs, 4);
379 			ecc_printf(sc, "node%d channel%d DIMM%d rank%d, "
380 			    "too many errors %d",
381 			    sc->ecc_node, sc->ecc_chan,
382 			    rk->rank_dimm, rk->rank_dimm_rank,
383 			    __SHIFTOUT(err, mask));
384 		}
385 	}
386 
387 	if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) {
388 		pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT,
389 		    val, 4);
390 	}
391 	callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
392 }
393 
/* Stop the polling callout; shared by detach and shutdown. */
static void
ecc_e5_stop(device_t dev)
{
	struct ecc_e5_softc *sc = device_get_softc(dev);

	callout_stop_sync(&sc->ecc_callout);
}
401 
/* Detach: nothing to release beyond stopping the callout. */
static int
ecc_e5_detach(device_t dev)
{
	ecc_e5_stop(dev);
	return 0;
}
408 
/* Shutdown: quiesce the polling callout before the system goes down. */
static void
ecc_e5_shutdown(device_t dev)
{
	ecc_e5_stop(dev);
}
414