xref: /dragonfly/sys/dev/misc/ecc/ecc_e5.c (revision b5523eac)
1 /*
2  * Copyright (c) 2015 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Sepherosa Ziehau <sepherosa@gmail.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bus.h>
38 #include <sys/kernel.h>
39 #include <sys/malloc.h>
40 #include <sys/bitops.h>
41 
42 #include <bus/pci/pcivar.h>
43 #include <bus/pci/pcireg.h>
44 #include <bus/pci/pcibus.h>
45 #include <bus/pci/pci_cfgreg.h>
46 #include <bus/pci/pcib_private.h>
47 
48 #include "pcib_if.h"
49 
50 #include <dev/misc/ecc/e5_imc_reg.h>
51 #include <dev/misc/ecc/e5_imc_var.h>
52 
/*
 * One entry of a channel's flat rank list, recording which DIMM the
 * rank belongs to and its position inside that DIMM.
 */
struct ecc_e5_rank {
	int	rank_dimm;		/* index of the DIMM owning this rank */
	int	rank_dimm_rank;		/* index of this rank inside that DIMM */
};
57 
58 struct ecc_e5_softc {
59 	device_t		ecc_dev;
60 	const struct e5_imc_chan *ecc_chan;
61 	int			ecc_node;
62 	int			ecc_rank_cnt;
63 	struct ecc_e5_rank	ecc_rank[PCI_E5_IMC_ERROR_RANK_MAX];
64 	struct callout		ecc_callout;
65 };
66 
/* device_printf() wrapper that takes the softc instead of the device. */
#define ecc_printf(sc, fmt, ...) \
	device_printf((sc)->ecc_dev, fmt , ##__VA_ARGS__)
69 
70 static int	ecc_e5_probe(device_t);
71 static int	ecc_e5_attach(device_t);
72 static int	ecc_e5_detach(device_t);
73 static void	ecc_e5_shutdown(device_t);
74 
75 static void	ecc_e5_callout(void *);
76 
/*
 * Expand to one struct e5_imc_chan initializer.  The PCI device id,
 * slot and function constants are selected by token-pasting the
 * generation (gen), memory controller (mc) and channel (ch) into the
 * PCI_E5V* / PCISLOT_E5V* / PCIFUNC_E5V* names; the remaining fields
 * come from E5_IMC_CHAN_FIELDS().
 */
#define ECC_E5_CHAN(gen, mc, ch, ch_ext) {				\
	.did	= PCI_E5V##gen##_IMC##mc##_ERROR_CHN##ch##_DID_ID,	\
	.slot	= PCISLOT_E5V##gen##_IMC##mc##_ERROR_CHN##ch,		\
	.func	= PCIFUNC_E5V##gen##_IMC##mc##_ERROR_CHN##ch,		\
	.desc	= "Intel E5 v" #gen " ECC",				\
									\
	E5_IMC_CHAN_FIELDS(gen, mc, ch, ch_ext)				\
}

/* Shorthands for the table below. */
#define ECC_E5_CHAN_V2(ch)		ECC_E5_CHAN(2, 0, ch, ch)
#define ECC_E5_CHAN_IMC0_V3(ch)		ECC_E5_CHAN(3, 0, ch, ch)
#define ECC_E5_CHAN_IMC1_V3(ch, ch_ext)	ECC_E5_CHAN(3, 1, ch, ch_ext)
#define ECC_E5_CHAN_END			E5_IMC_CHAN_END
91 
92 static const struct e5_imc_chan ecc_e5_chans[] = {
93 	ECC_E5_CHAN_V2(0),
94 	ECC_E5_CHAN_V2(1),
95 	ECC_E5_CHAN_V2(2),
96 	ECC_E5_CHAN_V2(3),
97 
98 	ECC_E5_CHAN_IMC0_V3(0),
99 	ECC_E5_CHAN_IMC0_V3(1),
100 	ECC_E5_CHAN_IMC0_V3(2),
101 	ECC_E5_CHAN_IMC0_V3(3),
102 	ECC_E5_CHAN_IMC1_V3(0, 2),	/* IMC1 chan0 -> channel2 */
103 	ECC_E5_CHAN_IMC1_V3(1, 3),	/* IMC1 chan1 -> channel3 */
104 
105 	ECC_E5_CHAN_END
106 };
107 
/*
 * The table-building macros are not needed past this point; drop all
 * of them, including the v3 shorthands.
 */
#undef ECC_E5_CHAN_END
#undef ECC_E5_CHAN_IMC1_V3
#undef ECC_E5_CHAN_IMC0_V3
#undef ECC_E5_CHAN_V2
#undef ECC_E5_CHAN
111 
112 static device_method_t ecc_e5_methods[] = {
113 	/* Device interface */
114 	DEVMETHOD(device_probe,		ecc_e5_probe),
115 	DEVMETHOD(device_attach,	ecc_e5_attach),
116 	DEVMETHOD(device_detach,	ecc_e5_detach),
117 	DEVMETHOD(device_shutdown,	ecc_e5_shutdown),
118 	DEVMETHOD(device_suspend,	bus_generic_suspend),
119 	DEVMETHOD(device_resume,	bus_generic_resume),
120 	DEVMETHOD_END
121 };
122 
123 static driver_t ecc_e5_driver = {
124 	"ecc",
125 	ecc_e5_methods,
126 	sizeof(struct ecc_e5_softc)
127 };
128 static devclass_t ecc_devclass;
129 DRIVER_MODULE(ecc_e5, pci, ecc_e5_driver, ecc_devclass, NULL, NULL);
130 MODULE_DEPEND(ecc_e5, pci, 1, 1, 1);
131 
132 static int
133 ecc_e5_probe(device_t dev)
134 {
135 	const struct e5_imc_chan *c;
136 	uint16_t vid, did;
137 	int slot, func;
138 
139 	vid = pci_get_vendor(dev);
140 	if (vid != PCI_E5_IMC_VID_ID)
141 		return ENXIO;
142 
143 	did = pci_get_device(dev);
144 	slot = pci_get_slot(dev);
145 	func = pci_get_function(dev);
146 
147 	for (c = ecc_e5_chans; c->desc != NULL; ++c) {
148 		if (c->did == did && c->slot == slot && c->func == func) {
149 			struct ecc_e5_softc *sc = device_get_softc(dev);
150 			char desc[32];
151 			int node;
152 
153 			node = e5_imc_node_probe(dev, c);
154 			if (node < 0)
155 				break;
156 
157 			ksnprintf(desc, sizeof(desc), "%s node%d channel%d",
158 			    c->desc, node, c->chan_ext);
159 			device_set_desc_copy(dev, desc);
160 
161 			sc->ecc_chan = c;
162 			sc->ecc_node = node;
163 			return 0;
164 		}
165 	}
166 	return ENXIO;
167 }
168 
169 static int
170 ecc_e5_attach(device_t dev)
171 {
172 	struct ecc_e5_softc *sc = device_get_softc(dev);
173 	uint32_t mcmtr;
174 	int dimm, rank;
175 
176 	callout_init_mp(&sc->ecc_callout);
177 	sc->ecc_dev = dev;
178 
179 	mcmtr = IMC_CPGC_READ_4(sc->ecc_dev, sc->ecc_chan,
180 	    PCI_E5_IMC_CPGC_MCMTR);
181 	if (bootverbose) {
182 		if (sc->ecc_chan->ver == E5_IMC_CHAN_VER3 &&
183 		    (mcmtr & PCI_E5V3_IMC_CPGC_MCMTR_DDR4))
184 			ecc_printf(sc, "DDR4\n");
185 		if (__SHIFTOUT(mcmtr, PCI_E5_IMC_CPGC_MCMTR_IMC_MODE) ==
186 		    PCI_E5_IMC_CPGC_MCMTR_IMC_MODE_DDR3) {
187 			ecc_printf(sc, "native %s\n",
188 			    sc->ecc_chan->ver == E5_IMC_CHAN_VER2 ?
189 			    "DDR3" : "DDR");
190 		}
191 	}
192 
193 	rank = 0;
194 	for (dimm = 0; dimm < PCI_E5_IMC_CHN_DIMM_MAX; ++dimm) {
195 		const char *width;
196 		uint32_t dimmmtr;
197 		int rank_cnt, r;
198 		int density;
199 		int val;
200 
201 		dimmmtr = IMC_CTAD_READ_4(sc->ecc_dev, sc->ecc_chan,
202 		    PCI_E5_IMC_CTAD_DIMMMTR(dimm));
203 
204 		if ((dimmmtr & PCI_E5_IMC_CTAD_DIMMMTR_DIMM_POP) == 0)
205 			continue;
206 
207 		val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT);
208 		switch (val) {
209 		case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_SR:
210 			rank_cnt = 1;
211 			break;
212 		case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_DR:
213 			rank_cnt = 2;
214 			break;
215 		case PCI_E5_IMC_CTAD_DIMMMTR_RANK_CNT_QR:
216 			rank_cnt = 4;
217 			break;
218 		case PCI_E5V3_IMC_CTAD_DIMMMTR_RANK_CNT_8R:
219 			if (sc->ecc_chan->ver >= E5_IMC_CHAN_VER3) {
220 				rank_cnt = 8;
221 				break;
222 			}
223 			/* FALL THROUGH */
224 		default:
225 			ecc_printf(sc, "unknown rank count 0x%x\n", val);
226 			return ENXIO;
227 		}
228 
229 		val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH);
230 		switch (val) {
231 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_4:
232 			width = "x4";
233 			break;
234 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_8:
235 			width = "x8";
236 			break;
237 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_WIDTH_16:
238 			width = "x16";
239 			break;
240 		default:
241 			ecc_printf(sc, "unknown ddr3 width 0x%x\n", val);
242 			return ENXIO;
243 		}
244 
245 		val = __SHIFTOUT(dimmmtr, PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY);
246 		switch (val) {
247 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_2G:
248 			density = 2;
249 			break;
250 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_4G:
251 			density = 4;
252 			break;
253 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_8G:
254 			density = 8;
255 			break;
256 		case PCI_E5_IMC_CTAD_DIMMMTR_DDR3_DNSTY_1G:
257 			if (sc->ecc_chan->ver < E5_IMC_CHAN_VER3) {
258 				density = 1;
259 				break;
260 			}
261 			/* FALL THROUGH */
262 		default:
263 			ecc_printf(sc, "unknown ddr3 density 0x%x\n", val);
264 			return ENXIO;
265 		}
266 
267 		if (bootverbose) {
268 			ecc_printf(sc, "DIMM%d %dGB, %d%s, density %dGB\n",
269 			    dimm, density * rank_cnt * 2,
270 			    rank_cnt, width, density);
271 		}
272 
273 		for (r = 0; r < rank_cnt; ++r) {
274 			struct ecc_e5_rank *rk;
275 
276 			if (rank >= PCI_E5_IMC_ERROR_RANK_MAX) {
277 				ecc_printf(sc, "too many ranks\n");
278 				return ENXIO;
279 			}
280 			rk = &sc->ecc_rank[rank];
281 
282 			rk->rank_dimm = dimm;
283 			rk->rank_dimm_rank = r;
284 
285 			++rank;
286 		}
287 	}
288 	sc->ecc_rank_cnt = rank;
289 
290 	if ((mcmtr & PCI_E5_IMC_CPGC_MCMTR_ECC_EN) == 0) {
291 		ecc_printf(sc, "ECC is not enabled\n");
292 		return 0;
293 	}
294 
295 	if (bootverbose) {
296 		for (rank = 0; rank < sc->ecc_rank_cnt; ++rank) {
297 			const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
298 			uint32_t thr, mask;
299 			int ofs;
300 
301 			ofs = PCI_E5_IMC_ERROR_COR_ERR_TH(rank / 2);
302 			if (rank & 1)
303 				mask = PCI_E5_IMC_ERROR_COR_ERR_TH_HI;
304 			else
305 				mask = PCI_E5_IMC_ERROR_COR_ERR_TH_LO;
306 
307 			thr = pci_read_config(sc->ecc_dev, ofs, 4);
308 			ecc_printf(sc, "DIMM%d rank%d, "
309 			    "corrected error threshold %d\n",
310 			    rk->rank_dimm, rk->rank_dimm_rank,
311 			    __SHIFTOUT(thr, mask));
312 		}
313 	}
314 
315 	callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
316 	return 0;
317 }
318 
319 static void
320 ecc_e5_callout(void *xsc)
321 {
322 	struct ecc_e5_softc *sc = xsc;
323 	uint32_t err_ranks, val;
324 
325 	val = pci_read_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT, 4);
326 
327 	err_ranks = (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS);
328 	while (err_ranks != 0) {
329 		int rank;
330 
331 		rank = ffs(err_ranks) - 1;
332 		err_ranks &= ~(1 << rank);
333 
334 		if (rank < sc->ecc_rank_cnt) {
335 			const struct ecc_e5_rank *rk = &sc->ecc_rank[rank];
336 			uint32_t err, mask;
337 			int ofs;
338 
339 			ofs = PCI_E5_IMC_ERROR_COR_ERR_CNT(rank / 2);
340 			if (rank & 1)
341 				mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_HI;
342 			else
343 				mask = PCI_E5_IMC_ERROR_COR_ERR_CNT_LO;
344 
345 			err = pci_read_config(sc->ecc_dev, ofs, 4);
346 			ecc_printf(sc, "node%d channel%d DIMM%d rank%d, "
347 			    "too many errors %d",
348 			    sc->ecc_node, sc->ecc_chan->chan_ext,
349 			    rk->rank_dimm, rk->rank_dimm_rank,
350 			    __SHIFTOUT(err, mask));
351 		}
352 	}
353 
354 	if (val & PCI_E5_IMC_ERROR_COR_ERR_STAT_RANKS) {
355 		pci_write_config(sc->ecc_dev, PCI_E5_IMC_ERROR_COR_ERR_STAT,
356 		    val, 4);
357 	}
358 	callout_reset(&sc->ecc_callout, hz, ecc_e5_callout, sc);
359 }
360 
361 static void
362 ecc_e5_stop(device_t dev)
363 {
364 	struct ecc_e5_softc *sc = device_get_softc(dev);
365 
366 	callout_stop_sync(&sc->ecc_callout);
367 }
368 
369 static int
370 ecc_e5_detach(device_t dev)
371 {
372 	ecc_e5_stop(dev);
373 	return 0;
374 }
375 
376 static void
377 ecc_e5_shutdown(device_t dev)
378 {
379 	ecc_e5_stop(dev);
380 }
381