xref: /netbsd/sys/arch/powerpc/ibm4xx/dev/ecc_plb.c (revision c4a72b64)
1 /*	$NetBSD: ecc_plb.c,v 1.6 2002/10/02 15:52:27 thorpej Exp $	*/
2 
3 /*
4  * Copyright 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Eduardo Horvath and Simon Burge for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 
38 #include "locators.h"
39 
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/device.h>
43 #include <sys/properties.h>
44 
45 #include <machine/dcr.h>
46 #include <machine/cpu.h>
47 #include <powerpc/ibm4xx/dev/plbvar.h>
48 
49 
50 struct ecc_plb_softc {
51 	struct device sc_dev;
52 	u_quad_t sc_ecc_tb;
53 	u_quad_t sc_ecc_iv;	 /* Interval */
54 	u_int32_t sc_ecc_cnt;
55 	u_int sc_memsize;
56 	int sc_irq;
57 };
58 
59 static int	ecc_plbmatch(struct device *, struct cfdata *, void *);
60 static void	ecc_plbattach(struct device *, struct device *, void *);
61 static void	ecc_plb_deferred(struct device *);
62 static int	ecc_plb_intr(void *);
63 
64 CFATTACH_DECL(ecc_plb, sizeof(struct ecc_plb_softc),
65     ecc_plbmatch, ecc_plbattach, NULL, NULL);
66 
67 static int ecc_plb_found;
68 
69 static int
70 ecc_plbmatch(struct device *parent, struct cfdata *cf, void *aux)
71 {
72 	struct plb_attach_args *paa = aux;
73 
74 	if (strcmp(paa->plb_name, cf->cf_name) != 0)
75 		return (0);
76 
77 	if (cf->cf_loc[PLBCF_IRQ] == PLBCF_IRQ_DEFAULT)
78 		panic("ecc_plbmatch: wildcard IRQ not allowed");
79 
80 	paa->plb_irq = cf->cf_loc[PLBCF_IRQ];
81 
82 	return (!ecc_plb_found);
83 }
84 
85 static void
86 ecc_plbattach(struct device *parent, struct device *self, void *aux)
87 {
88 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
89 	struct plb_attach_args *paa = aux;
90 	unsigned int processor_freq;
91 	unsigned int memsiz;
92 
93 	ecc_plb_found++;
94 
95 	if (board_info_get("processor-frequency",
96 		&processor_freq, sizeof(processor_freq)) == -1)
97 		panic("no processor-frequency");
98 
99 	if (board_info_get("mem-size", &memsiz, sizeof(memsiz)) == -1)
100 		panic("no mem-size");
101 
102 	printf(": ECC controller\n");
103 
104 	sc->sc_ecc_tb = 0;
105 	sc->sc_ecc_cnt = 0;
106 	sc->sc_ecc_iv = processor_freq; /* Set interval */
107 	sc->sc_memsize = memsiz;
108 	sc->sc_irq = paa->plb_irq;
109 
110 	/*
111 	 * Defer hooking the interrupt until all PLB devices have attached
112 	 * since the interrupt controller may well be one of those devices...
113 	 */
114 	config_defer(self, ecc_plb_deferred);
115 }
116 
117 static void
118 ecc_plb_deferred(struct device *self)
119 {
120 	struct ecc_plb_softc *sc = (struct ecc_plb_softc *)self;
121 
122 	intr_establish(sc->sc_irq, IST_LEVEL, IPL_SERIAL, ecc_plb_intr, NULL);
123 }
124 
125 /*
126  * ECC fault handler.
127  */
128 static int
129 ecc_plb_intr(void *arg)
130 {
131 	struct ecc_plb_softc *sc = arg;
132 	u_int32_t		esr, ear;
133 	int			ce, ue;
134 	u_quad_t		tb;
135 	u_long			tmp, msr, dat;
136 
137 	/* This code needs to be improved to handle double-bit errors */
138 	/* in some intelligent fashion. */
139 
140 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
141 	esr = mfdcr(DCR_SDRAM0_CFGDATA);
142 
143 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_BEAR);
144 	ear = mfdcr(DCR_SDRAM0_CFGDATA);
145 
146 	/* Always clear the error to stop the intr ASAP. */
147 
148 	mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
149 	mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
150 
151 	if (esr == 0x00) {
152 		/* No current error.  Could happen due to intr. nesting */
153 		return(1);
154 	}
155 
156 	/*
157 	 * Only report errors every once per second max. Do this using the TB,
158 	 * because the system time (via microtime) may be adjusted when the
159 	 * date is set and can't reliably be used to measure intervals.
160 	 */
161 
162 	asm ("1: mftbu %0; mftb %0+1; mftbu %1; cmpw %0,%1; bne 1b"
163 		: "=r"(tb), "=r"(tmp));
164 	sc->sc_ecc_cnt++;
165 
166 	if ((tb - sc->sc_ecc_tb) < sc->sc_ecc_iv)
167 		return(1);
168 
169 	ce = (esr & SDRAM0_ECCESR_CE) != 0x00;
170 	ue = (esr & SDRAM0_ECCESR_UE) != 0x00;
171 
172 	printf("ECC: Error CNT=%d ESR=%x EAR=%x %s BKNE=%d%d%d%d "
173 		"BLCE=%d%d%d%d CBE=%d%d.\n",
174 		sc->sc_ecc_cnt, esr, ear,
175 		(ue) ? "Uncorrectable" : "Correctable",
176 		((esr & SDRAM0_ECCESR_BKEN(0)) != 0x00),
177 		((esr & SDRAM0_ECCESR_BKEN(1)) != 0x00),
178 		((esr & SDRAM0_ECCESR_BKEN(2)) != 0x00),
179 		((esr & SDRAM0_ECCESR_BKEN(3)) != 0x00),
180 		((esr & SDRAM0_ECCESR_BLCEN(0)) != 0x00),
181 		((esr & SDRAM0_ECCESR_BLCEN(1)) != 0x00),
182 		((esr & SDRAM0_ECCESR_BLCEN(2)) != 0x00),
183 		((esr & SDRAM0_ECCESR_BLCEN(3)) != 0x00),
184 		((esr & SDRAM0_ECCESR_CBEN(0)) != 0x00),
185 		((esr & SDRAM0_ECCESR_CBEN(1)) != 0x00));
186 
187 	/* Should check for uncorrectable errors and panic... */
188 
189 	if (sc->sc_ecc_cnt > 1000) {
190 		printf("ECC: Too many errors, recycling entire "
191 			"SDRAM (size = %d).\n", sc->sc_memsize);
192 
193 		/*
194 		 * Can this code be changed to run without disabling data MMU
195 		 * and disabling intrs?
196 		 * Does kernel always map all of physical RAM VA=PA? If so,
197 		 * just loop over lowmem.
198 		 */
199 		asm volatile(
200 			"mfmsr 	%0;"
201 			"li	%1, 0x00;"
202 			"ori	%1, %1, 0x8010;"
203 			"andc	%1, %0, %1;"
204 			"mtmsr	%1;"
205 			"sync;isync;"
206 			"li	%1, 0x00;"
207 			"1:"
208 			"dcbt	0, %1;"
209 			"sync;isync;"
210 			"lwz	%2, 0(%1);"
211 			"stw	%2, 0(%1);"
212 			"sync;isync;"
213 			"dcbf	0, %1;"
214 			"sync;isync;"
215 			"addi	%1, %1, 0x20;"
216 			"addic.	%3, %3, -0x20;"
217 			"bge 	1b;"
218 			"mtmsr %0;"
219 			"sync;isync;"
220 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
221 		: "r" (sc->sc_memsize) : "0" );
222 
223 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
224 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
225 
226 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
227 		mtdcr(DCR_SDRAM0_CFGDATA, 0xffffffff);
228 
229 		/*
230 		 * Correctable errors here are OK, mem should be clean now.
231 		 *
232 		 * Should check for uncorrectable errors and panic...
233 		 */
234 		printf("ECC: Recycling complete, ESR=%x. "
235 			"Checking for persistent errors.\n", esr);
236 
237 		asm volatile(
238 			"mfmsr 	%0;"
239 			"li	%1, 0x00;"
240 			"ori	%1, %1, 0x8010;"
241 			"andc	%1, %0, %1;"
242 			"mtmsr	%1;"
243 			"sync;isync;"
244 			"li	%1, 0x00;"
245 			"1:"
246 			"dcbt	0, %1;"
247 			"sync;isync;"
248 			"lwz	%2, 0(%1);"
249 			"stw	%2, 0(%1);"
250 			"sync;isync;"
251 			"dcbf	0, %1;"
252 			"sync;isync;"
253 			"addi	%1, %1, 0x20;"
254 			"addic.	%3, %3, -0x20;"
255 			"bge 	1b;"
256 			"mtmsr %0;"
257 			"sync;isync;"
258 		: "=&r" (msr), "=&r" (tmp), "=&r" (dat)
259 		: "r" (sc->sc_memsize) : "0" );
260 
261 		mtdcr(DCR_SDRAM0_CFGADDR, DCR_SDRAM0_ECCESR);
262 		esr = mfdcr(DCR_SDRAM0_CFGDATA);
263 
264 		/*
265 		 * If esr is non zero here, we're screwed.
266 		 * Should check this and panic.
267 		 */
268 		printf("ECC: Persistent error check complete, "
269 			"final ESR=%x.\n", esr);
270 	}
271 
272 	sc->sc_ecc_tb = tb;
273 	sc->sc_ecc_cnt = 0;
274 
275 	return(1);
276 }
277