xref: /illumos-gate/usr/src/uts/sun4u/cpu/us3_common.c (revision 3db86aab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/types.h>
29 #include <sys/systm.h>
30 #include <sys/ddi.h>
31 #include <sys/sysmacros.h>
32 #include <sys/archsystm.h>
33 #include <sys/vmsystm.h>
34 #include <sys/machparam.h>
35 #include <sys/machsystm.h>
36 #include <sys/machthread.h>
37 #include <sys/cpu.h>
38 #include <sys/cmp.h>
39 #include <sys/elf_SPARC.h>
40 #include <vm/vm_dep.h>
41 #include <vm/hat_sfmmu.h>
42 #include <vm/seg_kpm.h>
43 #include <sys/cpuvar.h>
44 #include <sys/cheetahregs.h>
45 #include <sys/us3_module.h>
46 #include <sys/async.h>
47 #include <sys/cmn_err.h>
48 #include <sys/debug.h>
49 #include <sys/dditypes.h>
50 #include <sys/prom_debug.h>
51 #include <sys/prom_plat.h>
52 #include <sys/cpu_module.h>
53 #include <sys/sysmacros.h>
54 #include <sys/intreg.h>
55 #include <sys/clock.h>
56 #include <sys/platform_module.h>
57 #include <sys/machtrap.h>
58 #include <sys/ontrap.h>
59 #include <sys/panic.h>
60 #include <sys/memlist.h>
61 #include <sys/bootconf.h>
62 #include <sys/ivintr.h>
63 #include <sys/atomic.h>
64 #include <sys/taskq.h>
65 #include <sys/note.h>
66 #include <sys/ndifm.h>
67 #include <sys/ddifm.h>
68 #include <sys/fm/protocol.h>
69 #include <sys/fm/util.h>
70 #include <sys/fm/cpu/UltraSPARC-III.h>
71 #include <sys/fpras_impl.h>
72 #include <sys/dtrace.h>
73 #include <sys/watchpoint.h>
74 #include <sys/plat_ecc_unum.h>
75 #include <sys/cyclic.h>
76 #include <sys/errorq.h>
77 #include <sys/errclassify.h>
78 
79 #ifdef	CHEETAHPLUS_ERRATUM_25
80 #include <sys/xc_impl.h>
81 #endif	/* CHEETAHPLUS_ERRATUM_25 */
82 
83 /*
84  * Note that 'Cheetah PRM' refers to:
85  *   SPARC V9 JPS1 Implementation Supplement: Sun UltraSPARC-III
86  */
87 
88 /*
89  * Per CPU pointers to physical address of TL>0 logout data areas.
90  * These pointers have to be in the kernel nucleus to avoid MMU
91  * misses.
92  */
93 uint64_t ch_err_tl1_paddrs[NCPU];
94 
95 /*
96  * One statically allocated structure to use during startup/DR
97  * to prevent unnecessary panics.
98  */
99 ch_err_tl1_data_t ch_err_tl1_data;
100 
101 /*
102  * Per CPU pending error at TL>0, used by level15 softint handler
103  */
104 uchar_t ch_err_tl1_pending[NCPU];
105 
106 /*
107  * For deferred CE re-enable after trap.
108  */
109 taskq_t		*ch_check_ce_tq;
110 
111 /*
112  * Internal functions.
113  */
114 static int cpu_async_log_err(void *flt, errorq_elem_t *eqep);
115 static void cpu_log_diag_info(ch_async_flt_t *ch_flt);
116 static void cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
117     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp);
118 static int clear_ecc(struct async_flt *ecc);
119 #if defined(CPU_IMP_ECACHE_ASSOC)
120 static int cpu_ecache_line_valid(ch_async_flt_t *ch_flt);
121 #endif
122 static int cpu_ecache_set_size(struct cpu *cp);
123 static int cpu_ectag_line_invalid(int cachesize, uint64_t tag);
124 static int cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr);
125 static uint64_t cpu_ectag_to_pa(int setsize, uint64_t tag);
126 static int cpu_ectag_pa_to_subblk_state(int cachesize,
127 				uint64_t subaddr, uint64_t tag);
128 static void cpu_flush_ecache_line(ch_async_flt_t *ch_flt);
129 static int afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit);
130 static int afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit);
131 static int afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit);
132 static int afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit);
133 static int synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit);
134 static void cpu_uninit_ecache_scrub_dr(struct cpu *cp);
135 static void cpu_scrubphys(struct async_flt *aflt);
136 static void cpu_payload_add_aflt(struct async_flt *, nvlist_t *, nvlist_t *,
137     int *, int *);
138 static void cpu_payload_add_ecache(struct async_flt *, nvlist_t *);
139 static void cpu_ereport_init(struct async_flt *aflt);
140 static int cpu_check_secondary_errors(ch_async_flt_t *, uint64_t, uint64_t);
141 static uint8_t cpu_flt_bit_to_plat_error(struct async_flt *aflt);
142 static void cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
143     uint64_t nceen, ch_cpu_logout_t *clop);
144 static int cpu_ce_delayed_ec_logout(uint64_t);
145 static int cpu_matching_ecache_line(uint64_t, void *, int, int *);
146 
147 #ifdef	CHEETAHPLUS_ERRATUM_25
148 static int mondo_recover_proc(uint16_t, int);
149 static void cheetah_nudge_init(void);
150 static void cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
151     cyc_time_t *when);
152 static void cheetah_nudge_buddy(void);
153 #endif	/* CHEETAHPLUS_ERRATUM_25 */
154 
155 #if defined(CPU_IMP_L1_CACHE_PARITY)
156 static void cpu_dcache_parity_info(ch_async_flt_t *ch_flt);
157 static void cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index);
158 static void cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
159     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word);
160 static void cpu_icache_parity_info(ch_async_flt_t *ch_flt);
161 static void cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index);
162 static void cpu_pcache_parity_info(ch_async_flt_t *ch_flt);
163 static void cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index);
164 static void cpu_payload_add_dcache(struct async_flt *, nvlist_t *);
165 static void cpu_payload_add_icache(struct async_flt *, nvlist_t *);
166 #endif	/* CPU_IMP_L1_CACHE_PARITY */
167 
168 int (*p2get_mem_info)(int synd_code, uint64_t paddr,
169     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
170     int *segsp, int *banksp, int *mcidp);
171 
/*
 * ecc_syndrome_tab maps a 9-bit ECC syndrome (the array index) to a code
 * describing which bit(s) are bad.  The codes are:
 *
 *      00-127  The number of the bad data bit, when only one bit is bad.
 *      128-136 ECC check bit C0 through C8 is bad.
 *      137-143 Reserved for Mtag data (MT0-MT2) and Mtag check bits
 *              (MTC0-MTC3).
 *      144(M2) Two bits are bad within a nibble.
 *      145(M3) Three bits are bad within a nibble.
 *      146(M4) Four bits are bad within a nibble.
 *      147(M)  Multiple bits (5 or more) are bad.
 *      148(NA) NO bits are bad.
 *      149 and up
 *              Special syndromes; the set differs between
 *              Jalapeno/Serrano and the other implementations.
 *
 * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-4,11-5.
 */

/* ECC check bits */
#define	C0	128
#define	C1	129
#define	C2	130
#define	C3	131
#define	C4	132
#define	C5	133
#define	C6	134
#define	C7	135
#define	C8	136

/* Mtag data and check bits */
#define	MT0	137	/* Mtag Data bit 0 */
#define	MT1	138
#define	MT2	139
#define	MTC0	140	/* Mtag Check bit 0 */
#define	MTC1	141
#define	MTC2	142
#define	MTC3	143

/* Multi-bit and no-error codes */
#define	M2	144
#define	M3	145
#define	M4	146
#define	M	147
#define	NA	148

/* Special syndromes, per implementation */
#if defined(JALAPENO) || defined(SERRANO)
#define	S003	149	/* Syndrome 0x003 => likely from CPU/EDU:ST/FRU/BP */
#define	S003MEM	150	/* Syndrome 0x003 => likely from WDU/WBP */
#define	SLAST	S003MEM	/* last special syndrome */
#define	BPAR0	152	/* syndromes 152 through 167 are for bus parity */
#define	BPAR15	167
#else /* JALAPENO || SERRANO */
#define	S003	149	/* Syndrome 0x003 => likely from EDU:ST */
#define	S071	150	/* Syndrome 0x071 => likely from WDU/CPU */
#define	S11C	151	/* Syndrome 0x11c => likely from BERR/DBERR */
#define	SLAST	S11C	/* last special syndrome */
#endif /* JALAPENO || SERRANO */

/*
 * Only the entries for syndromes 0x071 and 0x11c differ between the two
 * table variants: Jalapeno/Serrano report them as M2, everything else
 * uses the dedicated special-syndrome codes.
 */
#if defined(JALAPENO) || defined(SERRANO)
#define	ESYND_0X071	M2
#define	ESYND_0X11C	M2
#else /* JALAPENO || SERRANO */
#define	ESYND_0X071	S071
#define	ESYND_0X11C	S11C
#endif /* JALAPENO || SERRANO */

static uint8_t ecc_syndrome_tab[] =
{
/* syndromes 0x000 - 0x03f */
NA,  C0,  C1, S003, C2,  M2,  M3,  47,  C3,  M2,  M2,  53,  M2,  41,  29,   M,
C4,   M,   M,  50,  M2,  38,  25,  M2,  M2,  33,  24,  M2,  11,   M,  M2,  16,
C5,   M,   M,  46,  M2,  37,  19,  M2,   M,  31,  32,   M,   7,  M2,  M2,  10,
M2,  40,  13,  M2,  59,   M,  M2,  66,   M,  M2,  M2,   0,  M2,  67,  71,   M,
/* syndromes 0x040 - 0x07f */
C6,   M,   M,  43,   M,  36,  18,   M,  M2,  49,  15,   M,  63,  M2,  M2,   6,
M2,  44,  28,  M2,   M,  M2,  M2,  52,  68,  M2,  M2,  62,  M2,  M3,  M3,  M4,
M2,  26, 106,  M2,  64,   M,  M2,   2, 120,   M,  M2,  M3,   M,  M3,  M3,  M4,
116, ESYND_0X071,
	  M2,  M3,  M2,  M3,   M,  M4,  M2,  58,  54,  M2,   M,  M4,  M4,  M3,
/* syndromes 0x080 - 0x0bf */
C7,  M2,   M,  42,   M,  35,  17,  M2,   M,  45,  14,  M2,  21,  M2,  M2,   5,
M,   27,   M,   M,  99,   M,   M,   3, 114,  M2,  M2,  20,  M2,  M3,  M3,   M,
M2,  23, 113,  M2, 112,  M2,   M,  51,  95,   M,  M2,  M3,  M2,  M3,  M3,  M2,
103,  M,  M2,  M3,  M2,  M3,  M3,  M4,  M2,  48,   M,   M,  73,  M2,   M,  M3,
/* syndromes 0x0c0 - 0x0ff */
M2,  22, 110,  M2, 109,  M2,   M,   9, 108,  M2,   M,  M3,  M2,  M3,  M3,   M,
102, M2,   M,   M,  M2,  M3,  M3,   M,  M2,  M3,  M3,  M2,   M,  M4,   M,  M3,
98,   M,  M2,  M3,  M2,   M,  M3,  M4,  M2,  M3,  M3,  M4,  M3,   M,   M,   M,
M2,  M3,  M3,   M,  M3,   M,   M,   M,  56,  M4,   M,  M3,  M4,   M,   M,   M,
/* syndromes 0x100 - 0x13f */
C8,   M,  M2,  39,   M,  34, 105,  M2,   M,  30, 104,   M, 101,   M,   M,   4,
M,    M, 100,   M,  83,   M,  M2,  12,  87,   M,   M,  57,
	ESYND_0X11C,  M,  M3,   M,
M2,  97,  82,  M2,  78,  M2,  M2,   1,  96,   M,   M,   M,   M,   M,  M3,  M2,
94,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  79,   M,  69,   M,  M4,   M,
/* syndromes 0x140 - 0x17f */
M2,  93,  92,   M,  91,   M,  M2,   8,  90,  M2,  M2,   M,   M,   M,   M,  M4,
89,   M,   M,  M3,  M2,  M3,  M3,   M,   M,   M,  M3,  M2,  M3,  M2,   M,  M3,
86,   M,  M2,  M3,  M2,   M,  M3,   M,  M2,   M,  M3,   M,  M3,   M,   M,  M3,
M,    M,  M3,  M2,  M3,  M2,  M4,   M,  60,   M,  M2,  M3,  M4,   M,   M,  M2,
/* syndromes 0x180 - 0x1bf */
M2,  88,  85,  M2,  84,   M,  M2,  55,  81,  M2,  M2,  M3,  M2,  M3,  M3,  M4,
77,   M,   M,   M,  M2,  M3,   M,   M,  M2,  M3,  M3,  M4,  M3,  M2,   M,   M,
74,   M,  M2,  M3,   M,   M,  M3,   M,   M,   M,  M3,   M,  M3,   M,  M4,  M3,
M2,  70, 107,  M4,  65,  M2,  M2,   M, 127,   M,   M,   M,  M2,  M3,  M3,   M,
/* syndromes 0x1c0 - 0x1ff */
80,  M2,  M2,  72,   M, 119, 118,   M,  M2, 126,  76,   M, 125,   M,  M4,  M3,
M2, 115, 124,   M,  75,   M,   M,  M3,  61,   M,  M4,   M,  M4,   M,   M,   M,
M,  123, 122,  M4, 121,  M4,   M,  M3, 117,  M2,  M2,  M3,  M4,  M3,   M,   M,
111,  M,   M,   M,  M4,  M3,  M3,   M,   M,   M,  M3,   M,  M3,  M2,   M,   M
};

#undef	ESYND_0X071
#undef	ESYND_0X11C

#define	ESYND_TBL_SIZE	(sizeof (ecc_syndrome_tab) / sizeof (uint8_t))
277 
278 #if !(defined(JALAPENO) || defined(SERRANO))
279 /*
280  * This table is used to determine which bit(s) is(are) bad when a Mtag
281  * error occurs.  The array is indexed by an 4-bit ECC syndrome. The entries
282  * of this array have the following semantics:
283  *
284  *      -1	Invalid mtag syndrome.
285  *      137     Mtag Data 0 is bad.
286  *      138     Mtag Data 1 is bad.
287  *      139     Mtag Data 2 is bad.
288  *      140     Mtag ECC 0 is bad.
289  *      141     Mtag ECC 1 is bad.
290  *      142     Mtag ECC 2 is bad.
291  *      143     Mtag ECC 3 is bad.
292  * Based on "Cheetah Programmer's Reference Manual" rev 1.1, Tables 11-6.
293  */
294 short mtag_syndrome_tab[] =
295 {
296 NA, MTC0, MTC1, M2, MTC2, M2, M2, MT0, MTC3, M2, M2,  MT1, M2, MT2, M2, M2
297 };
298 
299 #define	MSYND_TBL_SIZE	(sizeof (mtag_syndrome_tab) / sizeof (short))
300 
301 #else /* !(JALAPENO || SERRANO) */
302 
303 #define	BSYND_TBL_SIZE	16
304 
305 #endif /* !(JALAPENO || SERRANO) */
306 
307 /*
308  * CE initial classification and subsequent action lookup table
309  */
310 static ce_dispact_t ce_disp_table[CE_INITDISPTBL_SIZE];
311 static int ce_disp_inited;
312 
313 /*
314  * Set to disable leaky and partner check for memory correctables
315  */
316 int ce_xdiag_off;
317 
318 /*
319  * The following are not incremented atomically so are indicative only
320  */
321 static int ce_xdiag_drops;
322 static int ce_xdiag_lkydrops;
323 static int ce_xdiag_ptnrdrops;
324 static int ce_xdiag_bad;
325 
326 /*
327  * CE leaky check callback structure
328  */
329 typedef struct {
330 	struct async_flt *lkycb_aflt;
331 	errorq_t *lkycb_eqp;
332 	errorq_elem_t *lkycb_eqep;
333 } ce_lkychk_cb_t;
334 
/*
 * Values for the various ecache_flush_flag's
 */
#define	ECACHE_FLUSH_LINE	1
#define	ECACHE_FLUSH_ALL	2

/*
 * STICK synchronization parameters and state.
 */
#define	STICK_ITERATION 10
#define	MAX_TSKEW	1

/* Indices into timestamp[] */
#define	EV_A_START	0
#define	EV_A_END	1
#define	EV_B_START	2
#define	EV_B_END	3
#define	EVENTS		4	/* number of timestamp[] slots */

static int64_t stick_iter = STICK_ITERATION;
static int64_t stick_tsk = MAX_TSKEW;

/* Commands passed through stick_sync_cmd */
typedef enum {
	EVENT_NULL = 0,
	SLAVE_START,
	SLAVE_CONT,
	MASTER_START
} event_cmd_t;

static volatile event_cmd_t stick_sync_cmd = EVENT_NULL;
static int64_t timestamp[EVENTS];
static volatile int slave_done;

#ifdef DEBUG
/*
 * DEBUG kernels keep an array of DSYNC_ATTEMPTS skew values for every CPU.
 */
#define	DSYNC_ATTEMPTS 64
typedef struct {
	int64_t	skew_val[DSYNC_ATTEMPTS];
} ss_t;

ss_t stick_sync_stats[NCPU];
#endif /* DEBUG */
374 
375 uint_t cpu_impl_dual_pgsz = 0;
376 #if defined(CPU_IMP_DUAL_PAGESIZE)
377 uint_t disable_dual_pgsz = 0;
378 #endif	/* CPU_IMP_DUAL_PAGESIZE */
379 
380 /*
381  * Save the cache bootup state for use when internal
382  * caches are to be re-enabled after an error occurs.
383  */
384 uint64_t cache_boot_state;
385 
386 /*
387  * PA[22:0] represent Displacement in Safari configuration space.
388  */
389 uint_t	root_phys_addr_lo_mask = 0x7fffffu;
390 
391 bus_config_eclk_t bus_config_eclk[] = {
392 #if defined(JALAPENO) || defined(SERRANO)
393 	{JBUS_CONFIG_ECLK_1_DIV, JBUS_CONFIG_ECLK_1},
394 	{JBUS_CONFIG_ECLK_2_DIV, JBUS_CONFIG_ECLK_2},
395 	{JBUS_CONFIG_ECLK_32_DIV, JBUS_CONFIG_ECLK_32},
396 #else /* JALAPENO || SERRANO */
397 	{SAFARI_CONFIG_ECLK_1_DIV, SAFARI_CONFIG_ECLK_1},
398 	{SAFARI_CONFIG_ECLK_2_DIV, SAFARI_CONFIG_ECLK_2},
399 	{SAFARI_CONFIG_ECLK_32_DIV, SAFARI_CONFIG_ECLK_32},
400 #endif /* JALAPENO || SERRANO */
401 	{0, 0}
402 };
403 
404 /*
405  * Interval for deferred CEEN reenable
406  */
407 int cpu_ceen_delay_secs = CPU_CEEN_DELAY_SECS;
408 
409 /*
410  * set in /etc/system to control logging of user BERR/TO's
411  */
412 int cpu_berr_to_verbose = 0;
413 
414 /*
415  * set to 0 in /etc/system to defer CEEN reenable for all CEs
416  */
417 uint64_t cpu_ce_not_deferred = CPU_CE_NOT_DEFERRED;
418 uint64_t cpu_ce_not_deferred_ext = CPU_CE_NOT_DEFERRED_EXT;
419 
420 /*
421  * Set of all offline cpus
422  */
423 cpuset_t cpu_offline_set;
424 
425 static void cpu_delayed_check_ce_errors(void *);
426 static void cpu_check_ce_errors(void *);
427 void cpu_error_ecache_flush(ch_async_flt_t *);
428 static int cpu_error_ecache_flush_required(ch_async_flt_t *);
429 static void cpu_log_and_clear_ce(ch_async_flt_t *);
430 void cpu_ce_detected(ch_cpu_errors_t *, int);
431 
432 /*
433  * CE Leaky check timeout in microseconds.  This is chosen to be twice the
434  * memory refresh interval of current DIMMs (64ms).  After initial fix that
435  * gives at least one full refresh cycle in which the cell can leak
436  * (whereafter further refreshes simply reinforce any incorrect bit value).
437  */
438 clock_t cpu_ce_lkychk_timeout_usec = 128000;
439 
440 /*
441  * CE partner check partner caching period in seconds
442  */
443 int cpu_ce_ptnr_cachetime_sec = 60;
444 
445 /*
446  * Sets trap table entry ttentry by overwriting eight instructions from ttlabel
447  */
448 #define	CH_SET_TRAP(ttentry, ttlabel)			\
449 		bcopy((const void *)&ttlabel, &ttentry, 32);		\
450 		flush_instr_mem((caddr_t)&ttentry, 32);
451 
452 static int min_ecache_size;
453 static uint_t priv_hcl_1;
454 static uint_t priv_hcl_2;
455 static uint_t priv_hcl_4;
456 static uint_t priv_hcl_8;
457 
458 void
459 cpu_setup(void)
460 {
461 	extern int at_flags;
462 	extern int disable_delay_tlb_flush, delay_tlb_flush;
463 	extern int cpc_has_overflow_intr;
464 	extern int disable_text_largepages;
465 	extern int use_text_pgsz4m;
466 
467 	/*
468 	 * Setup chip-specific trap handlers.
469 	 */
470 	cpu_init_trap();
471 
472 	cache |= (CACHE_VAC | CACHE_PTAG | CACHE_IOCOHERENT);
473 
474 	at_flags = EF_SPARC_32PLUS | EF_SPARC_SUN_US1 | EF_SPARC_SUN_US3;
475 
476 	/*
477 	 * save the cache bootup state.
478 	 */
479 	cache_boot_state = get_dcu() & DCU_CACHE;
480 
481 	/*
482 	 * Due to the number of entries in the fully-associative tlb
483 	 * this may have to be tuned lower than in spitfire.
484 	 */
485 	pp_slots = MIN(8, MAXPP_SLOTS);
486 
487 	/*
488 	 * Block stores do not invalidate all pages of the d$, pagecopy
489 	 * et. al. need virtual translations with virtual coloring taken
490 	 * into consideration.  prefetch/ldd will pollute the d$ on the
491 	 * load side.
492 	 */
493 	pp_consistent_coloring = PPAGE_STORE_VCOLORING | PPAGE_LOADS_POLLUTE;
494 
495 	if (use_page_coloring) {
496 		do_pg_coloring = 1;
497 		if (use_virtual_coloring)
498 			do_virtual_coloring = 1;
499 	}
500 
501 	isa_list =
502 	    "sparcv9+vis2 sparcv9+vis sparcv9 "
503 	    "sparcv8plus+vis2 sparcv8plus+vis sparcv8plus "
504 	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";
505 
506 	/*
507 	 * On Panther-based machines, this should
508 	 * also include AV_SPARC_POPC too
509 	 */
510 	cpu_hwcap_flags = AV_SPARC_VIS | AV_SPARC_VIS2;
511 
512 	/*
513 	 * On cheetah, there's no hole in the virtual address space
514 	 */
515 	hole_start = hole_end = 0;
516 
517 	/*
518 	 * The kpm mapping window.
519 	 * kpm_size:
520 	 *	The size of a single kpm range.
521 	 *	The overall size will be: kpm_size * vac_colors.
522 	 * kpm_vbase:
523 	 *	The virtual start address of the kpm range within the kernel
524 	 *	virtual address space. kpm_vbase has to be kpm_size aligned.
525 	 */
526 	kpm_size = (size_t)(8ull * 1024 * 1024 * 1024 * 1024); /* 8TB */
527 	kpm_size_shift = 43;
528 	kpm_vbase = (caddr_t)0x8000000000000000ull; /* 8EB */
529 	kpm_smallpages = 1;
530 
531 	/*
532 	 * The traptrace code uses either %tick or %stick for
533 	 * timestamping.  We have %stick so we can use it.
534 	 */
535 	traptrace_use_stick = 1;
536 
537 	/*
538 	 * Cheetah has a performance counter overflow interrupt
539 	 */
540 	cpc_has_overflow_intr = 1;
541 
542 	/*
543 	 * Use cheetah flush-all support
544 	 */
545 	if (!disable_delay_tlb_flush)
546 		delay_tlb_flush = 1;
547 
548 #if defined(CPU_IMP_DUAL_PAGESIZE)
549 	/*
550 	 * Use Cheetah+ and later dual page size support.
551 	 */
552 	if (!disable_dual_pgsz) {
553 		cpu_impl_dual_pgsz = 1;
554 	}
555 #endif	/* CPU_IMP_DUAL_PAGESIZE */
556 
557 	/*
558 	 * Declare that this architecture/cpu combination does fpRAS.
559 	 */
560 	fpras_implemented = 1;
561 
562 	/*
563 	 * Enable 4M pages to be used for mapping user text by default.  Don't
564 	 * use large pages for initialized data segments since we may not know
565 	 * at exec() time what should be the preferred large page size for DTLB
566 	 * programming.
567 	 */
568 	use_text_pgsz4m = 1;
569 	disable_text_largepages = (1 << TTE64K) | (1 << TTE512K) |
570 	    (1 << TTE32M) | (1 << TTE256M);
571 
572 	/*
573 	 * Setup CE lookup table
574 	 */
575 	CE_INITDISPTBL_POPULATE(ce_disp_table);
576 	ce_disp_inited = 1;
577 }
578 
579 /*
580  * Called by setcpudelay
581  */
582 void
583 cpu_init_tick_freq(void)
584 {
585 	/*
586 	 * For UltraSPARC III and beyond we want to use the
587 	 * system clock rate as the basis for low level timing,
588 	 * due to support of mixed speed CPUs and power managment.
589 	 */
590 	if (system_clock_freq == 0)
591 		cmn_err(CE_PANIC, "setcpudelay: invalid system_clock_freq");
592 
593 	sys_tick_freq = system_clock_freq;
594 }
595 
596 #ifdef CHEETAHPLUS_ERRATUM_25
597 /*
598  * Tunables
599  */
600 int cheetah_bpe_off = 0;
601 int cheetah_sendmondo_recover = 1;
602 int cheetah_sendmondo_fullscan = 0;
603 int cheetah_sendmondo_recover_delay = 5;
604 
605 #define	CHEETAH_LIVELOCK_MIN_DELAY	1
606 
607 /*
608  * Recovery Statistics
609  */
610 typedef struct cheetah_livelock_entry	{
611 	int cpuid;		/* fallen cpu */
612 	int buddy;		/* cpu that ran recovery */
613 	clock_t lbolt;		/* when recovery started */
614 	hrtime_t recovery_time;	/* time spent in recovery */
615 } cheetah_livelock_entry_t;
616 
617 #define	CHEETAH_LIVELOCK_NENTRY	32
618 
619 cheetah_livelock_entry_t cheetah_livelock_hist[CHEETAH_LIVELOCK_NENTRY];
620 int cheetah_livelock_entry_nxt;
621 
622 #define	CHEETAH_LIVELOCK_ENTRY_NEXT(statp)	{			\
623 	statp = cheetah_livelock_hist + cheetah_livelock_entry_nxt;	\
624 	if (++cheetah_livelock_entry_nxt >= CHEETAH_LIVELOCK_NENTRY) {	\
625 		cheetah_livelock_entry_nxt = 0;				\
626 	}								\
627 }
628 
629 #define	CHEETAH_LIVELOCK_ENTRY_SET(statp, item, val)	statp->item = val
630 
631 struct {
632 	hrtime_t hrt;		/* maximum recovery time */
633 	int recovery;		/* recovered */
634 	int full_claimed;	/* maximum pages claimed in full recovery */
635 	int proc_entry;		/* attempted to claim TSB */
636 	int proc_tsb_scan;	/* tsb scanned */
637 	int proc_tsb_partscan;	/* tsb partially scanned */
638 	int proc_tsb_fullscan;	/* whole tsb scanned */
639 	int proc_claimed;	/* maximum pages claimed in tsb scan */
640 	int proc_user;		/* user thread */
641 	int proc_kernel;	/* kernel thread */
642 	int proc_onflt;		/* bad stack */
643 	int proc_cpu;		/* null cpu */
644 	int proc_thread;	/* null thread */
645 	int proc_proc;		/* null proc */
646 	int proc_as;		/* null as */
647 	int proc_hat;		/* null hat */
648 	int proc_hat_inval;	/* hat contents don't make sense */
649 	int proc_hat_busy;	/* hat is changing TSBs */
650 	int proc_tsb_reloc;	/* TSB skipped because being relocated */
651 	int proc_cnum_bad;	/* cnum out of range */
652 	int proc_cnum;		/* last cnum processed */
653 	tte_t proc_tte;		/* last tte processed */
654 } cheetah_livelock_stat;
655 
656 #define	CHEETAH_LIVELOCK_STAT(item)	cheetah_livelock_stat.item++
657 
658 #define	CHEETAH_LIVELOCK_STATSET(item, value)		\
659 	cheetah_livelock_stat.item = value
660 
661 #define	CHEETAH_LIVELOCK_MAXSTAT(item, value)	{	\
662 	if (value > cheetah_livelock_stat.item)		\
663 		cheetah_livelock_stat.item = value;	\
664 }
665 
666 /*
667  * Attempt to recover a cpu by claiming every cache line as saved
668  * in the TSB that the non-responsive cpu is using. Since we can't
669  * grab any adaptive lock, this is at best an attempt to do so. Because
670  * we don't grab any locks, we must operate under the protection of
671  * on_fault().
672  *
673  * Return 1 if cpuid could be recovered, 0 if failed.
674  */
675 int
676 mondo_recover_proc(uint16_t cpuid, int bn)
677 {
678 	label_t ljb;
679 	cpu_t *cp;
680 	kthread_t *t;
681 	proc_t *p;
682 	struct as *as;
683 	struct hat *hat;
684 	uint_t  cnum;
685 	struct tsb_info *tsbinfop;
686 	struct tsbe *tsbep;
687 	caddr_t tsbp;
688 	caddr_t end_tsbp;
689 	uint64_t paddr;
690 	uint64_t idsr;
691 	u_longlong_t pahi, palo;
692 	int pages_claimed = 0;
693 	tte_t tsbe_tte;
694 	int tried_kernel_tsb = 0;
695 	mmu_ctx_t *mmu_ctxp;
696 
697 	CHEETAH_LIVELOCK_STAT(proc_entry);
698 
699 	if (on_fault(&ljb)) {
700 		CHEETAH_LIVELOCK_STAT(proc_onflt);
701 		goto badstruct;
702 	}
703 
704 	if ((cp = cpu[cpuid]) == NULL) {
705 		CHEETAH_LIVELOCK_STAT(proc_cpu);
706 		goto badstruct;
707 	}
708 
709 	if ((t = cp->cpu_thread) == NULL) {
710 		CHEETAH_LIVELOCK_STAT(proc_thread);
711 		goto badstruct;
712 	}
713 
714 	if ((p = ttoproc(t)) == NULL) {
715 		CHEETAH_LIVELOCK_STAT(proc_proc);
716 		goto badstruct;
717 	}
718 
719 	if ((as = p->p_as) == NULL) {
720 		CHEETAH_LIVELOCK_STAT(proc_as);
721 		goto badstruct;
722 	}
723 
724 	if ((hat = as->a_hat) == NULL) {
725 		CHEETAH_LIVELOCK_STAT(proc_hat);
726 		goto badstruct;
727 	}
728 
729 	if (hat != ksfmmup) {
730 		CHEETAH_LIVELOCK_STAT(proc_user);
731 		if (hat->sfmmu_flags & (HAT_BUSY | HAT_SWAPPED | HAT_SWAPIN)) {
732 			CHEETAH_LIVELOCK_STAT(proc_hat_busy);
733 			goto badstruct;
734 		}
735 		tsbinfop = hat->sfmmu_tsb;
736 		if (tsbinfop == NULL) {
737 			CHEETAH_LIVELOCK_STAT(proc_hat_inval);
738 			goto badstruct;
739 		}
740 		tsbp = tsbinfop->tsb_va;
741 		end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
742 	} else {
743 		CHEETAH_LIVELOCK_STAT(proc_kernel);
744 		tsbinfop = NULL;
745 		tsbp = ktsb_base;
746 		end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
747 	}
748 
749 	/* Verify as */
750 	if (hat->sfmmu_as != as) {
751 		CHEETAH_LIVELOCK_STAT(proc_hat_inval);
752 		goto badstruct;
753 	}
754 
755 	mmu_ctxp = CPU_MMU_CTXP(cp);
756 	ASSERT(mmu_ctxp);
757 	cnum = hat->sfmmu_ctxs[mmu_ctxp->mmu_idx].cnum;
758 	CHEETAH_LIVELOCK_STATSET(proc_cnum, cnum);
759 
760 	if ((cnum < 0) || (cnum == INVALID_CONTEXT) ||
761 	    (cnum >= mmu_ctxp->mmu_nctxs)) {
762 		CHEETAH_LIVELOCK_STAT(proc_cnum_bad);
763 		goto badstruct;
764 	}
765 
766 	do {
767 		CHEETAH_LIVELOCK_STAT(proc_tsb_scan);
768 
769 		/*
770 		 * Skip TSBs being relocated.  This is important because
771 		 * we want to avoid the following deadlock scenario:
772 		 *
773 		 * 1) when we came in we set ourselves to "in recover" state.
774 		 * 2) when we try to touch TSB being relocated the mapping
775 		 *    will be in the suspended state so we'll spin waiting
776 		 *    for it to be unlocked.
777 		 * 3) when the CPU that holds the TSB mapping locked tries to
778 		 *    unlock it it will send a xtrap which will fail to xcall
779 		 *    us or the CPU we're trying to recover, and will in turn
780 		 *    enter the mondo code.
781 		 * 4) since we are still spinning on the locked mapping
782 		 *    no further progress will be made and the system will
783 		 *    inevitably hard hang.
784 		 *
785 		 * A TSB not being relocated can't begin being relocated
786 		 * while we're accessing it because we check
787 		 * sendmondo_in_recover before relocating TSBs.
788 		 */
789 		if (hat != ksfmmup &&
790 		    (tsbinfop->tsb_flags & TSB_RELOC_FLAG) != 0) {
791 			CHEETAH_LIVELOCK_STAT(proc_tsb_reloc);
792 			goto next_tsbinfo;
793 		}
794 
795 		for (tsbep = (struct tsbe *)tsbp;
796 		    tsbep < (struct tsbe *)end_tsbp; tsbep++) {
797 			tsbe_tte = tsbep->tte_data;
798 
799 			if (tsbe_tte.tte_val == 0) {
800 				/*
801 				 * Invalid tte
802 				 */
803 				continue;
804 			}
805 			if (tsbe_tte.tte_se) {
806 				/*
807 				 * Don't want device registers
808 				 */
809 				continue;
810 			}
811 			if (tsbe_tte.tte_cp == 0) {
812 				/*
813 				 * Must be cached in E$
814 				 */
815 				continue;
816 			}
817 			CHEETAH_LIVELOCK_STATSET(proc_tte, tsbe_tte);
818 			idsr = getidsr();
819 			if ((idsr & (IDSR_NACK_BIT(bn) |
820 			    IDSR_BUSY_BIT(bn))) == 0) {
821 				CHEETAH_LIVELOCK_STAT(proc_tsb_partscan);
822 				goto done;
823 			}
824 			pahi = tsbe_tte.tte_pahi;
825 			palo = tsbe_tte.tte_palo;
826 			paddr = (uint64_t)((pahi << 32) |
827 			    (palo << MMU_PAGESHIFT));
828 			claimlines(paddr, TTEBYTES(TTE_CSZ(&tsbe_tte)),
829 			    CH_ECACHE_SUBBLK_SIZE);
830 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
831 				shipit(cpuid, bn);
832 			}
833 			pages_claimed++;
834 		}
835 next_tsbinfo:
836 		if (tsbinfop != NULL)
837 			tsbinfop = tsbinfop->tsb_next;
838 		if (tsbinfop != NULL) {
839 			tsbp = tsbinfop->tsb_va;
840 			end_tsbp = tsbp + TSB_BYTES(tsbinfop->tsb_szc);
841 		} else if (tsbp == ktsb_base) {
842 			tried_kernel_tsb = 1;
843 		} else if (!tried_kernel_tsb) {
844 			tsbp = ktsb_base;
845 			end_tsbp = tsbp + TSB_BYTES(ktsb_sz);
846 			hat = ksfmmup;
847 			tsbinfop = NULL;
848 		}
849 	} while (tsbinfop != NULL ||
850 			((tsbp == ktsb_base) && !tried_kernel_tsb));
851 
852 	CHEETAH_LIVELOCK_STAT(proc_tsb_fullscan);
853 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
854 	no_fault();
855 	idsr = getidsr();
856 	if ((idsr & (IDSR_NACK_BIT(bn) |
857 	    IDSR_BUSY_BIT(bn))) == 0) {
858 		return (1);
859 	} else {
860 		return (0);
861 	}
862 
863 done:
864 	no_fault();
865 	CHEETAH_LIVELOCK_MAXSTAT(proc_claimed, pages_claimed);
866 	return (1);
867 
868 badstruct:
869 	no_fault();
870 	return (0);
871 }
872 
873 /*
874  * Attempt to claim ownership, temporarily, of every cache line that a
875  * non-responsive cpu might be using.  This might kick that cpu out of
876  * this state.
877  *
878  * The return value indicates to the caller if we have exhausted all recovery
879  * techniques. If 1 is returned, it is useless to call this function again
880  * even for a different target CPU.
881  */
882 int
883 mondo_recover(uint16_t cpuid, int bn)
884 {
885 	struct memseg *seg;
886 	uint64_t begin_pa, end_pa, cur_pa;
887 	hrtime_t begin_hrt, end_hrt;
888 	int retval = 0;
889 	int pages_claimed = 0;
890 	cheetah_livelock_entry_t *histp;
891 	uint64_t idsr;
892 
893 	if (cas32(&sendmondo_in_recover, 0, 1) != 0) {
894 		/*
895 		 * Wait while recovery takes place
896 		 */
897 		while (sendmondo_in_recover) {
898 			drv_usecwait(1);
899 		}
900 		/*
901 		 * Assume we didn't claim the whole memory. If
902 		 * the target of this caller is not recovered,
903 		 * it will come back.
904 		 */
905 		return (retval);
906 	}
907 
908 	CHEETAH_LIVELOCK_ENTRY_NEXT(histp)
909 	CHEETAH_LIVELOCK_ENTRY_SET(histp, lbolt, lbolt);
910 	CHEETAH_LIVELOCK_ENTRY_SET(histp, cpuid, cpuid);
911 	CHEETAH_LIVELOCK_ENTRY_SET(histp, buddy, CPU->cpu_id);
912 
913 	begin_hrt = gethrtime_waitfree();
914 	/*
915 	 * First try to claim the lines in the TSB the target
916 	 * may have been using.
917 	 */
918 	if (mondo_recover_proc(cpuid, bn) == 1) {
919 		/*
920 		 * Didn't claim the whole memory
921 		 */
922 		goto done;
923 	}
924 
925 	/*
926 	 * We tried using the TSB. The target is still
927 	 * not recovered. Check if complete memory scan is
928 	 * enabled.
929 	 */
930 	if (cheetah_sendmondo_fullscan == 0) {
931 		/*
932 		 * Full memory scan is disabled.
933 		 */
934 		retval = 1;
935 		goto done;
936 	}
937 
938 	/*
939 	 * Try claiming the whole memory.
940 	 */
941 	for (seg = memsegs; seg; seg = seg->next) {
942 		begin_pa = (uint64_t)(seg->pages_base) << MMU_PAGESHIFT;
943 		end_pa = (uint64_t)(seg->pages_end) << MMU_PAGESHIFT;
944 		for (cur_pa = begin_pa; cur_pa < end_pa;
945 		    cur_pa += MMU_PAGESIZE) {
946 			idsr = getidsr();
947 			if ((idsr & (IDSR_NACK_BIT(bn) |
948 			    IDSR_BUSY_BIT(bn))) == 0) {
949 				/*
950 				 * Didn't claim all memory
951 				 */
952 				goto done;
953 			}
954 			claimlines(cur_pa, MMU_PAGESIZE,
955 			    CH_ECACHE_SUBBLK_SIZE);
956 			if ((idsr & IDSR_BUSY_BIT(bn)) == 0) {
957 				shipit(cpuid, bn);
958 			}
959 			pages_claimed++;
960 		}
961 	}
962 
963 	/*
964 	 * We did all we could.
965 	 */
966 	retval = 1;
967 
968 done:
969 	/*
970 	 * Update statistics
971 	 */
972 	end_hrt = gethrtime_waitfree();
973 	CHEETAH_LIVELOCK_STAT(recovery);
974 	CHEETAH_LIVELOCK_MAXSTAT(hrt, (end_hrt - begin_hrt));
975 	CHEETAH_LIVELOCK_MAXSTAT(full_claimed, pages_claimed);
976 	CHEETAH_LIVELOCK_ENTRY_SET(histp, recovery_time, \
977 	    (end_hrt -  begin_hrt));
978 
979 	while (cas32(&sendmondo_in_recover, 1, 0) != 1);
980 
981 	return (retval);
982 }
983 
984 /*
985  * This is called by the cyclic framework when this CPU becomes online
986  */
987 /*ARGSUSED*/
988 static void
989 cheetah_nudge_onln(void *arg, cpu_t *cpu, cyc_handler_t *hdlr, cyc_time_t *when)
990 {
991 
992 	hdlr->cyh_func = (cyc_func_t)cheetah_nudge_buddy;
993 	hdlr->cyh_level = CY_LOW_LEVEL;
994 	hdlr->cyh_arg = NULL;
995 
996 	/*
997 	 * Stagger the start time
998 	 */
999 	when->cyt_when = cpu->cpu_id * (NANOSEC / NCPU);
1000 	if (cheetah_sendmondo_recover_delay < CHEETAH_LIVELOCK_MIN_DELAY) {
1001 		cheetah_sendmondo_recover_delay = CHEETAH_LIVELOCK_MIN_DELAY;
1002 	}
1003 	when->cyt_interval = cheetah_sendmondo_recover_delay * NANOSEC;
1004 }
1005 
1006 /*
1007  * Create a low level cyclic to send a xtrap to the next cpu online.
1008  * However, there's no need to have this running on a uniprocessor system.
1009  */
1010 static void
1011 cheetah_nudge_init(void)
1012 {
1013 	cyc_omni_handler_t hdlr;
1014 
1015 	if (max_ncpus == 1) {
1016 		return;
1017 	}
1018 
1019 	hdlr.cyo_online = cheetah_nudge_onln;
1020 	hdlr.cyo_offline = NULL;
1021 	hdlr.cyo_arg = NULL;
1022 
1023 	mutex_enter(&cpu_lock);
1024 	(void) cyclic_add_omni(&hdlr);
1025 	mutex_exit(&cpu_lock);
1026 }
1027 
1028 /*
1029  * Cyclic handler to wake up buddy
1030  */
1031 void
1032 cheetah_nudge_buddy(void)
1033 {
1034 	/*
1035 	 * Disable kernel preemption to protect the cpu list
1036 	 */
1037 	kpreempt_disable();
1038 	if ((CPU->cpu_next_onln != CPU) && (sendmondo_in_recover == 0)) {
1039 		xt_one(CPU->cpu_next_onln->cpu_id, (xcfunc_t *)xt_sync_tl1,
1040 		    0, 0);
1041 	}
1042 	kpreempt_enable();
1043 }
1044 
1045 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1046 
#ifdef SEND_MONDO_STATS
/*
 * Counters that exist only when SEND_MONDO_STATS is defined.
 *
 * send_one_mondo() records how many ticks each dispatch took in the
 * x_one_* arrays: durations below 8192 ticks are counted in
 * x_one_stimes[], indexed by ticks >> 7 (64 buckets of 128 ticks);
 * longer ones are counted in x_one_ltimes[], indexed by bits 13..16 of
 * the tick count.
 *
 * NOTE(review): the x_set_* and x_nack_stimes arrays are not referenced
 * in this part of the file; presumably they are the equivalent counters
 * for the multi-target send path -- confirm against their users.
 */
uint32_t x_one_stimes[64];
uint32_t x_one_ltimes[16];
uint32_t x_set_stimes[64];
uint32_t x_set_ltimes[16];
uint32_t x_set_cpus[NCPU];
uint32_t x_nack_stimes[64];
#endif
1055 
1056 /*
1057  * Note: A version of this function is used by the debugger via the KDI,
1058  * and must be kept in sync with this version.  Any changes made to this
1059  * function to support new chips or to accomodate errata must also be included
1060  * in the KDI-specific version.  See us3_kdi.c.
1061  */
1062 void
1063 send_one_mondo(int cpuid)
1064 {
1065 	int busy, nack;
1066 	uint64_t idsr, starttick, endtick, tick, lasttick;
1067 	uint64_t busymask;
1068 #ifdef	CHEETAHPLUS_ERRATUM_25
1069 	int recovered = 0;
1070 #endif
1071 
1072 	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
1073 	starttick = lasttick = gettick();
1074 	shipit(cpuid, 0);
1075 	endtick = starttick + xc_tick_limit;
1076 	busy = nack = 0;
1077 #if defined(JALAPENO) || defined(SERRANO)
1078 	/*
1079 	 * Lower 2 bits of the agent ID determine which BUSY/NACK pair
1080 	 * will be used for dispatching interrupt. For now, assume
1081 	 * there are no more than IDSR_BN_SETS CPUs, hence no aliasing
1082 	 * issues with respect to BUSY/NACK pair usage.
1083 	 */
1084 	busymask  = IDSR_BUSY_BIT(cpuid);
1085 #else /* JALAPENO || SERRANO */
1086 	busymask = IDSR_BUSY;
1087 #endif /* JALAPENO || SERRANO */
1088 	for (;;) {
1089 		idsr = getidsr();
1090 		if (idsr == 0)
1091 			break;
1092 
1093 		tick = gettick();
1094 		/*
1095 		 * If there is a big jump between the current tick
1096 		 * count and lasttick, we have probably hit a break
1097 		 * point.  Adjust endtick accordingly to avoid panic.
1098 		 */
1099 		if (tick > (lasttick + xc_tick_jump_limit))
1100 			endtick += (tick - lasttick);
1101 		lasttick = tick;
1102 		if (tick > endtick) {
1103 			if (panic_quiesce)
1104 				return;
1105 #ifdef	CHEETAHPLUS_ERRATUM_25
1106 			if (cheetah_sendmondo_recover && recovered == 0) {
1107 				if (mondo_recover(cpuid, 0)) {
1108 					/*
1109 					 * We claimed the whole memory or
1110 					 * full scan is disabled.
1111 					 */
1112 					recovered++;
1113 				}
1114 				tick = gettick();
1115 				endtick = tick + xc_tick_limit;
1116 				lasttick = tick;
1117 				/*
1118 				 * Recheck idsr
1119 				 */
1120 				continue;
1121 			} else
1122 #endif	/* CHEETAHPLUS_ERRATUM_25 */
1123 			{
1124 				cmn_err(CE_PANIC, "send mondo timeout "
1125 				    "(target 0x%x) [%d NACK %d BUSY]",
1126 				    cpuid, nack, busy);
1127 			}
1128 		}
1129 
1130 		if (idsr & busymask) {
1131 			busy++;
1132 			continue;
1133 		}
1134 		drv_usecwait(1);
1135 		shipit(cpuid, 0);
1136 		nack++;
1137 		busy = 0;
1138 	}
1139 #ifdef SEND_MONDO_STATS
1140 	{
1141 		int n = gettick() - starttick;
1142 		if (n < 8192)
1143 			x_one_stimes[n >> 7]++;
1144 		else
1145 			x_one_ltimes[(n >> 13) & 0xf]++;
1146 	}
1147 #endif
1148 }
1149 
/*
 * Intentionally empty: this implementation of syncfpu() performs no work.
 */
void
syncfpu(void)
{
	/* nothing to do */
}
1154 
1155 /*
1156  * Return processor specific async error structure
1157  * size used.
1158  */
1159 int
1160 cpu_aflt_size(void)
1161 {
1162 	return (sizeof (ch_async_flt_t));
1163 }
1164 
/*
 * Tunable to disable the checking of other cpu logout areas during panic for
 * potential syndrome 71 generating errors.  The check is enabled by
 * default (1).  NOTE(review): the code that tests this variable is outside
 * this part of the file; presumably setting it to 0 skips the check.
 */
int enable_check_other_cpus_logout = 1;
1170 
1171 /*
1172  * Check other cpus logout area for potential synd 71 generating
1173  * errors.
1174  */
1175 static void
1176 cpu_check_cpu_logout(int cpuid, caddr_t tpc, int tl, int ecc_type,
1177     ch_cpu_logout_t *clop)
1178 {
1179 	struct async_flt *aflt;
1180 	ch_async_flt_t ch_flt;
1181 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1182 
1183 	if (clop == NULL || clop->clo_data.chd_afar == LOGOUT_INVALID) {
1184 		return;
1185 	}
1186 
1187 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1188 
1189 	t_afar = clop->clo_data.chd_afar;
1190 	t_afsr = clop->clo_data.chd_afsr;
1191 	t_afsr_ext = clop->clo_data.chd_afsr_ext;
1192 #if defined(SERRANO)
1193 	ch_flt.afar2 = clop->clo_data.chd_afar2;
1194 #endif	/* SERRANO */
1195 
1196 	/*
1197 	 * In order to simplify code, we maintain this afsr_errs
1198 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1199 	 * sticky bits.
1200 	 */
1201 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1202 	    (t_afsr & C_AFSR_ALL_ERRS);
1203 
1204 	/* Setup the async fault structure */
1205 	aflt = (struct async_flt *)&ch_flt;
1206 	aflt->flt_id = gethrtime_waitfree();
1207 	ch_flt.afsr_ext = t_afsr_ext;
1208 	ch_flt.afsr_errs = t_afsr_errs;
1209 	aflt->flt_stat = t_afsr;
1210 	aflt->flt_addr = t_afar;
1211 	aflt->flt_bus_id = cpuid;
1212 	aflt->flt_inst = cpuid;
1213 	aflt->flt_pc = tpc;
1214 	aflt->flt_prot = AFLT_PROT_NONE;
1215 	aflt->flt_class = CPU_FAULT;
1216 	aflt->flt_priv = ((t_afsr & C_AFSR_PRIV) != 0);
1217 	aflt->flt_tl = tl;
1218 	aflt->flt_status = ecc_type;
1219 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1220 
1221 	/*
1222 	 * Queue events on the async event queue, one event per error bit.
1223 	 * If no events are queued, queue an event to complain.
1224 	 */
1225 	if (cpu_queue_events(&ch_flt, NULL, t_afsr_errs, clop) == 0) {
1226 		ch_flt.flt_type = CPU_INV_AFSR;
1227 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1228 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1229 		    aflt->flt_panic);
1230 	}
1231 
1232 	/*
1233 	 * Zero out + invalidate CPU logout.
1234 	 */
1235 	bzero(clop, sizeof (ch_cpu_logout_t));
1236 	clop->clo_data.chd_afar = LOGOUT_INVALID;
1237 }
1238 
1239 /*
1240  * Check the logout areas of all other cpus for unlogged errors.
1241  */
1242 static void
1243 cpu_check_other_cpus_logout(void)
1244 {
1245 	int i, j;
1246 	processorid_t myid;
1247 	struct cpu *cp;
1248 	ch_err_tl1_data_t *cl1p;
1249 
1250 	myid = CPU->cpu_id;
1251 	for (i = 0; i < NCPU; i++) {
1252 		cp = cpu[i];
1253 
1254 		if ((cp == NULL) || !(cp->cpu_flags & CPU_EXISTS) ||
1255 		    (cp->cpu_id == myid) || (CPU_PRIVATE(cp) == NULL)) {
1256 			continue;
1257 		}
1258 
1259 		/*
1260 		 * Check each of the tl>0 logout areas
1261 		 */
1262 		cl1p = CPU_PRIVATE_PTR(cp, chpr_tl1_err_data[0]);
1263 		for (j = 0; j < CH_ERR_TL1_TLMAX; j++, cl1p++) {
1264 			if (cl1p->ch_err_tl1_flags == 0)
1265 				continue;
1266 
1267 			cpu_check_cpu_logout(i, (caddr_t)cl1p->ch_err_tl1_tpc,
1268 			    1, ECC_F_TRAP, &cl1p->ch_err_tl1_logout);
1269 		}
1270 
1271 		/*
1272 		 * Check each of the remaining logout areas
1273 		 */
1274 		cpu_check_cpu_logout(i, NULL, 0, ECC_F_TRAP,
1275 		    CPU_PRIVATE_PTR(cp, chpr_fecctl0_logout));
1276 		cpu_check_cpu_logout(i, NULL, 0, ECC_C_TRAP,
1277 		    CPU_PRIVATE_PTR(cp, chpr_cecc_logout));
1278 		cpu_check_cpu_logout(i, NULL, 0, ECC_D_TRAP,
1279 		    CPU_PRIVATE_PTR(cp, chpr_async_logout));
1280 	}
1281 }
1282 
1283 /*
1284  * The fast_ecc_err handler transfers control here for UCU, UCC events.
1285  * Note that we flush Ecache twice, once in the fast_ecc_err handler to
1286  * flush the error that caused the UCU/UCC, then again here at the end to
1287  * flush the TL=1 trap handler code out of the Ecache, so we can minimize
1288  * the probability of getting a TL>1 Fast ECC trap when we're fielding
1289  * another Fast ECC trap.
1290  *
1291  * Cheetah+ also handles: TSCE: No additional processing required.
1292  * Panther adds L3_UCU and L3_UCC which are reported in AFSR_EXT.
1293  *
1294  * Note that the p_clo_flags input is only valid in cases where the
1295  * cpu_private struct is not yet initialized (since that is the only
1296  * time that information cannot be obtained from the logout struct.)
1297  */
1298 /*ARGSUSED*/
1299 void
1300 cpu_fast_ecc_error(struct regs *rp, ulong_t p_clo_flags)
1301 {
1302 	ch_cpu_logout_t *clop;
1303 	uint64_t ceen, nceen;
1304 
1305 	/*
1306 	 * Get the CPU log out info. If we can't find our CPU private
1307 	 * pointer, then we will have to make due without any detailed
1308 	 * logout information.
1309 	 */
1310 	if (CPU_PRIVATE(CPU) == NULL) {
1311 		clop = NULL;
1312 		ceen = p_clo_flags & EN_REG_CEEN;
1313 		nceen = p_clo_flags & EN_REG_NCEEN;
1314 	} else {
1315 		clop = CPU_PRIVATE_PTR(CPU, chpr_fecctl0_logout);
1316 		ceen = clop->clo_flags & EN_REG_CEEN;
1317 		nceen = clop->clo_flags & EN_REG_NCEEN;
1318 	}
1319 
1320 	cpu_log_fast_ecc_error((caddr_t)rp->r_pc,
1321 	    (rp->r_tstate & TSTATE_PRIV) ? 1 : 0, 0, ceen, nceen, clop);
1322 }
1323 
1324 /*
1325  * Log fast ecc error, called from either Fast ECC at TL=0 or Fast
1326  * ECC at TL>0.  Need to supply either a error register pointer or a
1327  * cpu logout structure pointer.
1328  */
1329 static void
1330 cpu_log_fast_ecc_error(caddr_t tpc, int priv, int tl, uint64_t ceen,
1331     uint64_t nceen, ch_cpu_logout_t *clop)
1332 {
1333 	struct async_flt *aflt;
1334 	ch_async_flt_t ch_flt;
1335 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1336 	char pr_reason[MAX_REASON_STRING];
1337 	ch_cpu_errors_t cpu_error_regs;
1338 
1339 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1340 	/*
1341 	 * If no cpu logout data, then we will have to make due without
1342 	 * any detailed logout information.
1343 	 */
1344 	if (clop == NULL) {
1345 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1346 		get_cpu_error_state(&cpu_error_regs);
1347 		set_cpu_error_state(&cpu_error_regs);
1348 		t_afar = cpu_error_regs.afar;
1349 		t_afsr = cpu_error_regs.afsr;
1350 		t_afsr_ext = cpu_error_regs.afsr_ext;
1351 #if defined(SERRANO)
1352 		ch_flt.afar2 = cpu_error_regs.afar2;
1353 #endif	/* SERRANO */
1354 	} else {
1355 		t_afar = clop->clo_data.chd_afar;
1356 		t_afsr = clop->clo_data.chd_afsr;
1357 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1358 #if defined(SERRANO)
1359 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1360 #endif	/* SERRANO */
1361 	}
1362 
1363 	/*
1364 	 * In order to simplify code, we maintain this afsr_errs
1365 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1366 	 * sticky bits.
1367 	 */
1368 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1369 	    (t_afsr & C_AFSR_ALL_ERRS);
1370 	pr_reason[0] = '\0';
1371 
1372 	/* Setup the async fault structure */
1373 	aflt = (struct async_flt *)&ch_flt;
1374 	aflt->flt_id = gethrtime_waitfree();
1375 	ch_flt.afsr_ext = t_afsr_ext;
1376 	ch_flt.afsr_errs = t_afsr_errs;
1377 	aflt->flt_stat = t_afsr;
1378 	aflt->flt_addr = t_afar;
1379 	aflt->flt_bus_id = getprocessorid();
1380 	aflt->flt_inst = CPU->cpu_id;
1381 	aflt->flt_pc = tpc;
1382 	aflt->flt_prot = AFLT_PROT_NONE;
1383 	aflt->flt_class = CPU_FAULT;
1384 	aflt->flt_priv = priv;
1385 	aflt->flt_tl = tl;
1386 	aflt->flt_status = ECC_F_TRAP;
1387 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1388 
1389 	/*
1390 	 * XXXX - Phenomenal hack to get around Solaris not getting all the
1391 	 * cmn_err messages out to the console.  The situation is a UCU (in
1392 	 * priv mode) which causes a WDU which causes a UE (on the retry).
1393 	 * The messages for the UCU and WDU are enqueued and then pulled off
1394 	 * the async queue via softint and syslogd starts to process them
1395 	 * but doesn't get them to the console.  The UE causes a panic, but
1396 	 * since the UCU/WDU messages are already in transit, those aren't
1397 	 * on the async queue.  The hack is to check if we have a matching
1398 	 * WDU event for the UCU, and if it matches, we're more than likely
1399 	 * going to panic with a UE, unless we're under protection.  So, we
1400 	 * check to see if we got a matching WDU event and if we're under
1401 	 * protection.
1402 	 *
1403 	 * For Cheetah/Cheetah+/Jaguar/Jalapeno, the sequence we care about
1404 	 * looks like this:
1405 	 *    UCU->WDU->UE
1406 	 * For Panther, it could look like either of these:
1407 	 *    UCU---->WDU->L3_WDU->UE
1408 	 *    L3_UCU->WDU->L3_WDU->UE
1409 	 */
1410 	if ((t_afsr_errs & (C_AFSR_UCU | C_AFSR_L3_UCU)) &&
1411 	    aflt->flt_panic == 0 && aflt->flt_priv != 0 &&
1412 	    curthread->t_ontrap == NULL && curthread->t_lofault == NULL) {
1413 		get_cpu_error_state(&cpu_error_regs);
1414 		aflt->flt_panic |= ((cpu_error_regs.afsr & C_AFSR_WDU) &&
1415 		    (cpu_error_regs.afar == t_afar));
1416 		aflt->flt_panic |= ((clop == NULL) &&
1417 		    (t_afsr_errs & C_AFSR_WDU));
1418 	}
1419 
1420 	/*
1421 	 * Queue events on the async event queue, one event per error bit.
1422 	 * If no events are queued or no Fast ECC events are on in the AFSR,
1423 	 * queue an event to complain.
1424 	 */
1425 	if (cpu_queue_events(&ch_flt, pr_reason, t_afsr_errs, clop) == 0 ||
1426 	    ((t_afsr_errs & (C_AFSR_FECC_ERRS | C_AFSR_EXT_FECC_ERRS)) == 0)) {
1427 		ch_flt.flt_type = CPU_INV_AFSR;
1428 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1429 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1430 		    aflt->flt_panic);
1431 	}
1432 
1433 	/*
1434 	 * Zero out + invalidate CPU logout.
1435 	 */
1436 	if (clop) {
1437 		bzero(clop, sizeof (ch_cpu_logout_t));
1438 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1439 	}
1440 
1441 	/*
1442 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1443 	 * or disrupting errors have happened.  We do this because if a
1444 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1445 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1446 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1447 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1448 	 * deferred or disrupting error happening between checking the AFSR and
1449 	 * enabling NCEEN/CEEN.
1450 	 *
1451 	 * Note: CEEN and NCEEN are only reenabled if they were on when trap
1452 	 * taken.
1453 	 */
1454 	set_error_enable(get_error_enable() | (nceen | ceen));
1455 	if (clear_errors(&ch_flt)) {
1456 		aflt->flt_panic |= ((ch_flt.afsr_errs &
1457 		    (C_AFSR_EXT_ASYNC_ERRS | C_AFSR_ASYNC_ERRS)) != 0);
1458 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1459 		    NULL);
1460 	}
1461 
1462 	/*
1463 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1464 	 * be logged as part of the panic flow.
1465 	 */
1466 	if (aflt->flt_panic)
1467 		fm_panic("%sError(s)", pr_reason);
1468 
1469 	/*
1470 	 * Flushing the Ecache here gets the part of the trap handler that
1471 	 * is run at TL=1 out of the Ecache.
1472 	 */
1473 	cpu_flush_ecache();
1474 }
1475 
1476 /*
1477  * This is called via sys_trap from pil15_interrupt code if the
1478  * corresponding entry in ch_err_tl1_pending is set.  Checks the
1479  * various ch_err_tl1_data structures for valid entries based on the bit
1480  * settings in the ch_err_tl1_flags entry of the structure.
1481  */
1482 /*ARGSUSED*/
1483 void
1484 cpu_tl1_error(struct regs *rp, int panic)
1485 {
1486 	ch_err_tl1_data_t *cl1p, cl1;
1487 	int i, ncl1ps;
1488 	uint64_t me_flags;
1489 	uint64_t ceen, nceen;
1490 
1491 	if (ch_err_tl1_paddrs[CPU->cpu_id] == 0) {
1492 		cl1p = &ch_err_tl1_data;
1493 		ncl1ps = 1;
1494 	} else if (CPU_PRIVATE(CPU) != NULL) {
1495 		cl1p = CPU_PRIVATE_PTR(CPU, chpr_tl1_err_data[0]);
1496 		ncl1ps = CH_ERR_TL1_TLMAX;
1497 	} else {
1498 		ncl1ps = 0;
1499 	}
1500 
1501 	for (i = 0; i < ncl1ps; i++, cl1p++) {
1502 		if (cl1p->ch_err_tl1_flags == 0)
1503 			continue;
1504 
1505 		/*
1506 		 * Grab a copy of the logout data and invalidate
1507 		 * the logout area.
1508 		 */
1509 		cl1 = *cl1p;
1510 		bzero(cl1p, sizeof (ch_err_tl1_data_t));
1511 		cl1p->ch_err_tl1_logout.clo_data.chd_afar = LOGOUT_INVALID;
1512 		me_flags = CH_ERR_ME_FLAGS(cl1.ch_err_tl1_flags);
1513 
1514 		/*
1515 		 * Log "first error" in ch_err_tl1_data.
1516 		 */
1517 		if (cl1.ch_err_tl1_flags & CH_ERR_FECC) {
1518 			ceen = get_error_enable() & EN_REG_CEEN;
1519 			nceen = get_error_enable() & EN_REG_NCEEN;
1520 			cpu_log_fast_ecc_error((caddr_t)cl1.ch_err_tl1_tpc, 1,
1521 			    1, ceen, nceen, &cl1.ch_err_tl1_logout);
1522 		}
1523 #if defined(CPU_IMP_L1_CACHE_PARITY)
1524 		if (cl1.ch_err_tl1_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1525 			cpu_parity_error(rp, cl1.ch_err_tl1_flags,
1526 			    (caddr_t)cl1.ch_err_tl1_tpc);
1527 		}
1528 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1529 
1530 		/*
1531 		 * Log "multiple events" in ch_err_tl1_data.  Note that
1532 		 * we don't read and clear the AFSR/AFAR in the TL>0 code
1533 		 * if the structure is busy, we just do the cache flushing
1534 		 * we have to do and then do the retry.  So the AFSR/AFAR
1535 		 * at this point *should* have some relevant info.  If there
1536 		 * are no valid errors in the AFSR, we'll assume they've
1537 		 * already been picked up and logged.  For I$/D$ parity,
1538 		 * we just log an event with an "Unknown" (NULL) TPC.
1539 		 */
1540 		if (me_flags & CH_ERR_FECC) {
1541 			ch_cpu_errors_t cpu_error_regs;
1542 			uint64_t t_afsr_errs;
1543 
1544 			/*
1545 			 * Get the error registers and see if there's
1546 			 * a pending error.  If not, don't bother
1547 			 * generating an "Invalid AFSR" error event.
1548 			 */
1549 			get_cpu_error_state(&cpu_error_regs);
1550 			t_afsr_errs = (cpu_error_regs.afsr_ext &
1551 			    C_AFSR_EXT_ALL_ERRS) |
1552 			    (cpu_error_regs.afsr & C_AFSR_ALL_ERRS);
1553 			if (t_afsr_errs != 0) {
1554 				ceen = get_error_enable() & EN_REG_CEEN;
1555 				nceen = get_error_enable() & EN_REG_NCEEN;
1556 				cpu_log_fast_ecc_error((caddr_t)NULL, 1,
1557 				    1, ceen, nceen, NULL);
1558 			}
1559 		}
1560 #if defined(CPU_IMP_L1_CACHE_PARITY)
1561 		if (me_flags & (CH_ERR_IPE | CH_ERR_DPE)) {
1562 			cpu_parity_error(rp, me_flags, (caddr_t)NULL);
1563 		}
1564 #endif	/* CPU_IMP_L1_CACHE_PARITY */
1565 	}
1566 }
1567 
1568 /*
1569  * Called from Fast ECC TL>0 handler in case of fatal error.
1570  * cpu_tl1_error should always find an associated ch_err_tl1_data structure,
1571  * but if we don't, we'll panic with something reasonable.
1572  */
1573 /*ARGSUSED*/
1574 void
1575 cpu_tl1_err_panic(struct regs *rp, ulong_t flags)
1576 {
1577 	cpu_tl1_error(rp, 1);
1578 	/*
1579 	 * Should never return, but just in case.
1580 	 */
1581 	fm_panic("Unsurvivable ECC Error at TL>0");
1582 }
1583 
1584 /*
1585  * The ce_err/ce_err_tl1 handlers transfer control here for CE, EMC, EDU:ST,
1586  * EDC, WDU, WDC, CPU, CPC, IVU, IVC events.
1587  * Disrupting errors controlled by NCEEN: EDU:ST, WDU, CPU, IVU
1588  * Disrupting errors controlled by CEEN: CE, EMC, EDC, WDC, CPC, IVC
1589  *
1590  * Cheetah+ also handles (No additional processing required):
1591  *    DUE, DTO, DBERR	(NCEEN controlled)
1592  *    THCE		(CEEN and ET_ECC_en controlled)
1593  *    TUE		(ET_ECC_en controlled)
1594  *
1595  * Panther further adds:
1596  *    IMU, L3_EDU, L3_WDU, L3_CPU		(NCEEN controlled)
1597  *    IMC, L3_EDC, L3_WDC, L3_CPC, L3_THCE	(CEEN controlled)
1598  *    TUE_SH, TUE		(NCEEN and L2_tag_ECC_en controlled)
1599  *    L3_TUE, L3_TUE_SH		(NCEEN and ET_ECC_en controlled)
1600  *    THCE			(CEEN and L2_tag_ECC_en controlled)
1601  *    L3_THCE			(CEEN and ET_ECC_en controlled)
1602  *
1603  * Note that the p_clo_flags input is only valid in cases where the
1604  * cpu_private struct is not yet initialized (since that is the only
1605  * time that information cannot be obtained from the logout struct.)
1606  */
1607 /*ARGSUSED*/
1608 void
1609 cpu_disrupting_error(struct regs *rp, ulong_t p_clo_flags)
1610 {
1611 	struct async_flt *aflt;
1612 	ch_async_flt_t ch_flt;
1613 	char pr_reason[MAX_REASON_STRING];
1614 	ch_cpu_logout_t *clop;
1615 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1616 	ch_cpu_errors_t cpu_error_regs;
1617 
1618 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1619 	/*
1620 	 * Get the CPU log out info. If we can't find our CPU private
1621 	 * pointer, then we will have to make due without any detailed
1622 	 * logout information.
1623 	 */
1624 	if (CPU_PRIVATE(CPU) == NULL) {
1625 		clop = NULL;
1626 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1627 		get_cpu_error_state(&cpu_error_regs);
1628 		set_cpu_error_state(&cpu_error_regs);
1629 		t_afar = cpu_error_regs.afar;
1630 		t_afsr = cpu_error_regs.afsr;
1631 		t_afsr_ext = cpu_error_regs.afsr_ext;
1632 #if defined(SERRANO)
1633 		ch_flt.afar2 = cpu_error_regs.afar2;
1634 #endif	/* SERRANO */
1635 	} else {
1636 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
1637 		t_afar = clop->clo_data.chd_afar;
1638 		t_afsr = clop->clo_data.chd_afsr;
1639 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1640 #if defined(SERRANO)
1641 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1642 #endif	/* SERRANO */
1643 	}
1644 
1645 	/*
1646 	 * In order to simplify code, we maintain this afsr_errs
1647 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1648 	 * sticky bits.
1649 	 */
1650 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1651 	    (t_afsr & C_AFSR_ALL_ERRS);
1652 
1653 	pr_reason[0] = '\0';
1654 	/* Setup the async fault structure */
1655 	aflt = (struct async_flt *)&ch_flt;
1656 	ch_flt.afsr_ext = t_afsr_ext;
1657 	ch_flt.afsr_errs = t_afsr_errs;
1658 	aflt->flt_stat = t_afsr;
1659 	aflt->flt_addr = t_afar;
1660 	aflt->flt_pc = (caddr_t)rp->r_pc;
1661 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1662 	aflt->flt_tl = 0;
1663 	aflt->flt_panic = C_AFSR_PANIC(t_afsr_errs);
1664 
1665 	/*
1666 	 * If this trap is a result of one of the errors not masked
1667 	 * by cpu_ce_not_deferred, we don't reenable CEEN. Instead
1668 	 * indicate that a timeout is to be set later.
1669 	 */
1670 	if (!(t_afsr_errs & (cpu_ce_not_deferred | cpu_ce_not_deferred_ext)) &&
1671 	    !aflt->flt_panic)
1672 		ch_flt.flt_trapped_ce = CE_CEEN_DEFER | CE_CEEN_TRAPPED;
1673 	else
1674 		ch_flt.flt_trapped_ce = CE_CEEN_NODEFER | CE_CEEN_TRAPPED;
1675 
1676 	/*
1677 	 * log the CE and clean up
1678 	 */
1679 	cpu_log_and_clear_ce(&ch_flt);
1680 
1681 	/*
1682 	 * We re-enable CEEN (if required) and check if any disrupting errors
1683 	 * have happened.  We do this because if a disrupting error had occurred
1684 	 * with CEEN off, the trap will not be taken when CEEN is re-enabled.
1685 	 * Note that CEEN works differently on Cheetah than on Spitfire.  Also,
1686 	 * we enable CEEN *before* checking the AFSR to avoid the small window
1687 	 * of a error happening between checking the AFSR and enabling CEEN.
1688 	 */
1689 	if (ch_flt.flt_trapped_ce & CE_CEEN_NODEFER)
1690 	    set_error_enable(get_error_enable() | EN_REG_CEEN);
1691 	if (clear_errors(&ch_flt)) {
1692 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
1693 		    NULL);
1694 	}
1695 
1696 	/*
1697 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
1698 	 * be logged as part of the panic flow.
1699 	 */
1700 	if (aflt->flt_panic)
1701 		fm_panic("%sError(s)", pr_reason);
1702 }
1703 
1704 /*
1705  * The async_err handler transfers control here for UE, EMU, EDU:BLD,
1706  * L3_EDU:BLD, TO, and BERR events.
1707  * Deferred errors controlled by NCEEN: UE, EMU, EDU:BLD, L3_EDU:BLD, TO, BERR
1708  *
1709  * Cheetah+: No additional errors handled.
1710  *
1711  * Note that the p_clo_flags input is only valid in cases where the
1712  * cpu_private struct is not yet initialized (since that is the only
1713  * time that information cannot be obtained from the logout struct.)
1714  */
1715 /*ARGSUSED*/
1716 void
1717 cpu_deferred_error(struct regs *rp, ulong_t p_clo_flags)
1718 {
1719 	ushort_t ttype, tl;
1720 	ch_async_flt_t ch_flt;
1721 	struct async_flt *aflt;
1722 	int trampolined = 0;
1723 	char pr_reason[MAX_REASON_STRING];
1724 	ch_cpu_logout_t *clop;
1725 	uint64_t ceen, clo_flags;
1726 	uint64_t log_afsr;
1727 	uint64_t t_afar, t_afsr, t_afsr_ext, t_afsr_errs;
1728 	ch_cpu_errors_t cpu_error_regs;
1729 	int expected = DDI_FM_ERR_UNEXPECTED;
1730 	ddi_acc_hdl_t *hp;
1731 
1732 	/*
1733 	 * We need to look at p_flag to determine if the thread detected an
1734 	 * error while dumping core.  We can't grab p_lock here, but it's ok
1735 	 * because we just need a consistent snapshot and we know that everyone
1736 	 * else will store a consistent set of bits while holding p_lock.  We
1737 	 * don't have to worry about a race because SDOCORE is set once prior
1738 	 * to doing i/o from the process's address space and is never cleared.
1739 	 */
1740 	uint_t pflag = ttoproc(curthread)->p_flag;
1741 
1742 	bzero(&ch_flt, sizeof (ch_async_flt_t));
1743 	/*
1744 	 * Get the CPU log out info. If we can't find our CPU private
1745 	 * pointer then we will have to make due without any detailed
1746 	 * logout information.
1747 	 */
1748 	if (CPU_PRIVATE(CPU) == NULL) {
1749 		clop = NULL;
1750 		ch_flt.flt_diag_data.chd_afar = LOGOUT_INVALID;
1751 		get_cpu_error_state(&cpu_error_regs);
1752 		set_cpu_error_state(&cpu_error_regs);
1753 		t_afar = cpu_error_regs.afar;
1754 		t_afsr = cpu_error_regs.afsr;
1755 		t_afsr_ext = cpu_error_regs.afsr_ext;
1756 #if defined(SERRANO)
1757 		ch_flt.afar2 = cpu_error_regs.afar2;
1758 #endif	/* SERRANO */
1759 		clo_flags = p_clo_flags;
1760 	} else {
1761 		clop = CPU_PRIVATE_PTR(CPU, chpr_async_logout);
1762 		t_afar = clop->clo_data.chd_afar;
1763 		t_afsr = clop->clo_data.chd_afsr;
1764 		t_afsr_ext = clop->clo_data.chd_afsr_ext;
1765 #if defined(SERRANO)
1766 		ch_flt.afar2 = clop->clo_data.chd_afar2;
1767 #endif	/* SERRANO */
1768 		clo_flags = clop->clo_flags;
1769 	}
1770 
1771 	/*
1772 	 * In order to simplify code, we maintain this afsr_errs
1773 	 * variable which holds the aggregate of AFSR and AFSR_EXT
1774 	 * sticky bits.
1775 	 */
1776 	t_afsr_errs = (t_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
1777 	    (t_afsr & C_AFSR_ALL_ERRS);
1778 	pr_reason[0] = '\0';
1779 
1780 	/*
1781 	 * Grab information encoded into our clo_flags field.
1782 	 */
1783 	ceen = clo_flags & EN_REG_CEEN;
1784 	tl = (clo_flags & CLO_FLAGS_TL_MASK) >> CLO_FLAGS_TL_SHIFT;
1785 	ttype = (clo_flags & CLO_FLAGS_TT_MASK) >> CLO_FLAGS_TT_SHIFT;
1786 
1787 	/*
1788 	 * handle the specific error
1789 	 */
1790 	aflt = (struct async_flt *)&ch_flt;
1791 	aflt->flt_id = gethrtime_waitfree();
1792 	aflt->flt_bus_id = getprocessorid();
1793 	aflt->flt_inst = CPU->cpu_id;
1794 	ch_flt.afsr_ext = t_afsr_ext;
1795 	ch_flt.afsr_errs = t_afsr_errs;
1796 	aflt->flt_stat = t_afsr;
1797 	aflt->flt_addr = t_afar;
1798 	aflt->flt_pc = (caddr_t)rp->r_pc;
1799 	aflt->flt_prot = AFLT_PROT_NONE;
1800 	aflt->flt_class = CPU_FAULT;
1801 	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ?  1 : 0;
1802 	aflt->flt_tl = (uchar_t)tl;
1803 	aflt->flt_panic = ((tl != 0) || (aft_testfatal != 0) ||
1804 	    C_AFSR_PANIC(t_afsr_errs));
1805 	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;
1806 	aflt->flt_status = ((ttype == T_DATA_ERROR) ? ECC_D_TRAP : ECC_I_TRAP);
1807 
1808 	/*
1809 	 * If the trap occurred in privileged mode at TL=0, we need to check to
1810 	 * see if we were executing in the kernel under on_trap() or t_lofault
1811 	 * protection.  If so, modify the saved registers so that we return
1812 	 * from the trap to the appropriate trampoline routine.
1813 	 */
1814 	if (aflt->flt_priv && tl == 0) {
1815 		if (curthread->t_ontrap != NULL) {
1816 			on_trap_data_t *otp = curthread->t_ontrap;
1817 
1818 			if (otp->ot_prot & OT_DATA_EC) {
1819 				aflt->flt_prot = AFLT_PROT_EC;
1820 				otp->ot_trap |= OT_DATA_EC;
1821 				rp->r_pc = otp->ot_trampoline;
1822 				rp->r_npc = rp->r_pc + 4;
1823 				trampolined = 1;
1824 			}
1825 
1826 			if ((t_afsr & (C_AFSR_TO | C_AFSR_BERR)) &&
1827 			    (otp->ot_prot & OT_DATA_ACCESS)) {
1828 				aflt->flt_prot = AFLT_PROT_ACCESS;
1829 				otp->ot_trap |= OT_DATA_ACCESS;
1830 				rp->r_pc = otp->ot_trampoline;
1831 				rp->r_npc = rp->r_pc + 4;
1832 				trampolined = 1;
1833 				/*
1834 				 * for peeks and caut_gets errors are expected
1835 				 */
1836 				hp = (ddi_acc_hdl_t *)otp->ot_handle;
1837 				if (!hp)
1838 					expected = DDI_FM_ERR_PEEK;
1839 				else if (hp->ah_acc.devacc_attr_access ==
1840 				    DDI_CAUTIOUS_ACC)
1841 					expected = DDI_FM_ERR_EXPECTED;
1842 			}
1843 
1844 		} else if (curthread->t_lofault) {
1845 			aflt->flt_prot = AFLT_PROT_COPY;
1846 			rp->r_g1 = EFAULT;
1847 			rp->r_pc = curthread->t_lofault;
1848 			rp->r_npc = rp->r_pc + 4;
1849 			trampolined = 1;
1850 		}
1851 	}
1852 
1853 	/*
1854 	 * If we're in user mode or we're doing a protected copy, we either
1855 	 * want the ASTON code below to send a signal to the user process
1856 	 * or we want to panic if aft_panic is set.
1857 	 *
1858 	 * If we're in privileged mode and we're not doing a copy, then we
1859 	 * need to check if we've trampolined.  If we haven't trampolined,
1860 	 * we should panic.
1861 	 */
1862 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
1863 		if (t_afsr_errs &
1864 		    ((C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS) &
1865 		    ~(C_AFSR_BERR | C_AFSR_TO)))
1866 			aflt->flt_panic |= aft_panic;
1867 	} else if (!trampolined) {
1868 			aflt->flt_panic = 1;
1869 	}
1870 
1871 	/*
1872 	 * If we've trampolined due to a privileged TO or BERR, or if an
1873 	 * unprivileged TO or BERR occurred, we don't want to enqueue an
1874 	 * event for that TO or BERR.  Queue all other events (if any) besides
1875 	 * the TO/BERR.  Since we may not be enqueing any events, we need to
1876 	 * ignore the number of events queued.  If we haven't trampolined due
1877 	 * to a TO or BERR, just enqueue events normally.
1878 	 */
1879 	log_afsr = t_afsr_errs;
1880 	if (trampolined) {
1881 		log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1882 	} else if (!aflt->flt_priv) {
1883 		/*
1884 		 * User mode, suppress messages if
1885 		 * cpu_berr_to_verbose is not set.
1886 		 */
1887 		if (!cpu_berr_to_verbose)
1888 			log_afsr &= ~(C_AFSR_TO | C_AFSR_BERR);
1889 	}
1890 
1891 	/*
1892 	 * Log any errors that occurred
1893 	 */
1894 	if (((log_afsr &
1895 		((C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS) & ~C_AFSR_ME)) &&
1896 		cpu_queue_events(&ch_flt, pr_reason, log_afsr, clop) == 0) ||
1897 		(t_afsr_errs &
1898 		(C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) == 0) {
1899 		ch_flt.flt_type = CPU_INV_AFSR;
1900 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
1901 		    (void *)&ch_flt, sizeof (ch_async_flt_t), ue_queue,
1902 		    aflt->flt_panic);
1903 	}
1904 
1905 	/*
1906 	 * Zero out + invalidate CPU logout.
1907 	 */
1908 	if (clop) {
1909 		bzero(clop, sizeof (ch_cpu_logout_t));
1910 		clop->clo_data.chd_afar = LOGOUT_INVALID;
1911 	}
1912 
1913 #if defined(JALAPENO) || defined(SERRANO)
1914 	/*
1915 	 * UE/RUE/BERR/TO: Call our bus nexus friends to check for
1916 	 * IO errors that may have resulted in this trap.
1917 	 */
1918 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE|C_AFSR_TO|C_AFSR_BERR)) {
1919 		cpu_run_bus_error_handlers(aflt, expected);
1920 	}
1921 
1922 	/*
1923 	 * UE/RUE: If UE or RUE is in memory, we need to flush the bad
1924 	 * line from the Ecache.  We also need to query the bus nexus for
1925 	 * fatal errors.  Attempts to do diagnostic read on caches may
1926 	 * introduce more errors (especially when the module is bad).
1927 	 */
1928 	if (t_afsr & (C_AFSR_UE|C_AFSR_RUE)) {
1929 		/*
1930 		 * Ask our bus nexus friends if they have any fatal errors.  If
1931 		 * so, they will log appropriate error messages.
1932 		 */
1933 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1934 			aflt->flt_panic = 1;
1935 
1936 		/*
1937 		 * We got a UE or RUE and are panicking, save the fault PA in
1938 		 * a known location so that the platform specific panic code
1939 		 * can check for copyback errors.
1940 		 */
1941 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1942 			panic_aflt = *aflt;
1943 		}
1944 	}
1945 
1946 	/*
1947 	 * Flush Ecache line or entire Ecache
1948 	 */
1949 	if (t_afsr & (C_AFSR_UE | C_AFSR_RUE | C_AFSR_EDU | C_AFSR_BERR))
1950 		cpu_error_ecache_flush(&ch_flt);
1951 #else /* JALAPENO || SERRANO */
1952 	/*
1953 	 * UE/BERR/TO: Call our bus nexus friends to check for
1954 	 * IO errors that may have resulted in this trap.
1955 	 */
1956 	if (t_afsr & (C_AFSR_UE|C_AFSR_TO|C_AFSR_BERR)) {
1957 		cpu_run_bus_error_handlers(aflt, expected);
1958 	}
1959 
1960 	/*
1961 	 * UE: If the UE is in memory, we need to flush the bad
1962 	 * line from the Ecache.  We also need to query the bus nexus for
1963 	 * fatal errors.  Attempts to do diagnostic read on caches may
1964 	 * introduce more errors (especially when the module is bad).
1965 	 */
1966 	if (t_afsr & C_AFSR_UE) {
1967 		/*
1968 		 * Ask our legacy bus nexus friends if they have any fatal
1969 		 * errors.  If so, they will log appropriate error messages.
1970 		 */
1971 		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL)
1972 			aflt->flt_panic = 1;
1973 
1974 		/*
1975 		 * We got a UE and are panicking, save the fault PA in a known
1976 		 * location so that the platform specific panic code can check
1977 		 * for copyback errors.
1978 		 */
1979 		if (aflt->flt_panic && cpu_flt_in_memory(&ch_flt, C_AFSR_UE)) {
1980 			panic_aflt = *aflt;
1981 		}
1982 	}
1983 
1984 	/*
1985 	 * Flush Ecache line or entire Ecache
1986 	 */
1987 	if (t_afsr_errs &
1988 	    (C_AFSR_UE | C_AFSR_EDU | C_AFSR_BERR | C_AFSR_L3_EDU))
1989 		cpu_error_ecache_flush(&ch_flt);
1990 #endif /* JALAPENO || SERRANO */
1991 
1992 	/*
1993 	 * We carefully re-enable NCEEN and CEEN and then check if any deferred
1994 	 * or disrupting errors have happened.  We do this because if a
1995 	 * deferred or disrupting error had occurred with NCEEN/CEEN off, the
1996 	 * trap will not be taken when NCEEN/CEEN is re-enabled.  Note that
1997 	 * CEEN works differently on Cheetah than on Spitfire.  Also, we enable
1998 	 * NCEEN/CEEN *before* checking the AFSR to avoid the small window of a
1999 	 * deferred or disrupting error happening between checking the AFSR and
2000 	 * enabling NCEEN/CEEN.
2001 	 *
2002 	 * Note: CEEN reenabled only if it was on when trap taken.
2003 	 */
2004 	set_error_enable(get_error_enable() | (EN_REG_NCEEN | ceen));
2005 	if (clear_errors(&ch_flt)) {
2006 		/*
2007 		 * Check for secondary errors, and avoid panicking if we
2008 		 * have them
2009 		 */
2010 		if (cpu_check_secondary_errors(&ch_flt, t_afsr_errs,
2011 		    t_afar) == 0) {
2012 			aflt->flt_panic |= ((ch_flt.afsr_errs &
2013 			    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS)) != 0);
2014 		}
2015 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
2016 		    NULL);
2017 	}
2018 
2019 	/*
2020 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2021 	 * be logged as part of the panic flow.
2022 	 */
2023 	if (aflt->flt_panic)
2024 		fm_panic("%sError(s)", pr_reason);
2025 
2026 	/*
2027 	 * If we queued an error and we are going to return from the trap and
2028 	 * the error was in user mode or inside of a copy routine, set AST flag
2029 	 * so the queue will be drained before returning to user mode.  The
2030 	 * AST processing will also act on our failure policy.
2031 	 */
2032 	if (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY) {
2033 		int pcb_flag = 0;
2034 
2035 		if (t_afsr_errs &
2036 		    (C_AFSR_ASYNC_ERRS | C_AFSR_EXT_ASYNC_ERRS &
2037 		    ~(C_AFSR_BERR | C_AFSR_TO)))
2038 			pcb_flag |= ASYNC_HWERR;
2039 
2040 		if (t_afsr & C_AFSR_BERR)
2041 			pcb_flag |= ASYNC_BERR;
2042 
2043 		if (t_afsr & C_AFSR_TO)
2044 			pcb_flag |= ASYNC_BTO;
2045 
2046 		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
2047 		aston(curthread);
2048 	}
2049 }
2050 
2051 #if defined(CPU_IMP_L1_CACHE_PARITY)
2052 /*
2053  * Handling of data and instruction parity errors (traps 0x71, 0x72).
2054  *
2055  * For Panther, P$ data parity errors during floating point load hits
2056  * are also detected (reported as TT 0x71) and handled by this trap
2057  * handler.
2058  *
2059  * AFSR/AFAR are not set for parity errors, only TPC (a virtual address)
2060  * is available.
2061  */
2062 /*ARGSUSED*/
2063 void
2064 cpu_parity_error(struct regs *rp, uint_t flags, caddr_t tpc)
2065 {
2066 	ch_async_flt_t ch_flt;
2067 	struct async_flt *aflt;
2068 	uchar_t tl = ((flags & CH_ERR_TL) != 0);
2069 	uchar_t iparity = ((flags & CH_ERR_IPE) != 0);
2070 	uchar_t panic = ((flags & CH_ERR_PANIC) != 0);
2071 	char *error_class;
2072 
2073 	/*
2074 	 * Log the error.
2075 	 * For icache parity errors the fault address is the trap PC.
2076 	 * For dcache/pcache parity errors the instruction would have to
2077 	 * be decoded to determine the address and that isn't possible
2078 	 * at high PIL.
2079 	 */
2080 	bzero(&ch_flt, sizeof (ch_async_flt_t));
2081 	aflt = (struct async_flt *)&ch_flt;
2082 	aflt->flt_id = gethrtime_waitfree();
2083 	aflt->flt_bus_id = getprocessorid();
2084 	aflt->flt_inst = CPU->cpu_id;
2085 	aflt->flt_pc = tpc;
2086 	aflt->flt_addr = iparity ? (uint64_t)tpc : AFLT_INV_ADDR;
2087 	aflt->flt_prot = AFLT_PROT_NONE;
2088 	aflt->flt_class = CPU_FAULT;
2089 	aflt->flt_priv = (tl || (rp->r_tstate & TSTATE_PRIV)) ?  1 : 0;
2090 	aflt->flt_tl = tl;
2091 	aflt->flt_panic = panic;
2092 	aflt->flt_status = iparity ? ECC_IP_TRAP : ECC_DP_TRAP;
2093 	ch_flt.flt_type = iparity ? CPU_IC_PARITY : CPU_DC_PARITY;
2094 
2095 	if (iparity) {
2096 		cpu_icache_parity_info(&ch_flt);
2097 		if (ch_flt.parity_data.ipe.cpl_off != -1)
2098 			error_class = FM_EREPORT_CPU_USIII_IDSPE;
2099 		else if (ch_flt.parity_data.ipe.cpl_way != -1)
2100 			error_class = FM_EREPORT_CPU_USIII_ITSPE;
2101 		else
2102 			error_class = FM_EREPORT_CPU_USIII_IPE;
2103 		aflt->flt_payload = FM_EREPORT_PAYLOAD_ICACHE_PE;
2104 	} else {
2105 		cpu_dcache_parity_info(&ch_flt);
2106 		if (ch_flt.parity_data.dpe.cpl_off != -1)
2107 			error_class = FM_EREPORT_CPU_USIII_DDSPE;
2108 		else if (ch_flt.parity_data.dpe.cpl_way != -1)
2109 			error_class = FM_EREPORT_CPU_USIII_DTSPE;
2110 		else
2111 			error_class = FM_EREPORT_CPU_USIII_DPE;
2112 		aflt->flt_payload = FM_EREPORT_PAYLOAD_DCACHE_PE;
2113 		/*
2114 		 * For panther we also need to check the P$ for parity errors.
2115 		 */
2116 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2117 			cpu_pcache_parity_info(&ch_flt);
2118 			if (ch_flt.parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2119 				error_class = FM_EREPORT_CPU_USIII_PDSPE;
2120 				aflt->flt_payload =
2121 				    FM_EREPORT_PAYLOAD_PCACHE_PE;
2122 			}
2123 		}
2124 	}
2125 
2126 	cpu_errorq_dispatch(error_class, (void *)&ch_flt,
2127 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
2128 
2129 	if (iparity) {
2130 		/*
2131 		 * Invalidate entire I$.
2132 		 * This is required due to the use of diagnostic ASI
2133 		 * accesses that may result in a loss of I$ coherency.
2134 		 */
2135 		if (cache_boot_state & DCU_IC) {
2136 			flush_icache();
2137 		}
2138 		/*
2139 		 * According to section P.3.1 of the Panther PRM, we
2140 		 * need to do a little more for recovery on those
2141 		 * CPUs after encountering an I$ parity error.
2142 		 */
2143 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2144 			flush_ipb();
2145 			correct_dcache_parity(dcache_size,
2146 			    dcache_linesize);
2147 			flush_pcache();
2148 		}
2149 	} else {
2150 		/*
2151 		 * Since the valid bit is ignored when checking parity the
2152 		 * D$ data and tag must also be corrected.  Set D$ data bits
2153 		 * to zero and set utag to 0, 1, 2, 3.
2154 		 */
2155 		correct_dcache_parity(dcache_size, dcache_linesize);
2156 
2157 		/*
2158 		 * According to section P.3.3 of the Panther PRM, we
2159 		 * need to do a little more for recovery on those
2160 		 * CPUs after encountering a D$ or P$ parity error.
2161 		 *
2162 		 * As far as clearing P$ parity errors, it is enough to
2163 		 * simply invalidate all entries in the P$ since P$ parity
2164 		 * error traps are only generated for floating point load
2165 		 * hits.
2166 		 */
2167 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2168 			flush_icache();
2169 			flush_ipb();
2170 			flush_pcache();
2171 		}
2172 	}
2173 
2174 	/*
2175 	 * Invalidate entire D$ if it was enabled.
2176 	 * This is done to avoid stale data in the D$ which might
2177 	 * occur with the D$ disabled and the trap handler doing
2178 	 * stores affecting lines already in the D$.
2179 	 */
2180 	if (cache_boot_state & DCU_DC) {
2181 		flush_dcache();
2182 	}
2183 
2184 	/*
2185 	 * Restore caches to their bootup state.
2186 	 */
2187 	set_dcu(get_dcu() | cache_boot_state);
2188 
2189 	/*
2190 	 * Panic here if aflt->flt_panic has been set.  Enqueued errors will
2191 	 * be logged as part of the panic flow.
2192 	 */
2193 	if (aflt->flt_panic)
2194 		fm_panic("%sError(s)", iparity ? "IPE " : "DPE ");
2195 
2196 	/*
2197 	 * If this error occurred at TL>0 then flush the E$ here to reduce
2198 	 * the chance of getting an unrecoverable Fast ECC error.  This
2199 	 * flush will evict the part of the parity trap handler that is run
2200 	 * at TL>1.
2201 	 */
2202 	if (tl) {
2203 		cpu_flush_ecache();
2204 	}
2205 }
2206 
2207 /*
2208  * On an I$ parity error, mark the appropriate entries in the ch_async_flt_t
2209  * to indicate which portions of the captured data should be in the ereport.
2210  */
2211 void
2212 cpu_async_log_ic_parity_err(ch_async_flt_t *ch_flt)
2213 {
2214 	int way = ch_flt->parity_data.ipe.cpl_way;
2215 	int offset = ch_flt->parity_data.ipe.cpl_off;
2216 	int tag_index;
2217 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2218 
2219 
2220 	if ((offset != -1) || (way != -1)) {
2221 		/*
2222 		 * Parity error in I$ tag or data
2223 		 */
2224 		tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2225 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2226 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2227 			    PN_ICIDX_TO_WAY(tag_index);
2228 		else
2229 			ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2230 			    CH_ICIDX_TO_WAY(tag_index);
2231 		ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2232 		    IC_LOGFLAG_MAGIC;
2233 	} else {
2234 		/*
2235 		 * Parity error was not identified.
2236 		 * Log tags and data for all ways.
2237 		 */
2238 		for (way = 0; way < CH_ICACHE_NWAY; way++) {
2239 			tag_index = ch_flt->parity_data.ipe.cpl_ic[way].ic_idx;
2240 			if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
2241 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2242 				    PN_ICIDX_TO_WAY(tag_index);
2243 			else
2244 				ch_flt->parity_data.ipe.cpl_ic[way].ic_way =
2245 				    CH_ICIDX_TO_WAY(tag_index);
2246 			ch_flt->parity_data.ipe.cpl_ic[way].ic_logflag =
2247 			    IC_LOGFLAG_MAGIC;
2248 		}
2249 	}
2250 }
2251 
2252 /*
2253  * On an D$ parity error, mark the appropriate entries in the ch_async_flt_t
2254  * to indicate which portions of the captured data should be in the ereport.
2255  */
2256 void
2257 cpu_async_log_dc_parity_err(ch_async_flt_t *ch_flt)
2258 {
2259 	int way = ch_flt->parity_data.dpe.cpl_way;
2260 	int offset = ch_flt->parity_data.dpe.cpl_off;
2261 	int tag_index;
2262 
2263 	if (offset != -1) {
2264 		/*
2265 		 * Parity error in D$ or P$ data array.
2266 		 *
2267 		 * First check to see whether the parity error is in D$ or P$
2268 		 * since P$ data parity errors are reported in Panther using
2269 		 * the same trap.
2270 		 */
2271 		if (ch_flt->parity_data.dpe.cpl_cache == CPU_PC_PARITY) {
2272 			tag_index = ch_flt->parity_data.dpe.cpl_pc[way].pc_idx;
2273 			ch_flt->parity_data.dpe.cpl_pc[way].pc_way =
2274 			    CH_PCIDX_TO_WAY(tag_index);
2275 			ch_flt->parity_data.dpe.cpl_pc[way].pc_logflag =
2276 			    PC_LOGFLAG_MAGIC;
2277 		} else {
2278 			tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2279 			ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2280 			    CH_DCIDX_TO_WAY(tag_index);
2281 			ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2282 			    DC_LOGFLAG_MAGIC;
2283 		}
2284 	} else if (way != -1) {
2285 		/*
2286 		 * Parity error in D$ tag.
2287 		 */
2288 		tag_index = ch_flt->parity_data.dpe.cpl_dc[way].dc_idx;
2289 		ch_flt->parity_data.dpe.cpl_dc[way].dc_way =
2290 		    CH_DCIDX_TO_WAY(tag_index);
2291 		ch_flt->parity_data.dpe.cpl_dc[way].dc_logflag =
2292 		    DC_LOGFLAG_MAGIC;
2293 	}
2294 }
2295 #endif	/* CPU_IMP_L1_CACHE_PARITY */
2296 
2297 /*
2298  * The cpu_async_log_err() function is called via the [uc]e_drain() function to
2299  * post-process CPU events that are dequeued.  As such, it can be invoked
2300  * from softint context, from AST processing in the trap() flow, or from the
2301  * panic flow.  We decode the CPU-specific data, and take appropriate actions.
2302  * Historically this entry point was used to log the actual cmn_err(9F) text;
2303  * now with FMA it is used to prepare 'flt' to be converted into an ereport.
2304  * With FMA this function now also returns a flag which indicates to the
2305  * caller whether the ereport should be posted (1) or suppressed (0).
2306  */
2307 static int
2308 cpu_async_log_err(void *flt, errorq_elem_t *eqep)
2309 {
2310 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)flt;
2311 	struct async_flt *aflt = (struct async_flt *)flt;
2312 	uint64_t errors;
2313 
2314 	switch (ch_flt->flt_type) {
2315 	case CPU_INV_AFSR:
2316 		/*
2317 		 * If it is a disrupting trap and the AFSR is zero, then
2318 		 * the event has probably already been noted. Do not post
2319 		 * an ereport.
2320 		 */
2321 		if ((aflt->flt_status & ECC_C_TRAP) &&
2322 		    (!(aflt->flt_stat & C_AFSR_MASK)))
2323 			return (0);
2324 		else
2325 			return (1);
2326 	case CPU_TO:
2327 	case CPU_BERR:
2328 	case CPU_FATAL:
2329 	case CPU_FPUERR:
2330 		return (1);
2331 
2332 	case CPU_UE_ECACHE_RETIRE:
2333 		cpu_log_err(aflt);
2334 		cpu_page_retire(ch_flt);
2335 		return (1);
2336 
2337 	/*
2338 	 * Cases where we may want to suppress logging or perform
2339 	 * extended diagnostics.
2340 	 */
2341 	case CPU_CE:
2342 	case CPU_EMC:
2343 		/*
2344 		 * We want to skip logging and further classification
2345 		 * only if ALL the following conditions are true:
2346 		 *
2347 		 *	1. There is only one error
2348 		 *	2. That error is a correctable memory error
2349 		 *	3. The error is caused by the memory scrubber (in
2350 		 *	   which case the error will have occurred under
2351 		 *	   on_trap protection)
2352 		 *	4. The error is on a retired page
2353 		 *
2354 		 * Note: AFLT_PROT_EC is used places other than the memory
2355 		 * scrubber.  However, none of those errors should occur
2356 		 * on a retired page.
2357 		 */
2358 		if ((ch_flt->afsr_errs &
2359 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_CE &&
2360 		    aflt->flt_prot == AFLT_PROT_EC) {
2361 
2362 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2363 			    if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2364 
2365 				/*
2366 				 * Since we're skipping logging, we'll need
2367 				 * to schedule the re-enabling of CEEN
2368 				 */
2369 				(void) timeout(cpu_delayed_check_ce_errors,
2370 				    (void *)(uintptr_t)aflt->flt_inst,
2371 				    drv_usectohz((clock_t)cpu_ceen_delay_secs
2372 						 * MICROSEC));
2373 			    }
2374 			    return (0);
2375 			}
2376 		}
2377 
2378 		/*
2379 		 * Perform/schedule further classification actions, but
2380 		 * only if the page is healthy (we don't want bad
2381 		 * pages inducing too much diagnostic activity).  If we could
2382 		 * not find a page pointer then we also skip this.  If
2383 		 * ce_scrub_xdiag_recirc returns nonzero then it has chosen
2384 		 * to copy and recirculate the event (for further diagnostics)
2385 		 * and we should not proceed to log it here.
2386 		 *
2387 		 * This must be the last step here before the cpu_log_err()
2388 		 * below - if an event recirculates cpu_ce_log_err() will
2389 		 * not call the current function but just proceed directly
2390 		 * to cpu_ereport_post after the cpu_log_err() avoided below.
2391 		 *
2392 		 * Note: Check cpu_impl_async_log_err if changing this
2393 		 */
2394 		if (page_retire_check(aflt->flt_addr, &errors) == EINVAL) {
2395 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2396 			    CE_XDIAG_SKIP_NOPP);
2397 		} else {
2398 			if (errors != PR_OK) {
2399 				CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
2400 				    CE_XDIAG_SKIP_PAGEDET);
2401 			} else if (ce_scrub_xdiag_recirc(aflt, ce_queue, eqep,
2402 			    offsetof(ch_async_flt_t, cmn_asyncflt))) {
2403 				return (0);
2404 			}
2405 		}
2406 		/*FALLTHRU*/
2407 
2408 	/*
2409 	 * Cases where we just want to report the error and continue.
2410 	 */
2411 	case CPU_CE_ECACHE:
2412 	case CPU_UE_ECACHE:
2413 	case CPU_IV:
2414 	case CPU_ORPH:
2415 		cpu_log_err(aflt);
2416 		return (1);
2417 
2418 	/*
2419 	 * Cases where we want to fall through to handle panicking.
2420 	 */
2421 	case CPU_UE:
2422 		/*
2423 		 * We want to skip logging in the same conditions as the
2424 		 * CE case.  In addition, we want to make sure we're not
2425 		 * panicking.
2426 		 */
2427 		if (!panicstr && (ch_flt->afsr_errs &
2428 		    (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) == C_AFSR_UE &&
2429 		    aflt->flt_prot == AFLT_PROT_EC) {
2430 			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
2431 				/* Zero the address to clear the error */
2432 				softcall(ecc_page_zero, (void *)aflt->flt_addr);
2433 				return (0);
2434 			}
2435 		}
2436 		cpu_log_err(aflt);
2437 		break;
2438 
2439 	default:
2440 		/*
2441 		 * If the us3_common.c code doesn't know the flt_type, it may
2442 		 * be an implementation-specific code.  Call into the impldep
2443 		 * backend to find out what to do: if it tells us to continue,
2444 		 * break and handle as if falling through from a UE; if not,
2445 		 * the impldep backend has handled the error and we're done.
2446 		 */
2447 		switch (cpu_impl_async_log_err(flt, eqep)) {
2448 		case CH_ASYNC_LOG_DONE:
2449 			return (1);
2450 		case CH_ASYNC_LOG_RECIRC:
2451 			return (0);
2452 		case CH_ASYNC_LOG_CONTINUE:
2453 			break; /* continue on to handle UE-like error */
2454 		default:
2455 			cmn_err(CE_WARN, "discarding error 0x%p with "
2456 			    "invalid fault type (0x%x)",
2457 			    (void *)aflt, ch_flt->flt_type);
2458 			return (0);
2459 		}
2460 	}
2461 
2462 	/* ... fall through from the UE case */
2463 
2464 	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
2465 		if (!panicstr) {
2466 			cpu_page_retire(ch_flt);
2467 		} else {
2468 			/*
2469 			 * Clear UEs on panic so that we don't
2470 			 * get haunted by them during panic or
2471 			 * after reboot
2472 			 */
2473 			cpu_clearphys(aflt);
2474 			(void) clear_errors(NULL);
2475 		}
2476 	}
2477 
2478 	return (1);
2479 }
2480 
2481 /*
2482  * Retire the bad page that may contain the flushed error.
2483  */
2484 void
2485 cpu_page_retire(ch_async_flt_t *ch_flt)
2486 {
2487 	struct async_flt *aflt = (struct async_flt *)ch_flt;
2488 	(void) page_retire(aflt->flt_addr, PR_UE);
2489 }
2490 
2491 /*
2492  * The cpu_log_err() function is called by cpu_async_log_err() to perform the
2493  * generic event post-processing for correctable and uncorrectable memory,
2494  * E$, and MTag errors.  Historically this entry point was used to log bits of
2495  * common cmn_err(9F) text; now with FMA it is used to prepare 'flt' to be
2496  * converted into an ereport.  In addition, it transmits the error to any
2497  * platform-specific service-processor FRU logging routines, if available.
2498  */
2499 void
2500 cpu_log_err(struct async_flt *aflt)
2501 {
2502 	char unum[UNUM_NAMLEN];
2503 	int len = 0;
2504 	int synd_status, synd_code, afar_status;
2505 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
2506 
2507 	/*
2508 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
2509 	 * For Panther, L2$ is not external, so we don't want to
2510 	 * generate an E$ unum for those errors.
2511 	 */
2512 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
2513 		if (ch_flt->flt_bit & C_AFSR_EXT_L3_ERRS)
2514 			aflt->flt_status |= ECC_ECACHE;
2515 	} else {
2516 		if (ch_flt->flt_bit & C_AFSR_ECACHE)
2517 			aflt->flt_status |= ECC_ECACHE;
2518 	}
2519 
2520 	/*
2521 	 * Determine syndrome status.
2522 	 */
2523 	synd_status = afsr_to_synd_status(aflt->flt_inst,
2524 	    ch_flt->afsr_errs, ch_flt->flt_bit);
2525 
2526 	/*
2527 	 * Determine afar status.
2528 	 */
2529 	if (pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT))
2530 		afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
2531 				ch_flt->flt_bit);
2532 	else
2533 		afar_status = AFLT_STAT_INVALID;
2534 
2535 	/*
2536 	 * If afar status is not invalid do a unum lookup.
2537 	 */
2538 	if (afar_status != AFLT_STAT_INVALID) {
2539 		(void) cpu_get_mem_unum_aflt(synd_status, aflt, unum,
2540 			UNUM_NAMLEN, &len);
2541 	} else {
2542 		unum[0] = '\0';
2543 	}
2544 
2545 	synd_code = synd_to_synd_code(synd_status,
2546 	    aflt->flt_synd, ch_flt->flt_bit);
2547 
2548 	/*
2549 	 * Do not send the fruid message (plat_ecc_error_data_t)
2550 	 * to the SC if it can handle the enhanced error information
2551 	 * (plat_ecc_error2_data_t) or when the tunable
2552 	 * ecc_log_fruid_enable is set to 0.
2553 	 */
2554 
2555 	if (&plat_ecc_capability_sc_get &&
2556 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR_MESSAGE)) {
2557 		if (&plat_log_fruid_error)
2558 			plat_log_fruid_error(synd_code, aflt, unum,
2559 			    ch_flt->flt_bit);
2560 	}
2561 
2562 	if (aflt->flt_func != NULL)
2563 		aflt->flt_func(aflt, unum);
2564 
2565 	if (afar_status != AFLT_STAT_INVALID)
2566 		cpu_log_diag_info(ch_flt);
2567 
2568 	/*
2569 	 * If we have a CEEN error , we do not reenable CEEN until after
2570 	 * we exit the trap handler. Otherwise, another error may
2571 	 * occur causing the handler to be entered recursively.
2572 	 * We set a timeout to trigger in cpu_ceen_delay_secs seconds,
2573 	 * to try and ensure that the CPU makes progress in the face
2574 	 * of a CE storm.
2575 	 */
2576 	if (ch_flt->flt_trapped_ce & CE_CEEN_DEFER) {
2577 		(void) timeout(cpu_delayed_check_ce_errors,
2578 		    (void *)(uintptr_t)aflt->flt_inst,
2579 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
2580 	}
2581 }
2582 
2583 /*
2584  * Invoked by error_init() early in startup and therefore before
2585  * startup_errorq() is called to drain any error Q -
2586  *
2587  * startup()
2588  *   startup_end()
2589  *     error_init()
2590  *       cpu_error_init()
2591  * errorq_init()
2592  *   errorq_drain()
2593  * start_other_cpus()
2594  *
2595  * The purpose of this routine is to create error-related taskqs.  Taskqs
2596  * are used for this purpose because cpu_lock can't be grabbed from interrupt
2597  * context.
2598  */
2599 void
2600 cpu_error_init(int items)
2601 {
2602 	/*
2603 	 * Create taskq(s) to reenable CE
2604 	 */
2605 	ch_check_ce_tq = taskq_create("cheetah_check_ce", 1, minclsyspri,
2606 	    items, items, TASKQ_PREPOPULATE);
2607 }
2608 
2609 void
2610 cpu_ce_log_err(struct async_flt *aflt, errorq_elem_t *eqep)
2611 {
2612 	char unum[UNUM_NAMLEN];
2613 	int len;
2614 
2615 	switch (aflt->flt_class) {
2616 	case CPU_FAULT:
2617 		cpu_ereport_init(aflt);
2618 		if (cpu_async_log_err(aflt, eqep))
2619 			cpu_ereport_post(aflt);
2620 		break;
2621 
2622 	case BUS_FAULT:
2623 		if (aflt->flt_func != NULL) {
2624 			(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt,
2625 			    unum, UNUM_NAMLEN, &len);
2626 			aflt->flt_func(aflt, unum);
2627 		}
2628 		break;
2629 
2630 	case RECIRC_CPU_FAULT:
2631 		aflt->flt_class = CPU_FAULT;
2632 		cpu_log_err(aflt);
2633 		cpu_ereport_post(aflt);
2634 		break;
2635 
2636 	case RECIRC_BUS_FAULT:
2637 		ASSERT(aflt->flt_class != RECIRC_BUS_FAULT);
2638 		/*FALLTHRU*/
2639 	default:
2640 		cmn_err(CE_WARN, "discarding CE error 0x%p with invalid "
2641 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
2642 		return;
2643 	}
2644 }
2645 
2646 /*
2647  * Scrub and classify a CE.  This function must not modify the
2648  * fault structure passed to it but instead should return the classification
2649  * information.
2650  */
2651 
2652 static uchar_t
2653 cpu_ce_scrub_mem_err_common(struct async_flt *ecc, boolean_t logout_tried)
2654 {
2655 	uchar_t disp = CE_XDIAG_EXTALG;
2656 	on_trap_data_t otd;
2657 	uint64_t orig_err;
2658 	ch_cpu_logout_t *clop;
2659 
2660 	/*
2661 	 * Clear CEEN.  CPU CE TL > 0 trap handling will already have done
2662 	 * this, but our other callers have not.  Disable preemption to
2663 	 * avoid CPU migration so that we restore CEEN on the correct
2664 	 * cpu later.
2665 	 *
2666 	 * CEEN is cleared so that further CEs that our instruction and
2667 	 * data footprint induce do not cause use to either creep down
2668 	 * kernel stack to the point of overflow, or do so much CE
2669 	 * notification as to make little real forward progress.
2670 	 *
2671 	 * NCEEN must not be cleared.  However it is possible that
2672 	 * our accesses to the flt_addr may provoke a bus error or timeout
2673 	 * if the offending address has just been unconfigured as part of
2674 	 * a DR action.  So we must operate under on_trap protection.
2675 	 */
2676 	kpreempt_disable();
2677 	orig_err = get_error_enable();
2678 	if (orig_err & EN_REG_CEEN)
2679 	    set_error_enable(orig_err & ~EN_REG_CEEN);
2680 
2681 	/*
2682 	 * Our classification algorithm includes the line state before
2683 	 * the scrub; we'd like this captured after the detection and
2684 	 * before the algorithm below - the earlier the better.
2685 	 *
2686 	 * If we've come from a cpu CE trap then this info already exists
2687 	 * in the cpu logout area.
2688 	 *
2689 	 * For a CE detected by memscrub for which there was no trap
2690 	 * (running with CEEN off) cpu_log_and_clear_ce has called
2691 	 * cpu_ce_delayed_ec_logout to capture some cache data, and
2692 	 * marked the fault structure as incomplete as a flag to later
2693 	 * logging code.
2694 	 *
2695 	 * If called directly from an IO detected CE there has been
2696 	 * no line data capture.  In this case we logout to the cpu logout
2697 	 * area - that's appropriate since it's the cpu cache data we need
2698 	 * for classification.  We thus borrow the cpu logout area for a
2699 	 * short time, and cpu_ce_delayed_ec_logout will mark it as busy in
2700 	 * this time (we will invalidate it again below).
2701 	 *
2702 	 * If called from the partner check xcall handler then this cpu
2703 	 * (the partner) has not necessarily experienced a CE at this
2704 	 * address.  But we want to capture line state before its scrub
2705 	 * attempt since we use that in our classification.
2706 	 */
2707 	if (logout_tried == B_FALSE) {
2708 		if (!cpu_ce_delayed_ec_logout(ecc->flt_addr))
2709 			disp |= CE_XDIAG_NOLOGOUT;
2710 	}
2711 
2712 	/*
2713 	 * Scrub memory, then check AFSR for errors.  The AFAR we scrub may
2714 	 * no longer be valid (if DR'd since the initial event) so we
2715 	 * perform this scrub under on_trap protection.  If this access is
2716 	 * ok then further accesses below will also be ok - DR cannot
2717 	 * proceed while this thread is active (preemption is disabled);
2718 	 * to be safe we'll nonetheless use on_trap again below.
2719 	 */
2720 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2721 		cpu_scrubphys(ecc);
2722 	} else {
2723 		no_trap();
2724 		if (orig_err & EN_REG_CEEN)
2725 		    set_error_enable(orig_err);
2726 		kpreempt_enable();
2727 		return (disp);
2728 	}
2729 	no_trap();
2730 
2731 	/*
2732 	 * Did the casx read of the scrub log a CE that matches the AFAR?
2733 	 * Note that it's quite possible that the read sourced the data from
2734 	 * another cpu.
2735 	 */
2736 	if (clear_ecc(ecc))
2737 		disp |= CE_XDIAG_CE1;
2738 
2739 	/*
2740 	 * Read the data again.  This time the read is very likely to
2741 	 * come from memory since the scrub induced a writeback to memory.
2742 	 */
2743 	if (!on_trap(&otd, OT_DATA_ACCESS)) {
2744 		(void) lddphys(P2ALIGN(ecc->flt_addr, 8));
2745 	} else {
2746 		no_trap();
2747 		if (orig_err & EN_REG_CEEN)
2748 		    set_error_enable(orig_err);
2749 		kpreempt_enable();
2750 		return (disp);
2751 	}
2752 	no_trap();
2753 
2754 	/* Did that read induce a CE that matches the AFAR? */
2755 	if (clear_ecc(ecc))
2756 		disp |= CE_XDIAG_CE2;
2757 
2758 	/*
2759 	 * Look at the logout information and record whether we found the
2760 	 * line in l2/l3 cache.  For Panther we are interested in whether
2761 	 * we found it in either cache (it won't reside in both but
2762 	 * it is possible to read it that way given the moving target).
2763 	 */
2764 	clop = CPU_PRIVATE(CPU) ? CPU_PRIVATE_PTR(CPU, chpr_cecc_logout) : NULL;
2765 	if (!(disp & CE_XDIAG_NOLOGOUT) && clop &&
2766 	    clop->clo_data.chd_afar != LOGOUT_INVALID) {
2767 		int hit, level;
2768 		int state;
2769 		int totalsize;
2770 		ch_ec_data_t *ecp;
2771 
2772 		/*
2773 		 * If hit is nonzero then a match was found and hit will
2774 		 * be one greater than the index which hit.  For Panther we
2775 		 * also need to pay attention to level to see which of l2$ or
2776 		 * l3$ it hit in.
2777 		 */
2778 		hit = cpu_matching_ecache_line(ecc->flt_addr, &clop->clo_data,
2779 		    0, &level);
2780 
2781 		if (hit) {
2782 			--hit;
2783 			disp |= CE_XDIAG_AFARMATCH;
2784 
2785 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
2786 				if (level == 2)
2787 					ecp = &clop->clo_data.chd_l2_data[hit];
2788 				else
2789 					ecp = &clop->clo_data.chd_ec_data[hit];
2790 			} else {
2791 				ASSERT(level == 2);
2792 				ecp = &clop->clo_data.chd_ec_data[hit];
2793 			}
2794 			totalsize = cpunodes[CPU->cpu_id].ecache_size;
2795 			state = cpu_ectag_pa_to_subblk_state(totalsize,
2796 			    ecc->flt_addr, ecp->ec_tag);
2797 
2798 			/*
2799 			 * Cheetah variants use different state encodings -
2800 			 * the CH_ECSTATE_* defines vary depending on the
2801 			 * module we're compiled for.  Translate into our
2802 			 * one true version.  Conflate Owner-Shared state
2803 			 * of SSM mode with Owner as victimisation of such
2804 			 * lines may cause a writeback.
2805 			 */
2806 			switch (state) {
2807 			case CH_ECSTATE_MOD:
2808 				disp |= EC_STATE_M;
2809 				break;
2810 
2811 			case CH_ECSTATE_OWN:
2812 			case CH_ECSTATE_OWS:
2813 				disp |= EC_STATE_O;
2814 				break;
2815 
2816 			case CH_ECSTATE_EXL:
2817 				disp |= EC_STATE_E;
2818 				break;
2819 
2820 			case CH_ECSTATE_SHR:
2821 				disp |= EC_STATE_S;
2822 				break;
2823 
2824 			default:
2825 				disp |= EC_STATE_I;
2826 				break;
2827 			}
2828 		}
2829 
2830 		/*
2831 		 * If we initiated the delayed logout then we are responsible
2832 		 * for invalidating the logout area.
2833 		 */
2834 		if (logout_tried == B_FALSE) {
2835 			bzero(clop, sizeof (ch_cpu_logout_t));
2836 			clop->clo_data.chd_afar = LOGOUT_INVALID;
2837 		}
2838 	}
2839 
2840 	/*
2841 	 * Re-enable CEEN if we turned it off.
2842 	 */
2843 	if (orig_err & EN_REG_CEEN)
2844 	    set_error_enable(orig_err);
2845 	kpreempt_enable();
2846 
2847 	return (disp);
2848 }
2849 
2850 /*
2851  * Scrub a correctable memory error and collect data for classification
2852  * of CE type.  This function is called in the detection path, ie tl0 handling
2853  * of a correctable error trap (cpus) or interrupt (IO) at high PIL.
2854  */
2855 void
2856 cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t logout_tried)
2857 {
2858 	/*
2859 	 * Cheetah CE classification does not set any bits in flt_status.
2860 	 * Instead we will record classification datapoints in flt_disp.
2861 	 */
2862 	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
2863 
2864 	/*
2865 	 * To check if the error detected by IO is persistent, sticky or
2866 	 * intermittent.  This is noticed by clear_ecc().
2867 	 */
2868 	if (ecc->flt_status & ECC_IOBUS)
2869 		ecc->flt_stat = C_AFSR_MEMORY;
2870 
2871 	/*
2872 	 * Record information from this first part of the algorithm in
2873 	 * flt_disp.
2874 	 */
2875 	ecc->flt_disp = cpu_ce_scrub_mem_err_common(ecc, logout_tried);
2876 }
2877 
2878 /*
2879  * Select a partner to perform a further CE classification check from.
2880  * Must be called with kernel preemption disabled (to stop the cpu list
2881  * from changing).  The detecting cpu we are partnering has cpuid
2882  * aflt->flt_inst; we might not be running on the detecting cpu.
2883  *
2884  * Restrict choice to active cpus in the same cpu partition as ourselves in
2885  * an effort to stop bad cpus in one partition causing other partitions to
2886  * perform excessive diagnostic activity.  Actually since the errorq drain
2887  * is run from a softint most of the time and that is a global mechanism
2888  * this isolation is only partial.  Return NULL if we fail to find a
2889  * suitable partner.
2890  *
2891  * We prefer a partner that is in a different latency group to ourselves as
2892  * we will share fewer datapaths.  If such a partner is unavailable then
2893  * choose one in the same lgroup but prefer a different chip and only allow
2894  * a sibling core if flags includes PTNR_SIBLINGOK.  If all else fails and
2895  * flags includes PTNR_SELFOK then permit selection of the original detector.
2896  *
2897  * We keep a cache of the last partner selected for a cpu, and we'll try to
2898  * use that previous partner if no more than cpu_ce_ptnr_cachetime_sec seconds
2899  * have passed since that selection was made.  This provides the benefit
2900  * of the point-of-view of different partners over time but without
2901  * requiring frequent cpu list traversals.
2902  */
2903 
2904 #define	PTNR_SIBLINGOK	0x1	/* Allow selection of sibling core */
2905 #define	PTNR_SELFOK	0x2	/* Allow selection of cpu to "partner" itself */
2906 
2907 static cpu_t *
2908 ce_ptnr_select(struct async_flt *aflt, int flags, int *typep)
2909 {
2910 	cpu_t *sp, *dtcr, *ptnr, *locptnr, *sibptnr;
2911 	hrtime_t lasttime, thistime;
2912 
2913 	ASSERT(curthread->t_preempt > 0 || getpil() >= DISP_LEVEL);
2914 
2915 	dtcr = cpu[aflt->flt_inst];
2916 
2917 	/*
2918 	 * Short-circuit for the following cases:
2919 	 *	. the dtcr is not flagged active
2920 	 *	. there is just one cpu present
2921 	 *	. the detector has disappeared
2922 	 *	. we were given a bad flt_inst cpuid; this should not happen
2923 	 *	  (eg PCI code now fills flt_inst) but if it does it is no
2924 	 *	  reason to panic.
2925 	 *	. there is just one cpu left online in the cpu partition
2926 	 *
2927 	 * If we return NULL after this point then we do not update the
2928 	 * chpr_ceptnr_seltime which will cause us to perform a full lookup
2929 	 * again next time; this is the case where the only other cpu online
2930 	 * in the detector's partition is on the same chip as the detector
2931 	 * and since CEEN re-enable is throttled even that case should not
2932 	 * hurt performance.
2933 	 */
2934 	if (dtcr == NULL || !cpu_flagged_active(dtcr->cpu_flags)) {
2935 		return (NULL);
2936 	}
2937 	if (ncpus == 1 || dtcr->cpu_part->cp_ncpus == 1) {
2938 		if (flags & PTNR_SELFOK) {
2939 			*typep = CE_XDIAG_PTNR_SELF;
2940 			return (dtcr);
2941 		} else {
2942 			return (NULL);
2943 		}
2944 	}
2945 
2946 	thistime = gethrtime();
2947 	lasttime = CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime);
2948 
2949 	/*
2950 	 * Select a starting point.
2951 	 */
2952 	if (!lasttime) {
2953 		/*
2954 		 * We've never selected a partner for this detector before.
2955 		 * Start the scan at the next online cpu in the same cpu
2956 		 * partition.
2957 		 */
2958 		sp = dtcr->cpu_next_part;
2959 	} else if (thistime - lasttime < cpu_ce_ptnr_cachetime_sec * NANOSEC) {
2960 		/*
2961 		 * Our last selection has not aged yet.  If this partner:
2962 		 *	. is still a valid cpu,
2963 		 *	. is still in the same partition as the detector
2964 		 *	. is still marked active
2965 		 *	. satisfies the 'flags' argument criteria
2966 		 * then select it again without updating the timestamp.
2967 		 */
2968 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2969 		if (sp == NULL || sp->cpu_part != dtcr->cpu_part ||
2970 		    !cpu_flagged_active(sp->cpu_flags) ||
2971 		    (sp == dtcr && !(flags & PTNR_SELFOK)) ||
2972 		    (sp->cpu_chip->chip_id == dtcr->cpu_chip->chip_id &&
2973 		    !(flags & PTNR_SIBLINGOK))) {
2974 			sp = dtcr->cpu_next_part;
2975 		} else {
2976 			if (sp->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
2977 				*typep = CE_XDIAG_PTNR_REMOTE;
2978 			} else if (sp == dtcr) {
2979 				*typep = CE_XDIAG_PTNR_SELF;
2980 			} else if (sp->cpu_chip->chip_id ==
2981 			    dtcr->cpu_chip->chip_id) {
2982 				*typep = CE_XDIAG_PTNR_SIBLING;
2983 			} else {
2984 				*typep = CE_XDIAG_PTNR_LOCAL;
2985 			}
2986 			return (sp);
2987 		}
2988 	} else {
2989 		/*
2990 		 * Our last selection has aged.  If it is nonetheless still a
2991 		 * valid cpu then start the scan at the next cpu in the
2992 		 * partition after our last partner.  If the last selection
2993 		 * is no longer a valid cpu then go with our default.  In
2994 		 * this way we slowly cycle through possible partners to
2995 		 * obtain multiple viewpoints over time.
2996 		 */
2997 		sp = cpu[CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id)];
2998 		if (sp == NULL) {
2999 			sp = dtcr->cpu_next_part;
3000 		} else {
3001 			sp = sp->cpu_next_part;		/* may be dtcr */
3002 			if (sp->cpu_part != dtcr->cpu_part)
3003 				sp = dtcr;
3004 		}
3005 	}
3006 
3007 	/*
3008 	 * We have a proposed starting point for our search, but if this
3009 	 * cpu is offline then its cpu_next_part will point to itself
3010 	 * so we can't use that to iterate over cpus in this partition in
3011 	 * the loop below.  We still want to avoid iterating over cpus not
3012 	 * in our partition, so in the case that our starting point is offline
3013 	 * we will repoint it to be the detector itself;  and if the detector
3014 	 * happens to be offline we'll return NULL from the following loop.
3015 	 */
3016 	if (!cpu_flagged_active(sp->cpu_flags)) {
3017 		sp = dtcr;
3018 	}
3019 
3020 	ptnr = sp;
3021 	locptnr = NULL;
3022 	sibptnr = NULL;
3023 	do {
3024 		if (ptnr == dtcr || !cpu_flagged_active(ptnr->cpu_flags))
3025 			continue;
3026 		if (ptnr->cpu_lpl->lpl_lgrp != dtcr->cpu_lpl->lpl_lgrp) {
3027 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = ptnr->cpu_id;
3028 			CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3029 			*typep = CE_XDIAG_PTNR_REMOTE;
3030 			return (ptnr);
3031 		}
3032 		if (ptnr->cpu_chip->chip_id == dtcr->cpu_chip->chip_id) {
3033 			if (sibptnr == NULL)
3034 				sibptnr = ptnr;
3035 			continue;
3036 		}
3037 		if (locptnr == NULL)
3038 			locptnr = ptnr;
3039 	} while ((ptnr = ptnr->cpu_next_part) != sp);
3040 
3041 	/*
3042 	 * A foreign partner has already been returned if one was available.
3043 	 *
3044 	 * If locptnr is not NULL it is a cpu in the same lgroup as the
3045 	 * detector, is active, and is not a sibling of the detector.
3046 	 *
3047 	 * If sibptnr is not NULL it is a sibling of the detector, and is
3048 	 * active.
3049 	 *
3050 	 * If we have to resort to using the detector itself we have already
3051 	 * checked that it is active.
3052 	 */
3053 	if (locptnr) {
3054 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = locptnr->cpu_id;
3055 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3056 		*typep = CE_XDIAG_PTNR_LOCAL;
3057 		return (locptnr);
3058 	} else if (sibptnr && flags & PTNR_SIBLINGOK) {
3059 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = sibptnr->cpu_id;
3060 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3061 		*typep = CE_XDIAG_PTNR_SIBLING;
3062 		return (sibptnr);
3063 	} else if (flags & PTNR_SELFOK) {
3064 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_id) = dtcr->cpu_id;
3065 		CPU_PRIVATE_VAL(dtcr, chpr_ceptnr_seltime) = thistime;
3066 		*typep = CE_XDIAG_PTNR_SELF;
3067 		return (dtcr);
3068 	}
3069 
3070 	return (NULL);
3071 }
3072 
3073 /*
3074  * Cross call handler that is requested to run on the designated partner of
3075  * a cpu that experienced a possibly sticky or possibly persistnet CE.
3076  */
3077 static void
3078 ce_ptnrchk_xc(struct async_flt *aflt, uchar_t *dispp)
3079 {
3080 	*dispp = cpu_ce_scrub_mem_err_common(aflt, B_FALSE);
3081 }
3082 
3083 /*
3084  * The associated errorqs are never destroyed so we do not need to deal with
3085  * them disappearing before this timeout fires.  If the affected memory
3086  * has been DR'd out since the original event the scrub algrithm will catch
3087  * any errors and return null disposition info.  If the original detecting
3088  * cpu has been DR'd out then ereport detector info will not be able to
3089  * lookup CPU type;  with a small timeout this is unlikely.
3090  */
3091 static void
3092 ce_lkychk_cb(ce_lkychk_cb_t *cbarg)
3093 {
3094 	struct async_flt *aflt = cbarg->lkycb_aflt;
3095 	uchar_t disp;
3096 	cpu_t *cp;
3097 	int ptnrtype;
3098 
3099 	kpreempt_disable();
3100 	if (cp = ce_ptnr_select(aflt, PTNR_SIBLINGOK | PTNR_SELFOK,
3101 	    &ptnrtype)) {
3102 		xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc, (uint64_t)aflt,
3103 		    (uint64_t)&disp);
3104 		CE_XDIAG_SETLKYINFO(aflt->flt_disp, disp);
3105 		CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3106 		CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3107 	} else {
3108 		ce_xdiag_lkydrops++;
3109 		if (ncpus > 1)
3110 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3111 			    CE_XDIAG_SKIP_NOPTNR);
3112 	}
3113 	kpreempt_enable();
3114 
3115 	errorq_commit(cbarg->lkycb_eqp, cbarg->lkycb_eqep, ERRORQ_ASYNC);
3116 	kmem_free(cbarg, sizeof (ce_lkychk_cb_t));
3117 }
3118 
3119 /*
3120  * Called from errorq drain code when processing a CE error, both from
3121  * CPU and PCI drain functions.  Decide what further classification actions,
3122  * if any, we will perform.  Perform immediate actions now, and schedule
3123  * delayed actions as required.  Note that we are no longer necessarily running
3124  * on the detecting cpu, and that the async_flt structure will not persist on
3125  * return from this function.
3126  *
3127  * Calls to this function should aim to be self-throttling in some way.  With
3128  * the delayed re-enable of CEEN the absolute rate of calls should not
3129  * be excessive.  Callers should also avoid performing in-depth classification
3130  * for events in pages that are already known to be suspect.
3131  *
3132  * We return nonzero to indicate that the event has been copied and
3133  * recirculated for further testing.  The caller should not log the event
3134  * in this case - it will be logged when further test results are available.
3135  *
3136  * Our possible contexts are that of errorq_drain: below lock level or from
3137  * panic context.  We can assume that the cpu we are running on is online.
3138  */
3139 
3140 
3141 #ifdef DEBUG
3142 static int ce_xdiag_forceaction;
3143 #endif
3144 
3145 int
3146 ce_scrub_xdiag_recirc(struct async_flt *aflt, errorq_t *eqp,
3147     errorq_elem_t *eqep, size_t afltoffset)
3148 {
3149 	ce_dispact_t dispact, action;
3150 	cpu_t *cp;
3151 	uchar_t dtcrinfo, disp;
3152 	int ptnrtype;
3153 
3154 	if (!ce_disp_inited || panicstr || ce_xdiag_off) {
3155 		ce_xdiag_drops++;
3156 		return (0);
3157 	} else if (!aflt->flt_in_memory) {
3158 		ce_xdiag_drops++;
3159 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOTMEM);
3160 		return (0);
3161 	}
3162 
3163 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
3164 
3165 	/*
3166 	 * Some correctable events are not scrubbed/classified, such as those
3167 	 * noticed at the tail of cpu_deferred_error.  So if there is no
3168 	 * initial detector classification go no further.
3169 	 */
3170 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo)) {
3171 		ce_xdiag_drops++;
3172 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_NOSCRUB);
3173 		return (0);
3174 	}
3175 
3176 	dispact = CE_DISPACT(ce_disp_table,
3177 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
3178 	    CE_XDIAG_STATE(dtcrinfo),
3179 	    CE_XDIAG_CE1SEEN(dtcrinfo),
3180 	    CE_XDIAG_CE2SEEN(dtcrinfo));
3181 
3182 
3183 	action = CE_ACT(dispact);	/* bad lookup caught below */
3184 #ifdef DEBUG
3185 	if (ce_xdiag_forceaction != 0)
3186 		action = ce_xdiag_forceaction;
3187 #endif
3188 
3189 	switch (action) {
3190 	case CE_ACT_LKYCHK: {
3191 		caddr_t ndata;
3192 		errorq_elem_t *neqep;
3193 		struct async_flt *ecc;
3194 		ce_lkychk_cb_t *cbargp;
3195 
3196 		if ((ndata = errorq_elem_dup(eqp, eqep, &neqep)) == NULL) {
3197 			ce_xdiag_lkydrops++;
3198 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3199 			    CE_XDIAG_SKIP_DUPFAIL);
3200 			break;
3201 		}
3202 		ecc = (struct async_flt *)(ndata + afltoffset);
3203 
3204 		ASSERT(ecc->flt_class == CPU_FAULT ||
3205 		    ecc->flt_class == BUS_FAULT);
3206 		ecc->flt_class = (ecc->flt_class == CPU_FAULT) ?
3207 		    RECIRC_CPU_FAULT : RECIRC_BUS_FAULT;
3208 
3209 		cbargp = kmem_alloc(sizeof (ce_lkychk_cb_t), KM_SLEEP);
3210 		cbargp->lkycb_aflt = ecc;
3211 		cbargp->lkycb_eqp = eqp;
3212 		cbargp->lkycb_eqep = neqep;
3213 
3214 		(void) timeout((void (*)(void *))ce_lkychk_cb,
3215 		    (void *)cbargp, drv_usectohz(cpu_ce_lkychk_timeout_usec));
3216 		return (1);
3217 	}
3218 
3219 	case CE_ACT_PTNRCHK:
3220 		kpreempt_disable();	/* stop cpu list changing */
3221 		if ((cp = ce_ptnr_select(aflt, 0, &ptnrtype)) != NULL) {
3222 			xc_one(cp->cpu_id, (xcfunc_t *)ce_ptnrchk_xc,
3223 			    (uint64_t)aflt, (uint64_t)&disp);
3224 			CE_XDIAG_SETPTNRINFO(aflt->flt_disp, disp);
3225 			CE_XDIAG_SETPTNRID(aflt->flt_disp, cp->cpu_id);
3226 			CE_XDIAG_SETPTNRTYPE(aflt->flt_disp, ptnrtype);
3227 		} else if (ncpus > 1) {
3228 			ce_xdiag_ptnrdrops++;
3229 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3230 			    CE_XDIAG_SKIP_NOPTNR);
3231 		} else {
3232 			ce_xdiag_ptnrdrops++;
3233 			CE_XDIAG_SETSKIPCODE(aflt->flt_disp,
3234 			    CE_XDIAG_SKIP_UNIPROC);
3235 		}
3236 		kpreempt_enable();
3237 		break;
3238 
3239 	case CE_ACT_DONE:
3240 		break;
3241 
3242 	case CE_ACT(CE_DISP_BAD):
3243 	default:
3244 #ifdef DEBUG
3245 		cmn_err(CE_PANIC, "ce_scrub_post: Bad action '%d'", action);
3246 #endif
3247 		ce_xdiag_bad++;
3248 		CE_XDIAG_SETSKIPCODE(aflt->flt_disp, CE_XDIAG_SKIP_ACTBAD);
3249 		break;
3250 	}
3251 
3252 	return (0);
3253 }
3254 
3255 /*
3256  * We route all errors through a single switch statement.
3257  */
3258 void
3259 cpu_ue_log_err(struct async_flt *aflt)
3260 {
3261 	switch (aflt->flt_class) {
3262 	case CPU_FAULT:
3263 		cpu_ereport_init(aflt);
3264 		if (cpu_async_log_err(aflt, NULL))
3265 			cpu_ereport_post(aflt);
3266 		break;
3267 
3268 	case BUS_FAULT:
3269 		bus_async_log_err(aflt);
3270 		break;
3271 
3272 	default:
3273 		cmn_err(CE_WARN, "discarding async error %p with invalid "
3274 		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
3275 		return;
3276 	}
3277 }
3278 
3279 /*
3280  * Routine for panic hook callback from panic_idle().
3281  */
3282 void
3283 cpu_async_panic_callb(void)
3284 {
3285 	ch_async_flt_t ch_flt;
3286 	struct async_flt *aflt;
3287 	ch_cpu_errors_t cpu_error_regs;
3288 	uint64_t afsr_errs;
3289 
3290 	get_cpu_error_state(&cpu_error_regs);
3291 
3292 	afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3293 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_L3_ERRS);
3294 
3295 	if (afsr_errs) {
3296 
3297 		bzero(&ch_flt, sizeof (ch_async_flt_t));
3298 		aflt = (struct async_flt *)&ch_flt;
3299 		aflt->flt_id = gethrtime_waitfree();
3300 		aflt->flt_bus_id = getprocessorid();
3301 		aflt->flt_inst = CPU->cpu_id;
3302 		aflt->flt_stat = cpu_error_regs.afsr;
3303 		aflt->flt_addr = cpu_error_regs.afar;
3304 		aflt->flt_prot = AFLT_PROT_NONE;
3305 		aflt->flt_class = CPU_FAULT;
3306 		aflt->flt_priv = ((cpu_error_regs.afsr & C_AFSR_PRIV) != 0);
3307 		aflt->flt_panic = 1;
3308 		ch_flt.afsr_ext = cpu_error_regs.afsr_ext;
3309 		ch_flt.afsr_errs = afsr_errs;
3310 #if defined(SERRANO)
3311 		ch_flt.afar2 = cpu_error_regs.afar2;
3312 #endif	/* SERRANO */
3313 		(void) cpu_queue_events(&ch_flt, NULL, afsr_errs, NULL);
3314 	}
3315 }
3316 
3317 /*
3318  * Routine to convert a syndrome into a syndrome code.
3319  */
3320 static int
3321 synd_to_synd_code(int synd_status, ushort_t synd, uint64_t afsr_bit)
3322 {
3323 	if (synd_status == AFLT_STAT_INVALID)
3324 		return (-1);
3325 
3326 	/*
3327 	 * Use the syndrome to index the appropriate syndrome table,
3328 	 * to get the code indicating which bit(s) is(are) bad.
3329 	 */
3330 	if (afsr_bit &
3331 	    (C_AFSR_MSYND_ERRS | C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
3332 		if (afsr_bit & C_AFSR_MSYND_ERRS) {
3333 #if defined(JALAPENO) || defined(SERRANO)
3334 			if ((synd == 0) || (synd >= BSYND_TBL_SIZE))
3335 				return (-1);
3336 			else
3337 				return (BPAR0 + synd);
3338 #else /* JALAPENO || SERRANO */
3339 			if ((synd == 0) || (synd >= MSYND_TBL_SIZE))
3340 				return (-1);
3341 			else
3342 				return (mtag_syndrome_tab[synd]);
3343 #endif /* JALAPENO || SERRANO */
3344 		} else {
3345 			if ((synd == 0) || (synd >= ESYND_TBL_SIZE))
3346 				return (-1);
3347 			else
3348 				return (ecc_syndrome_tab[synd]);
3349 		}
3350 	} else {
3351 		return (-1);
3352 	}
3353 }
3354 
3355 int
3356 cpu_get_mem_sid(char *unum, char *buf, int buflen, int *lenp)
3357 {
3358 	if (&plat_get_mem_sid)
3359 		return (plat_get_mem_sid(unum, buf, buflen, lenp));
3360 	else
3361 		return (ENOTSUP);
3362 }
3363 
3364 int
3365 cpu_get_mem_offset(uint64_t flt_addr, uint64_t *offp)
3366 {
3367 	if (&plat_get_mem_offset)
3368 		return (plat_get_mem_offset(flt_addr, offp));
3369 	else
3370 		return (ENOTSUP);
3371 }
3372 
3373 int
3374 cpu_get_mem_addr(char *unum, char *sid, uint64_t offset, uint64_t *addrp)
3375 {
3376 	if (&plat_get_mem_addr)
3377 		return (plat_get_mem_addr(unum, sid, offset, addrp));
3378 	else
3379 		return (ENOTSUP);
3380 }
3381 
3382 /*
3383  * Routine to return a string identifying the physical name
3384  * associated with a memory/cache error.
3385  */
3386 int
3387 cpu_get_mem_unum(int synd_status, ushort_t flt_synd, uint64_t flt_stat,
3388     uint64_t flt_addr, int flt_bus_id, int flt_in_memory,
3389     ushort_t flt_status, char *buf, int buflen, int *lenp)
3390 {
3391 	int synd_code;
3392 	int ret;
3393 
3394 	/*
3395 	 * An AFSR of -1 defaults to a memory syndrome.
3396 	 */
3397 	if (flt_stat == (uint64_t)-1)
3398 		flt_stat = C_AFSR_CE;
3399 
3400 	synd_code = synd_to_synd_code(synd_status, flt_synd, flt_stat);
3401 
3402 	/*
3403 	 * Syndrome code must be either a single-bit error code
3404 	 * (0...143) or -1 for unum lookup.
3405 	 */
3406 	if (synd_code < 0 || synd_code >= M2)
3407 		synd_code = -1;
3408 	if (&plat_get_mem_unum) {
3409 		if ((ret = plat_get_mem_unum(synd_code, flt_addr, flt_bus_id,
3410 		    flt_in_memory, flt_status, buf, buflen, lenp)) != 0) {
3411 			buf[0] = '\0';
3412 			*lenp = 0;
3413 		}
3414 
3415 		return (ret);
3416 	}
3417 
3418 	return (ENOTSUP);
3419 }
3420 
3421 /*
3422  * Wrapper for cpu_get_mem_unum() routine that takes an
3423  * async_flt struct rather than explicit arguments.
3424  */
3425 int
3426 cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
3427     char *buf, int buflen, int *lenp)
3428 {
3429 	/*
3430 	 * If we come thru here for an IO bus error aflt->flt_stat will
3431 	 * not be the CPU AFSR, and we pass in a -1 to cpu_get_mem_unum()
3432 	 * so it will interpret this as a memory error.
3433 	 */
3434 	return (cpu_get_mem_unum(synd_status, aflt->flt_synd,
3435 	    (aflt->flt_class == BUS_FAULT) ?
3436 	    (uint64_t)-1 : ((ch_async_flt_t *)(aflt))->afsr_errs,
3437 	    aflt->flt_addr, aflt->flt_bus_id, aflt->flt_in_memory,
3438 	    aflt->flt_status, buf, buflen, lenp));
3439 }
3440 
3441 /*
3442  * This routine is a more generic interface to cpu_get_mem_unum()
3443  * that may be used by other modules (e.g. mm).
3444  */
3445 int
3446 cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
3447     char *buf, int buflen, int *lenp)
3448 {
3449 	int synd_status, flt_in_memory, ret;
3450 	ushort_t flt_status = 0;
3451 	char unum[UNUM_NAMLEN];
3452 
3453 	/*
3454 	 * Check for an invalid address.
3455 	 */
3456 	if (afar == (uint64_t)-1)
3457 		return (ENXIO);
3458 
3459 	if (synd == (uint64_t)-1)
3460 		synd_status = AFLT_STAT_INVALID;
3461 	else
3462 		synd_status = AFLT_STAT_VALID;
3463 
3464 	flt_in_memory = (*afsr & C_AFSR_MEMORY) &&
3465 	    pf_is_memory(afar >> MMU_PAGESHIFT);
3466 
3467 	/*
3468 	 * Need to turn on ECC_ECACHE for plat_get_mem_unum().
3469 	 * For Panther, L2$ is not external, so we don't want to
3470 	 * generate an E$ unum for those errors.
3471 	 */
3472 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3473 		if (*(afsr + 1) & C_AFSR_EXT_L3_ERRS)
3474 			flt_status |= ECC_ECACHE;
3475 	} else {
3476 		if (*afsr & C_AFSR_ECACHE)
3477 			flt_status |= ECC_ECACHE;
3478 	}
3479 
3480 	ret = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
3481 	    CPU->cpu_id, flt_in_memory, flt_status, unum, UNUM_NAMLEN, lenp);
3482 	if (ret != 0)
3483 		return (ret);
3484 
3485 	if (*lenp >= buflen)
3486 		return (ENAMETOOLONG);
3487 
3488 	(void) strncpy(buf, unum, buflen);
3489 
3490 	return (0);
3491 }
3492 
3493 /*
3494  * Routine to return memory information associated
3495  * with a physical address and syndrome.
3496  */
3497 int
3498 cpu_get_mem_info(uint64_t synd, uint64_t afar,
3499     uint64_t *mem_sizep, uint64_t *seg_sizep, uint64_t *bank_sizep,
3500     int *segsp, int *banksp, int *mcidp)
3501 {
3502 	int synd_status, synd_code;
3503 
3504 	if (afar == (uint64_t)-1)
3505 		return (ENXIO);
3506 
3507 	if (synd == (uint64_t)-1)
3508 		synd_status = AFLT_STAT_INVALID;
3509 	else
3510 		synd_status = AFLT_STAT_VALID;
3511 
3512 	synd_code = synd_to_synd_code(synd_status, synd, C_AFSR_CE);
3513 
3514 	if (p2get_mem_info != NULL)
3515 		return ((p2get_mem_info)(synd_code, afar,
3516 			mem_sizep, seg_sizep, bank_sizep,
3517 			segsp, banksp, mcidp));
3518 	else
3519 		return (ENOTSUP);
3520 }
3521 
3522 /*
3523  * Routine to return a string identifying the physical
3524  * name associated with a cpuid.
3525  */
3526 int
3527 cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
3528 {
3529 	int ret;
3530 	char unum[UNUM_NAMLEN];
3531 
3532 	if (&plat_get_cpu_unum) {
3533 		if ((ret = plat_get_cpu_unum(cpuid, unum, UNUM_NAMLEN, lenp))
3534 		    != 0)
3535 			return (ret);
3536 	} else {
3537 		return (ENOTSUP);
3538 	}
3539 
3540 	if (*lenp >= buflen)
3541 		return (ENAMETOOLONG);
3542 
3543 	(void) strncpy(buf, unum, buflen);
3544 
3545 	return (0);
3546 }
3547 
3548 /*
3549  * This routine exports the name buffer size.
3550  */
3551 size_t
3552 cpu_get_name_bufsize()
3553 {
3554 	return (UNUM_NAMLEN);
3555 }
3556 
/*
 * Historical function, apparently not used.  Intentionally empty.
 */
/* ARGSUSED */
void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
}
3564 
/*
 * Historical function only called for SBus errors in debugging.
 * Intentionally empty.
 */
/*ARGSUSED*/
void
read_ecc_data(struct async_flt *aflt, short verbose, short ce_err)
{
}
3572 
3573 /*
3574  * Clear the AFSR sticky bits.  The routine returns a non-zero value if
3575  * any of the AFSR's sticky errors are detected.  If a non-null pointer to
3576  * an async fault structure argument is passed in, the captured error state
3577  * (AFSR, AFAR) info will be returned in the structure.
3578  */
3579 int
3580 clear_errors(ch_async_flt_t *ch_flt)
3581 {
3582 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3583 	ch_cpu_errors_t	cpu_error_regs;
3584 
3585 	get_cpu_error_state(&cpu_error_regs);
3586 
3587 	if (ch_flt != NULL) {
3588 		aflt->flt_stat = cpu_error_regs.afsr & C_AFSR_MASK;
3589 		aflt->flt_addr = cpu_error_regs.afar;
3590 		ch_flt->afsr_ext = cpu_error_regs.afsr_ext;
3591 		ch_flt->afsr_errs = (cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3592 		    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS);
3593 #if defined(SERRANO)
3594 		ch_flt->afar2 = cpu_error_regs.afar2;
3595 #endif	/* SERRANO */
3596 	}
3597 
3598 	set_cpu_error_state(&cpu_error_regs);
3599 
3600 	return (((cpu_error_regs.afsr & C_AFSR_ALL_ERRS) |
3601 	    (cpu_error_regs.afsr_ext & C_AFSR_EXT_ALL_ERRS)) != 0);
3602 }
3603 
3604 /*
3605  * Clear any AFSR error bits, and check for persistence.
3606  *
3607  * It would be desirable to also insist that syndrome match.  PCI handling
3608  * has already filled flt_synd.  For errors trapped by CPU we only fill
3609  * flt_synd when we queue the event, so we do not have a valid flt_synd
3610  * during initial classification (it is valid if we're called as part of
3611  * subsequent low-pil additional classification attempts).  We could try
3612  * to determine which syndrome to use: we know we're only called for
3613  * CE/RCE (Jalapeno & Serrano) and CE/EMC (others) so the syndrome to use
3614  * would be esynd/none and esynd/msynd, respectively.  If that is
3615  * implemented then what do we do in the case that we do experience an
3616  * error on the same afar but with different syndrome?  At the very least
3617  * we should count such occurences.  Anyway, for now, we'll leave it as
3618  * it has been for ages.
3619  */
3620 static int
3621 clear_ecc(struct async_flt *aflt)
3622 {
3623 	ch_cpu_errors_t	cpu_error_regs;
3624 
3625 	/*
3626 	 * Snapshot the AFSR and AFAR and clear any errors
3627 	 */
3628 	get_cpu_error_state(&cpu_error_regs);
3629 	set_cpu_error_state(&cpu_error_regs);
3630 
3631 	/*
3632 	 * If any of the same memory access error bits are still on and
3633 	 * the AFAR matches, return that the error is persistent.
3634 	 */
3635 	return ((cpu_error_regs.afsr & (C_AFSR_MEMORY & aflt->flt_stat)) != 0 &&
3636 	    cpu_error_regs.afar == aflt->flt_addr);
3637 }
3638 
3639 /*
3640  * Turn off all cpu error detection, normally only used for panics.
3641  */
3642 void
3643 cpu_disable_errors(void)
3644 {
3645 	xt_all(set_error_enable_tl1, EN_REG_DISABLE, EER_SET_ABSOLUTE);
3646 
3647 	/*
3648 	 * With error detection now turned off, check the other cpus
3649 	 * logout areas for any unlogged errors.
3650 	 */
3651 	if (enable_check_other_cpus_logout) {
3652 		cpu_check_other_cpus_logout();
3653 		/*
3654 		 * Make a second pass over the logout areas, in case
3655 		 * there is a failing CPU in an error-trap loop which
3656 		 * will write to the logout area once it is emptied.
3657 		 */
3658 		cpu_check_other_cpus_logout();
3659 	}
3660 }
3661 
3662 /*
3663  * Enable errors.
3664  */
3665 void
3666 cpu_enable_errors(void)
3667 {
3668 	xt_all(set_error_enable_tl1, EN_REG_ENABLE, EER_SET_ABSOLUTE);
3669 }
3670 
3671 /*
3672  * Flush the entire ecache using displacement flush by reading through a
3673  * physical address range twice as large as the Ecache.
3674  */
3675 void
3676 cpu_flush_ecache(void)
3677 {
3678 	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size,
3679 	    cpunodes[CPU->cpu_id].ecache_linesize);
3680 }
3681 
3682 /*
3683  * Return CPU E$ set size - E$ size divided by the associativity.
3684  * We use this function in places where the CPU_PRIVATE ptr may not be
3685  * initialized yet.  Note that for send_mondo and in the Ecache scrubber,
3686  * we're guaranteed that CPU_PRIVATE is initialized.  Also, cpunodes is set
3687  * up before the kernel switches from OBP's to the kernel's trap table, so
3688  * we don't have to worry about cpunodes being unitialized.
3689  */
3690 int
3691 cpu_ecache_set_size(struct cpu *cp)
3692 {
3693 	if (CPU_PRIVATE(cp))
3694 		return (CPU_PRIVATE_VAL(cp, chpr_ec_set_size));
3695 
3696 	return (cpunodes[cp->cpu_id].ecache_size / cpu_ecache_nway());
3697 }
3698 
3699 /*
3700  * Flush Ecache line.
3701  * Uses ASI_EC_DIAG for Cheetah+ and Jalapeno.
3702  * Uses normal displacement flush for Cheetah.
3703  */
3704 static void
3705 cpu_flush_ecache_line(ch_async_flt_t *ch_flt)
3706 {
3707 	struct async_flt *aflt = (struct async_flt *)ch_flt;
3708 	int ec_set_size = cpu_ecache_set_size(CPU);
3709 
3710 	ecache_flush_line(aflt->flt_addr, ec_set_size);
3711 }
3712 
3713 /*
3714  * Scrub physical address.
3715  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3716  * Ecache or direct-mapped Ecache.
3717  */
3718 static void
3719 cpu_scrubphys(struct async_flt *aflt)
3720 {
3721 	int ec_set_size = cpu_ecache_set_size(CPU);
3722 
3723 	scrubphys(aflt->flt_addr, ec_set_size);
3724 }
3725 
3726 /*
3727  * Clear physical address.
3728  * Scrub code is different depending upon whether this a Cheetah+ with 2-way
3729  * Ecache or direct-mapped Ecache.
3730  */
3731 void
3732 cpu_clearphys(struct async_flt *aflt)
3733 {
3734 	int lsize = cpunodes[CPU->cpu_id].ecache_linesize;
3735 	int ec_set_size = cpu_ecache_set_size(CPU);
3736 
3737 
3738 	clearphys(P2ALIGN(aflt->flt_addr, lsize), ec_set_size, lsize);
3739 }
3740 
#if defined(CPU_IMP_ECACHE_ASSOC)
/*
 * Search every way of the captured E$ diagnostic data for a line that is
 * not invalid and whose tag matches the fault address.
 * Returns set# + 1 for the first match, or 0 if there is none.
 */
static int
cpu_ecache_line_valid(ch_async_flt_t *ch_flt)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	int totalsize = cpunodes[CPU->cpu_id].ecache_size;
	int ec_set_size = cpu_ecache_set_size(CPU);
	int nway = cpu_ecache_nway();
	ch_ec_data_t *ways = &ch_flt->flt_diag_data.chd_ec_data[0];
	int way;

	for (way = 0; way < nway; way++) {
		ch_ec_data_t *ecp = &ways[way];

		if (cpu_ectag_line_invalid(totalsize, ecp->ec_tag))
			continue;

		/* Compare only the PA bits that the tag actually holds. */
		if ((aflt->flt_addr & P2ALIGN(C_AFAR_PA, ec_set_size)) ==
		    cpu_ectag_to_pa(ec_set_size, ecp->ec_tag))
			return (way + 1);
	}

	return (0);
}
#endif /* CPU_IMP_ECACHE_ASSOC */
3765 
3766 /*
3767  * Check whether a line in the given logout info matches the specified
3768  * fault address.  If reqval is set then the line must not be Invalid.
3769  * Returns 0 on failure;  on success (way + 1) is returned an *level is
3770  * set to 2 for l2$ or 3 for l3$.
3771  */
3772 static int
3773 cpu_matching_ecache_line(uint64_t faddr, void *data, int reqval, int *level)
3774 {
3775 	ch_diag_data_t *cdp = data;
3776 	ch_ec_data_t *ecp;
3777 	int totalsize, ec_set_size;
3778 	int i, ways;
3779 	int match = 0;
3780 	int tagvalid;
3781 	uint64_t addr, tagpa;
3782 	int ispanther = IS_PANTHER(cpunodes[CPU->cpu_id].implementation);
3783 
3784 	/*
3785 	 * Check the l2$ logout data
3786 	 */
3787 	if (ispanther) {
3788 		ecp = &cdp->chd_l2_data[0];
3789 		ec_set_size = PN_L2_SET_SIZE;
3790 		ways = PN_L2_NWAYS;
3791 	} else {
3792 		ecp = &cdp->chd_ec_data[0];
3793 		ec_set_size = cpu_ecache_set_size(CPU);
3794 		ways = cpu_ecache_nway();
3795 		totalsize = cpunodes[CPU->cpu_id].ecache_size;
3796 	}
3797 	/* remove low order PA bits from fault address not used in PA tag */
3798 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3799 	for (i = 0; i < ways; i++, ecp++) {
3800 		if (ispanther) {
3801 			tagpa = PN_L2TAG_TO_PA(ecp->ec_tag);
3802 			tagvalid = !PN_L2_LINE_INVALID(ecp->ec_tag);
3803 		} else {
3804 			tagpa = cpu_ectag_to_pa(ec_set_size, ecp->ec_tag);
3805 			tagvalid = !cpu_ectag_line_invalid(totalsize,
3806 			    ecp->ec_tag);
3807 		}
3808 		if (tagpa == addr && (!reqval || tagvalid)) {
3809 			match = i + 1;
3810 			*level = 2;
3811 			break;
3812 		}
3813 	}
3814 
3815 	if (match || !ispanther)
3816 		return (match);
3817 
3818 	/* For Panther we also check the l3$ */
3819 	ecp = &cdp->chd_ec_data[0];
3820 	ec_set_size = PN_L3_SET_SIZE;
3821 	ways = PN_L3_NWAYS;
3822 	addr = faddr & P2ALIGN(C_AFAR_PA, ec_set_size);
3823 
3824 	for (i = 0; i < ways; i++, ecp++) {
3825 		if (PN_L3TAG_TO_PA(ecp->ec_tag) == addr && (!reqval ||
3826 		    !PN_L3_LINE_INVALID(ecp->ec_tag))) {
3827 			match = i + 1;
3828 			*level = 3;
3829 			break;
3830 		}
3831 	}
3832 
3833 	return (match);
3834 }
3835 
3836 #if defined(CPU_IMP_L1_CACHE_PARITY)
3837 /*
3838  * Record information related to the source of an Dcache Parity Error.
3839  */
3840 static void
3841 cpu_dcache_parity_info(ch_async_flt_t *ch_flt)
3842 {
3843 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3844 	int index;
3845 
3846 	/*
3847 	 * Since instruction decode cannot be done at high PIL
3848 	 * just examine the entire Dcache to locate the error.
3849 	 */
3850 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3851 		ch_flt->parity_data.dpe.cpl_way = -1;
3852 		ch_flt->parity_data.dpe.cpl_off = -1;
3853 	}
3854 	for (index = 0; index < dc_set_size; index += dcache_linesize)
3855 		cpu_dcache_parity_check(ch_flt, index);
3856 }
3857 
3858 /*
3859  * Check all ways of the Dcache at a specified index for good parity.
3860  */
3861 static void
3862 cpu_dcache_parity_check(ch_async_flt_t *ch_flt, int index)
3863 {
3864 	int dc_set_size = dcache_size / CH_DCACHE_NWAY;
3865 	uint64_t parity_bits, pbits, data_word;
3866 	static int parity_bits_popc[] = { 0, 1, 1, 0 };
3867 	int way, word, data_byte;
3868 	ch_dc_data_t *dcp = &ch_flt->parity_data.dpe.cpl_dc[0];
3869 	ch_dc_data_t tmp_dcp;
3870 
3871 	for (way = 0; way < CH_DCACHE_NWAY; way++, dcp++) {
3872 		/*
3873 		 * Perform diagnostic read.
3874 		 */
3875 		get_dcache_dtag(index + way * dc_set_size,
3876 				(uint64_t *)&tmp_dcp);
3877 
3878 		/*
3879 		 * Check tag for even parity.
3880 		 * Sum of 1 bits (including parity bit) should be even.
3881 		 */
3882 		if (popc64(tmp_dcp.dc_tag & CHP_DCTAG_PARMASK) & 1) {
3883 			/*
3884 			 * If this is the first error log detailed information
3885 			 * about it and check the snoop tag. Otherwise just
3886 			 * record the fact that we found another error.
3887 			 */
3888 			if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3889 				ch_flt->parity_data.dpe.cpl_way = way;
3890 				ch_flt->parity_data.dpe.cpl_cache =
3891 				    CPU_DC_PARITY;
3892 				ch_flt->parity_data.dpe.cpl_tag |= CHP_DC_TAG;
3893 
3894 				if (popc64(tmp_dcp.dc_sntag &
3895 						CHP_DCSNTAG_PARMASK) & 1) {
3896 					ch_flt->parity_data.dpe.cpl_tag |=
3897 								CHP_DC_SNTAG;
3898 					ch_flt->parity_data.dpe.cpl_lcnt++;
3899 				}
3900 
3901 				bcopy(&tmp_dcp, dcp, sizeof (ch_dc_data_t));
3902 			}
3903 
3904 			ch_flt->parity_data.dpe.cpl_lcnt++;
3905 		}
3906 
3907 		if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
3908 			/*
3909 			 * Panther has more parity bits than the other
3910 			 * processors for covering dcache data and so each
3911 			 * byte of data in each word has its own parity bit.
3912 			 */
3913 			parity_bits = tmp_dcp.dc_pn_data_parity;
3914 			for (word = 0; word < 4; word++) {
3915 				data_word = tmp_dcp.dc_data[word];
3916 				pbits = parity_bits & PN_DC_DATA_PARITY_MASK;
3917 				for (data_byte = 0; data_byte < 8;
3918 				    data_byte++) {
3919 					if (((popc64(data_word &
3920 					    PN_DC_DATA_PARITY_MASK)) & 1) ^
3921 					    (pbits & 1)) {
3922 						cpu_record_dc_data_parity(
3923 						ch_flt, dcp, &tmp_dcp, way,
3924 						word);
3925 					}
3926 					pbits >>= 1;
3927 					data_word >>= 8;
3928 				}
3929 				parity_bits >>= 8;
3930 			}
3931 		} else {
3932 			/*
3933 			 * Check data array for even parity.
3934 			 * The 8 parity bits are grouped into 4 pairs each
3935 			 * of which covers a 64-bit word.  The endianness is
3936 			 * reversed -- the low-order parity bits cover the
3937 			 * high-order data words.
3938 			 */
3939 			parity_bits = tmp_dcp.dc_utag >> 8;
3940 			for (word = 0; word < 4; word++) {
3941 				pbits = (parity_bits >> (6 - word * 2)) & 3;
3942 				if ((popc64(tmp_dcp.dc_data[word]) +
3943 				    parity_bits_popc[pbits]) & 1) {
3944 					cpu_record_dc_data_parity(ch_flt, dcp,
3945 					    &tmp_dcp, way, word);
3946 				}
3947 			}
3948 		}
3949 	}
3950 }
3951 
3952 static void
3953 cpu_record_dc_data_parity(ch_async_flt_t *ch_flt,
3954     ch_dc_data_t *dest_dcp, ch_dc_data_t *src_dcp, int way, int word)
3955 {
3956 	/*
3957 	 * If this is the first error log detailed information about it.
3958 	 * Otherwise just record the fact that we found another error.
3959 	 */
3960 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
3961 		ch_flt->parity_data.dpe.cpl_way = way;
3962 		ch_flt->parity_data.dpe.cpl_cache = CPU_DC_PARITY;
3963 		ch_flt->parity_data.dpe.cpl_off = word * 8;
3964 		bcopy(src_dcp, dest_dcp, sizeof (ch_dc_data_t));
3965 	}
3966 	ch_flt->parity_data.dpe.cpl_lcnt++;
3967 }
3968 
3969 /*
3970  * Record information related to the source of an Icache Parity Error.
3971  *
3972  * Called with the Icache disabled so any diagnostic accesses are safe.
3973  */
3974 static void
3975 cpu_icache_parity_info(ch_async_flt_t *ch_flt)
3976 {
3977 	int	ic_set_size;
3978 	int	ic_linesize;
3979 	int	index;
3980 
3981 	if (CPU_PRIVATE(CPU)) {
3982 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
3983 		    CH_ICACHE_NWAY;
3984 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
3985 	} else {
3986 		ic_set_size = icache_size / CH_ICACHE_NWAY;
3987 		ic_linesize = icache_linesize;
3988 	}
3989 
3990 	ch_flt->parity_data.ipe.cpl_way = -1;
3991 	ch_flt->parity_data.ipe.cpl_off = -1;
3992 
3993 	for (index = 0; index < ic_set_size; index += ic_linesize)
3994 		cpu_icache_parity_check(ch_flt, index);
3995 }
3996 
3997 /*
3998  * Check all ways of the Icache at a specified index for good parity.
3999  */
4000 static void
4001 cpu_icache_parity_check(ch_async_flt_t *ch_flt, int index)
4002 {
4003 	uint64_t parmask, pn_inst_parity;
4004 	int ic_set_size;
4005 	int ic_linesize;
4006 	int flt_index, way, instr, num_instr;
4007 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4008 	ch_ic_data_t *icp = &ch_flt->parity_data.ipe.cpl_ic[0];
4009 	ch_ic_data_t tmp_icp;
4010 
4011 	if (CPU_PRIVATE(CPU)) {
4012 		ic_set_size = CPU_PRIVATE_VAL(CPU, chpr_icache_size) /
4013 		    CH_ICACHE_NWAY;
4014 		ic_linesize = CPU_PRIVATE_VAL(CPU, chpr_icache_linesize);
4015 	} else {
4016 		ic_set_size = icache_size / CH_ICACHE_NWAY;
4017 		ic_linesize = icache_linesize;
4018 	}
4019 
4020 	/*
4021 	 * Panther has twice as many instructions per icache line and the
4022 	 * instruction parity bit is in a different location.
4023 	 */
4024 	if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
4025 		num_instr = PN_IC_DATA_REG_SIZE / sizeof (uint64_t);
4026 		pn_inst_parity = PN_ICDATA_PARITY_BIT_MASK;
4027 	} else {
4028 		num_instr = CH_IC_DATA_REG_SIZE / sizeof (uint64_t);
4029 		pn_inst_parity = 0;
4030 	}
4031 
4032 	/*
4033 	 * Index at which we expect to find the parity error.
4034 	 */
4035 	flt_index = P2ALIGN(aflt->flt_addr % ic_set_size, ic_linesize);
4036 
4037 	for (way = 0; way < CH_ICACHE_NWAY; way++, icp++) {
4038 		/*
4039 		 * Diagnostic reads expect address argument in ASI format.
4040 		 */
4041 		get_icache_dtag(2 * (index + way * ic_set_size),
4042 				(uint64_t *)&tmp_icp);
4043 
4044 		/*
4045 		 * If this is the index in which we expect to find the
4046 		 * error log detailed information about each of the ways.
4047 		 * This information will be displayed later if we can't
4048 		 * determine the exact way in which the error is located.
4049 		 */
4050 		if (flt_index == index)
4051 			bcopy(&tmp_icp, icp, sizeof (ch_ic_data_t));
4052 
4053 		/*
4054 		 * Check tag for even parity.
4055 		 * Sum of 1 bits (including parity bit) should be even.
4056 		 */
4057 		if (popc64(tmp_icp.ic_patag & CHP_ICPATAG_PARMASK) & 1) {
4058 			/*
4059 			 * If this way is the one in which we expected
4060 			 * to find the error record the way and check the
4061 			 * snoop tag. Otherwise just record the fact we
4062 			 * found another error.
4063 			 */
4064 			if (flt_index == index) {
4065 				ch_flt->parity_data.ipe.cpl_way = way;
4066 				ch_flt->parity_data.ipe.cpl_tag |= CHP_IC_TAG;
4067 
4068 				if (popc64(tmp_icp.ic_sntag &
4069 						CHP_ICSNTAG_PARMASK) & 1) {
4070 					ch_flt->parity_data.ipe.cpl_tag |=
4071 								CHP_IC_SNTAG;
4072 					ch_flt->parity_data.ipe.cpl_lcnt++;
4073 				}
4074 
4075 			}
4076 			ch_flt->parity_data.ipe.cpl_lcnt++;
4077 			continue;
4078 		}
4079 
4080 		/*
4081 		 * Check instruction data for even parity.
4082 		 * Bits participating in parity differ for PC-relative
4083 		 * versus non-PC-relative instructions.
4084 		 */
4085 		for (instr = 0; instr < num_instr; instr++) {
4086 			parmask = (tmp_icp.ic_data[instr] &
4087 					CH_ICDATA_PRED_ISPCREL) ?
4088 				(CHP_ICDATA_PCREL_PARMASK | pn_inst_parity) :
4089 				(CHP_ICDATA_NPCREL_PARMASK | pn_inst_parity);
4090 			if (popc64(tmp_icp.ic_data[instr] & parmask) & 1) {
4091 				/*
4092 				 * If this way is the one in which we expected
4093 				 * to find the error record the way and offset.
4094 				 * Otherwise just log the fact we found another
4095 				 * error.
4096 				 */
4097 				if (flt_index == index) {
4098 					ch_flt->parity_data.ipe.cpl_way = way;
4099 					ch_flt->parity_data.ipe.cpl_off =
4100 								instr * 4;
4101 				}
4102 				ch_flt->parity_data.ipe.cpl_lcnt++;
4103 				continue;
4104 			}
4105 		}
4106 	}
4107 }
4108 
4109 /*
4110  * Record information related to the source of an Pcache Parity Error.
4111  */
4112 static void
4113 cpu_pcache_parity_info(ch_async_flt_t *ch_flt)
4114 {
4115 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4116 	int index;
4117 
4118 	/*
4119 	 * Since instruction decode cannot be done at high PIL just
4120 	 * examine the entire Pcache to check for any parity errors.
4121 	 */
4122 	if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4123 		ch_flt->parity_data.dpe.cpl_way = -1;
4124 		ch_flt->parity_data.dpe.cpl_off = -1;
4125 	}
4126 	for (index = 0; index < pc_set_size; index += CH_PCACHE_LSIZE)
4127 		cpu_pcache_parity_check(ch_flt, index);
4128 }
4129 
4130 /*
4131  * Check all ways of the Pcache at a specified index for good parity.
4132  */
4133 static void
4134 cpu_pcache_parity_check(ch_async_flt_t *ch_flt, int index)
4135 {
4136 	int pc_set_size = CH_PCACHE_SIZE / CH_PCACHE_NWAY;
4137 	int pc_data_words = CH_PC_DATA_REG_SIZE / sizeof (uint64_t);
4138 	int way, word, pbit, parity_bits;
4139 	ch_pc_data_t *pcp = &ch_flt->parity_data.dpe.cpl_pc[0];
4140 	ch_pc_data_t tmp_pcp;
4141 
4142 	for (way = 0; way < CH_PCACHE_NWAY; way++, pcp++) {
4143 		/*
4144 		 * Perform diagnostic read.
4145 		 */
4146 		get_pcache_dtag(index + way * pc_set_size,
4147 				(uint64_t *)&tmp_pcp);
4148 		/*
4149 		 * Check data array for odd parity. There are 8 parity
4150 		 * bits (bits 57:50 of ASI_PCACHE_STATUS_DATA) and each
4151 		 * of those bits covers exactly 8 bytes of the data
4152 		 * array:
4153 		 *
4154 		 *	parity bit	P$ data bytes covered
4155 		 *	----------	---------------------
4156 		 *	50		63:56
4157 		 *	51		55:48
4158 		 *	52		47:40
4159 		 *	53		39:32
4160 		 *	54		31:24
4161 		 *	55		23:16
4162 		 *	56		15:8
4163 		 *	57		7:0
4164 		 */
4165 		parity_bits = PN_PC_PARITY_BITS(tmp_pcp.pc_status);
4166 		for (word = 0; word < pc_data_words; word++) {
4167 			pbit = (parity_bits >> (pc_data_words - word - 1)) & 1;
4168 			if ((popc64(tmp_pcp.pc_data[word]) & 1) ^ pbit) {
4169 				/*
4170 				 * If this is the first error log detailed
4171 				 * information about it. Otherwise just record
4172 				 * the fact that we found another error.
4173 				 */
4174 				if (ch_flt->parity_data.dpe.cpl_lcnt == 0) {
4175 					ch_flt->parity_data.dpe.cpl_way = way;
4176 					ch_flt->parity_data.dpe.cpl_cache =
4177 					    CPU_PC_PARITY;
4178 					ch_flt->parity_data.dpe.cpl_off =
4179 					    word * sizeof (uint64_t);
4180 					bcopy(&tmp_pcp, pcp,
4181 							sizeof (ch_pc_data_t));
4182 				}
4183 				ch_flt->parity_data.dpe.cpl_lcnt++;
4184 			}
4185 		}
4186 	}
4187 }
4188 
4189 
4190 /*
4191  * Add L1 Data cache data to the ereport payload.
4192  */
4193 static void
4194 cpu_payload_add_dcache(struct async_flt *aflt, nvlist_t *nvl)
4195 {
4196 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4197 	ch_dc_data_t *dcp;
4198 	ch_dc_data_t dcdata[CH_DCACHE_NWAY];
4199 	uint_t nelem;
4200 	int i, ways_to_check, ways_logged = 0;
4201 
4202 	/*
4203 	 * If this is an D$ fault then there may be multiple
4204 	 * ways captured in the ch_parity_log_t structure.
4205 	 * Otherwise, there will be at most one way captured
4206 	 * in the ch_diag_data_t struct.
4207 	 * Check each way to see if it should be encoded.
4208 	 */
4209 	if (ch_flt->flt_type == CPU_DC_PARITY)
4210 		ways_to_check = CH_DCACHE_NWAY;
4211 	else
4212 		ways_to_check = 1;
4213 	for (i = 0; i < ways_to_check; i++) {
4214 		if (ch_flt->flt_type == CPU_DC_PARITY)
4215 			dcp = &ch_flt->parity_data.dpe.cpl_dc[i];
4216 		else
4217 			dcp = &ch_flt->flt_diag_data.chd_dc_data;
4218 		if (dcp->dc_logflag == DC_LOGFLAG_MAGIC) {
4219 			bcopy(dcp, &dcdata[ways_logged],
4220 				sizeof (ch_dc_data_t));
4221 			ways_logged++;
4222 		}
4223 	}
4224 
4225 	/*
4226 	 * Add the dcache data to the payload.
4227 	 */
4228 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_WAYS,
4229 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4230 	if (ways_logged != 0) {
4231 		nelem = sizeof (ch_dc_data_t) / sizeof (uint64_t) * ways_logged;
4232 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1D_DATA,
4233 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)dcdata, NULL);
4234 	}
4235 }
4236 
4237 /*
4238  * Add L1 Instruction cache data to the ereport payload.
4239  */
4240 static void
4241 cpu_payload_add_icache(struct async_flt *aflt, nvlist_t *nvl)
4242 {
4243 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4244 	ch_ic_data_t *icp;
4245 	ch_ic_data_t icdata[CH_ICACHE_NWAY];
4246 	uint_t nelem;
4247 	int i, ways_to_check, ways_logged = 0;
4248 
4249 	/*
4250 	 * If this is an I$ fault then there may be multiple
4251 	 * ways captured in the ch_parity_log_t structure.
4252 	 * Otherwise, there will be at most one way captured
4253 	 * in the ch_diag_data_t struct.
4254 	 * Check each way to see if it should be encoded.
4255 	 */
4256 	if (ch_flt->flt_type == CPU_IC_PARITY)
4257 		ways_to_check = CH_ICACHE_NWAY;
4258 	else
4259 		ways_to_check = 1;
4260 	for (i = 0; i < ways_to_check; i++) {
4261 		if (ch_flt->flt_type == CPU_IC_PARITY)
4262 			icp = &ch_flt->parity_data.ipe.cpl_ic[i];
4263 		else
4264 			icp = &ch_flt->flt_diag_data.chd_ic_data;
4265 		if (icp->ic_logflag == IC_LOGFLAG_MAGIC) {
4266 			bcopy(icp, &icdata[ways_logged],
4267 				sizeof (ch_ic_data_t));
4268 			ways_logged++;
4269 		}
4270 	}
4271 
4272 	/*
4273 	 * Add the icache data to the payload.
4274 	 */
4275 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_WAYS,
4276 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4277 	if (ways_logged != 0) {
4278 		nelem = sizeof (ch_ic_data_t) / sizeof (uint64_t) * ways_logged;
4279 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L1I_DATA,
4280 		    DATA_TYPE_UINT64_ARRAY, nelem, (uint64_t *)icdata, NULL);
4281 	}
4282 }
4283 
4284 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4285 
4286 /*
4287  * Add ecache data to payload.
4288  */
4289 static void
4290 cpu_payload_add_ecache(struct async_flt *aflt, nvlist_t *nvl)
4291 {
4292 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4293 	ch_ec_data_t *ecp;
4294 	ch_ec_data_t ecdata[CHD_EC_DATA_SETS];
4295 	uint_t nelem;
4296 	int i, ways_logged = 0;
4297 
4298 	/*
4299 	 * Check each way to see if it should be encoded
4300 	 * and concatinate it into a temporary buffer.
4301 	 */
4302 	for (i = 0; i < CHD_EC_DATA_SETS; i++) {
4303 		ecp = &ch_flt->flt_diag_data.chd_ec_data[i];
4304 		if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4305 			bcopy(ecp, &ecdata[ways_logged],
4306 				sizeof (ch_ec_data_t));
4307 			ways_logged++;
4308 		}
4309 	}
4310 
4311 	/*
4312 	 * Panther CPUs have an additional level of cache and so
4313 	 * what we just collected was the L3 (ecache) and not the
4314 	 * L2 cache.
4315 	 */
4316 	if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4317 		/*
4318 		 * Add the L3 (ecache) data to the payload.
4319 		 */
4320 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_WAYS,
4321 		    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4322 		if (ways_logged != 0) {
4323 			nelem = sizeof (ch_ec_data_t) /
4324 			    sizeof (uint64_t) * ways_logged;
4325 			fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L3_DATA,
4326 			    DATA_TYPE_UINT64_ARRAY, nelem,
4327 			    (uint64_t *)ecdata, NULL);
4328 		}
4329 
4330 		/*
4331 		 * Now collect the L2 cache.
4332 		 */
4333 		ways_logged = 0;
4334 		for (i = 0; i < PN_L2_NWAYS; i++) {
4335 			ecp = &ch_flt->flt_diag_data.chd_l2_data[i];
4336 			if (ecp->ec_logflag == EC_LOGFLAG_MAGIC) {
4337 				bcopy(ecp, &ecdata[ways_logged],
4338 				    sizeof (ch_ec_data_t));
4339 				ways_logged++;
4340 			}
4341 		}
4342 	}
4343 
4344 	/*
4345 	 * Add the L2 cache data to the payload.
4346 	 */
4347 	fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_WAYS,
4348 	    DATA_TYPE_UINT8, (uint8_t)ways_logged, NULL);
4349 	if (ways_logged != 0) {
4350 		nelem = sizeof (ch_ec_data_t) /
4351 			sizeof (uint64_t) * ways_logged;
4352 		fm_payload_set(nvl, FM_EREPORT_PAYLOAD_NAME_L2_DATA,
4353 		    DATA_TYPE_UINT64_ARRAY, nelem,  (uint64_t *)ecdata, NULL);
4354 	}
4355 }
4356 
4357 /*
4358  * Encode the data saved in the ch_async_flt_t struct into
4359  * the FM ereport payload.
4360  */
4361 static void
4362 cpu_payload_add_aflt(struct async_flt *aflt, nvlist_t *payload,
4363 	nvlist_t *resource, int *afar_status, int *synd_status)
4364 {
4365 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4366 	*synd_status = AFLT_STAT_INVALID;
4367 	*afar_status = AFLT_STAT_INVALID;
4368 
4369 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR) {
4370 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR,
4371 		    DATA_TYPE_UINT64, aflt->flt_stat, NULL);
4372 	}
4373 
4374 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFSR_EXT) &&
4375 	    IS_PANTHER(cpunodes[aflt->flt_inst].implementation)) {
4376 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFSR_EXT,
4377 		    DATA_TYPE_UINT64, ch_flt->afsr_ext, NULL);
4378 	}
4379 
4380 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR_STATUS) {
4381 		*afar_status = afsr_to_afar_status(ch_flt->afsr_errs,
4382 		    ch_flt->flt_bit);
4383 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR_STATUS,
4384 		    DATA_TYPE_UINT8, (uint8_t)*afar_status, NULL);
4385 	}
4386 
4387 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_AFAR) {
4388 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_AFAR,
4389 		    DATA_TYPE_UINT64, aflt->flt_addr, NULL);
4390 	}
4391 
4392 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PC) {
4393 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PC,
4394 		    DATA_TYPE_UINT64, (uint64_t)aflt->flt_pc, NULL);
4395 	}
4396 
4397 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TL) {
4398 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TL,
4399 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_tl, NULL);
4400 	}
4401 
4402 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_TT) {
4403 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_TT,
4404 		    DATA_TYPE_UINT8, flt_to_trap_type(aflt), NULL);
4405 	}
4406 
4407 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_PRIV) {
4408 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_PRIV,
4409 		    DATA_TYPE_BOOLEAN_VALUE,
4410 		    (aflt->flt_priv ? B_TRUE : B_FALSE), NULL);
4411 	}
4412 
4413 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ME) {
4414 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ME,
4415 		    DATA_TYPE_BOOLEAN_VALUE,
4416 		    (aflt->flt_stat & C_AFSR_ME) ? B_TRUE : B_FALSE, NULL);
4417 	}
4418 
4419 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND_STATUS) {
4420 		*synd_status = afsr_to_synd_status(aflt->flt_inst,
4421 		    ch_flt->afsr_errs, ch_flt->flt_bit);
4422 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND_STATUS,
4423 		    DATA_TYPE_UINT8, (uint8_t)*synd_status, NULL);
4424 	}
4425 
4426 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_SYND) {
4427 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_SYND,
4428 		    DATA_TYPE_UINT16, (uint16_t)aflt->flt_synd, NULL);
4429 	}
4430 
4431 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_TYPE) {
4432 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_TYPE,
4433 		    DATA_TYPE_STRING, flt_to_error_type(aflt), NULL);
4434 	}
4435 
4436 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_ERR_DISP) {
4437 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_ERR_DISP,
4438 		    DATA_TYPE_UINT64, aflt->flt_disp, NULL);
4439 	}
4440 
4441 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L2)
4442 		cpu_payload_add_ecache(aflt, payload);
4443 
4444 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_COPYFUNCTION) {
4445 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_COPYFUNCTION,
4446 		    DATA_TYPE_UINT8, (uint8_t)aflt->flt_status & 0xff, NULL);
4447 	}
4448 
4449 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_HOWDETECTED) {
4450 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_HOWDETECTED,
4451 		    DATA_TYPE_UINT8, (uint8_t)(aflt->flt_status >> 8), NULL);
4452 	}
4453 
4454 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_INSTRBLOCK) {
4455 		fm_payload_set(payload, FM_EREPORT_PAYLOAD_NAME_INSTRBLOCK,
4456 		    DATA_TYPE_UINT32_ARRAY, 16,
4457 		    (uint32_t *)&ch_flt->flt_fpdata, NULL);
4458 	}
4459 
4460 #if defined(CPU_IMP_L1_CACHE_PARITY)
4461 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1D)
4462 		cpu_payload_add_dcache(aflt, payload);
4463 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1I)
4464 		cpu_payload_add_icache(aflt, payload);
4465 #endif	/* CPU_IMP_L1_CACHE_PARITY */
4466 
4467 #if defined(CHEETAH_PLUS)
4468 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_L1P)
4469 		cpu_payload_add_pcache(aflt, payload);
4470 	if (aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAGS_TLB)
4471 		cpu_payload_add_tlb(aflt, payload);
4472 #endif	/* CHEETAH_PLUS */
4473 	/*
4474 	 * Create the FMRI that goes into the payload
4475 	 * and contains the unum info if necessary.
4476 	 */
4477 	if ((aflt->flt_payload & FM_EREPORT_PAYLOAD_FLAG_RESOURCE) &&
4478 	    (*afar_status == AFLT_STAT_VALID)) {
4479 		char unum[UNUM_NAMLEN] = "";
4480 		char sid[DIMM_SERIAL_ID_LEN] = "";
4481 		int len;
4482 
4483 		if (cpu_get_mem_unum_aflt(*synd_status, aflt, unum,
4484 		    UNUM_NAMLEN, &len) == 0) {
4485 			uint64_t offset = (uint64_t)-1;
4486 			int ret;
4487 
4488 			ret = cpu_get_mem_sid(unum, sid, DIMM_SERIAL_ID_LEN,
4489 			    &len);
4490 
4491 			if (ret == 0) {
4492 				(void) cpu_get_mem_offset(aflt->flt_addr,
4493 				    &offset);
4494 			}
4495 
4496 			fm_fmri_mem_set(resource, FM_MEM_SCHEME_VERSION,
4497 			    NULL, unum, (ret == 0) ? sid : NULL, offset);
4498 			fm_payload_set(payload,
4499 			    FM_EREPORT_PAYLOAD_NAME_RESOURCE,
4500 			    DATA_TYPE_NVLIST, resource, NULL);
4501 		}
4502 	}
4503 }
4504 
4505 /*
4506  * Initialize the way info if necessary.
4507  */
4508 void
4509 cpu_ereport_init(struct async_flt *aflt)
4510 {
4511 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
4512 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4513 	ch_ec_data_t *l2p = &ch_flt->flt_diag_data.chd_l2_data[0];
4514 	int i;
4515 
4516 	/*
4517 	 * Initialize the info in the CPU logout structure.
4518 	 * The I$/D$ way information is not initialized here
4519 	 * since it is captured in the logout assembly code.
4520 	 */
4521 	for (i = 0; i < CHD_EC_DATA_SETS; i++)
4522 		(ecp + i)->ec_way = i;
4523 
4524 	for (i = 0; i < PN_L2_NWAYS; i++)
4525 		(l2p + i)->ec_way = i;
4526 }
4527 
4528 /*
4529  * Returns whether fault address is valid for this error bit and
4530  * whether the address is "in memory" (i.e. pf_is_memory returns 1).
4531  */
4532 int
4533 cpu_flt_in_memory(ch_async_flt_t *ch_flt, uint64_t t_afsr_bit)
4534 {
4535 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4536 
4537 	return ((aflt->flt_stat & C_AFSR_MEMORY) &&
4538 	    afsr_to_afar_status(ch_flt->afsr_errs, t_afsr_bit) ==
4539 	    AFLT_STAT_VALID &&
4540 	    pf_is_memory(aflt->flt_addr >> MMU_PAGESHIFT));
4541 }
4542 
4543 static void
4544 cpu_log_diag_info(ch_async_flt_t *ch_flt)
4545 {
4546 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4547 	ch_dc_data_t *dcp = &ch_flt->flt_diag_data.chd_dc_data;
4548 	ch_ic_data_t *icp = &ch_flt->flt_diag_data.chd_ic_data;
4549 	ch_ec_data_t *ecp = &ch_flt->flt_diag_data.chd_ec_data[0];
4550 #if defined(CPU_IMP_ECACHE_ASSOC)
4551 	int i, nway;
4552 #endif /* CPU_IMP_ECACHE_ASSOC */
4553 
4554 	/*
4555 	 * Check if the CPU log out captured was valid.
4556 	 */
4557 	if (ch_flt->flt_diag_data.chd_afar == LOGOUT_INVALID ||
4558 	    ch_flt->flt_data_incomplete)
4559 		return;
4560 
4561 #if defined(CPU_IMP_ECACHE_ASSOC)
4562 	nway = cpu_ecache_nway();
4563 	i =  cpu_ecache_line_valid(ch_flt);
4564 	if (i == 0 || i > nway) {
4565 		for (i = 0; i < nway; i++)
4566 			ecp[i].ec_logflag = EC_LOGFLAG_MAGIC;
4567 	} else
4568 		ecp[i - 1].ec_logflag = EC_LOGFLAG_MAGIC;
4569 #else /* CPU_IMP_ECACHE_ASSOC */
4570 	ecp->ec_logflag = EC_LOGFLAG_MAGIC;
4571 #endif /* CPU_IMP_ECACHE_ASSOC */
4572 
4573 #if defined(CHEETAH_PLUS)
4574 	pn_cpu_log_diag_l2_info(ch_flt);
4575 #endif /* CHEETAH_PLUS */
4576 
4577 	if (CH_DCTAG_MATCH(dcp->dc_tag, aflt->flt_addr)) {
4578 		dcp->dc_way = CH_DCIDX_TO_WAY(dcp->dc_idx);
4579 		dcp->dc_logflag = DC_LOGFLAG_MAGIC;
4580 	}
4581 
4582 	if (CH_ICTAG_MATCH(icp, aflt->flt_addr)) {
4583 		if (IS_PANTHER(cpunodes[aflt->flt_inst].implementation))
4584 			icp->ic_way = PN_ICIDX_TO_WAY(icp->ic_idx);
4585 		else
4586 			icp->ic_way = CH_ICIDX_TO_WAY(icp->ic_idx);
4587 		icp->ic_logflag = IC_LOGFLAG_MAGIC;
4588 	}
4589 }
4590 
4591 /*
4592  * Cheetah ECC calculation.
4593  *
4594  * We only need to do the calculation on the data bits and can ignore check
4595  * bit and Mtag bit terms in the calculation.
4596  */
static uint64_t ch_ecc_table[9][2] = {
	/*
	 * One row per generated ECC bit; the row index is the ECC bit
	 * number.  us3_gen_ecc() ANDs column 0 with the low-order 64 data
	 * bits and column 1 with the high-order 64 data bits.
	 *
	 * low order 64-bits   high-order 64-bits
	 */
	{ 0x46bffffeccd1177f, 0x488800022100014c },
	{ 0x42fccc81331ff77f, 0x14424f1010249184 },
	{ 0x8898827c222f1ffe, 0x22c1222808184aaf },
	{ 0xf7632203e131ccf1, 0xe1241121848292b8 },
	{ 0x7f5511421b113809, 0x901c88d84288aafe },
	{ 0x1d49412184882487, 0x8f338c87c044c6ef },
	{ 0xf552181014448344, 0x7ff8f4443e411911 },
	{ 0x2189240808f24228, 0xfeeff8cc81333f42 },
	{ 0x3280008440001112, 0xfee88b337ffffd62 },
};
4611 
/*
 * 64-bit population count: returns the number of 1 bits in val.
 * The UltraSPARC V9 POPC instruction is not used because some CPUs,
 * including Cheetahplus and Jaguar, do not support it.
 */
int
popc64(uint64_t val)
{
	int nbits = 0;

	/*
	 * "val & (val - 1)" clears the lowest set bit, so the loop runs
	 * once for every 1 bit.
	 */
	while (val != 0) {
		val &= val - 1;
		nbits++;
	}
	return (nbits);
}
4627 
4628 /*
4629  * Generate the 9 ECC bits for the 128-bit chunk based on the table above.
4630  * Note that xor'ing an odd number of 1 bits == 1 and xor'ing an even number
4631  * of 1 bits == 0, so we can just use the least significant bit of the popcnt
4632  * instead of doing all the xor's.
4633  */
4634 uint32_t
4635 us3_gen_ecc(uint64_t data_low, uint64_t data_high)
4636 {
4637 	int bitno, s;
4638 	int synd = 0;
4639 
4640 	for (bitno = 0; bitno < 9; bitno++) {
4641 		s = (popc64(data_low & ch_ecc_table[bitno][0]) +
4642 		    popc64(data_high & ch_ecc_table[bitno][1])) & 1;
4643 		synd |= (s << bitno);
4644 	}
4645 	return (synd);
4646 
4647 }
4648 
4649 /*
4650  * Queue one event based on ecc_type_to_info entry.  If the event has an AFT1
4651  * tag associated with it or is a fatal event (aflt_panic set), it is sent to
4652  * the UE event queue.  Otherwise it is dispatched to the CE event queue.
4653  */
4654 static void
4655 cpu_queue_one_event(ch_async_flt_t *ch_flt, char *reason,
4656     ecc_type_to_info_t *eccp, ch_diag_data_t *cdp)
4657 {
4658 	struct async_flt *aflt = (struct async_flt *)ch_flt;
4659 
4660 	if (reason &&
4661 	    strlen(reason) + strlen(eccp->ec_reason) < MAX_REASON_STRING) {
4662 		(void) strcat(reason, eccp->ec_reason);
4663 	}
4664 
4665 	ch_flt->flt_bit = eccp->ec_afsr_bit;
4666 	ch_flt->flt_type = eccp->ec_flt_type;
4667 	if (cdp != NULL && cdp->chd_afar != LOGOUT_INVALID)
4668 		ch_flt->flt_diag_data = *cdp;
4669 	else
4670 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
4671 	aflt->flt_in_memory = cpu_flt_in_memory(ch_flt, ch_flt->flt_bit);
4672 
4673 	if (ch_flt->flt_bit & C_AFSR_MSYND_ERRS)
4674 		aflt->flt_synd = GET_M_SYND(aflt->flt_stat);
4675 	else if (ch_flt->flt_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS))
4676 		aflt->flt_synd = GET_E_SYND(aflt->flt_stat);
4677 	else
4678 		aflt->flt_synd = 0;
4679 
4680 	aflt->flt_payload = eccp->ec_err_payload;
4681 
4682 	if (aflt->flt_panic || (eccp->ec_afsr_bit &
4683 	    (C_AFSR_LEVEL1 | C_AFSR_EXT_LEVEL1)))
4684 		cpu_errorq_dispatch(eccp->ec_err_class,
4685 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
4686 		    aflt->flt_panic);
4687 	else
4688 		cpu_errorq_dispatch(eccp->ec_err_class,
4689 		    (void *)ch_flt, sizeof (ch_async_flt_t), ce_queue,
4690 		    aflt->flt_panic);
4691 }
4692 
/*
 * Queue events on async event queue one event per error bit.  First we
 * queue the events that we "expect" for the given trap, then we queue events
 * that we may not expect.  Return number of events queued.
 *
 *	ch_flt		fault structure used as the template for every event;
 *			its AFSR/AFAR fields are rewritten before each dispatch
 *	reason		passed through unchanged to cpu_queue_one_event()
 *	t_afsr_errs	set of AFSR/AFSR_EXT error bits to report; the ME bit
 *			and any non-error bits are masked off on entry
 *	clop		optional logout area (may be NULL) supplying the
 *			diagnostic data attached to the queued events
 */
int
cpu_queue_events(ch_async_flt_t *ch_flt, char *reason, uint64_t t_afsr_errs,
    ch_cpu_logout_t *clop)
{
	struct async_flt *aflt = (struct async_flt *)ch_flt;
	ecc_type_to_info_t *eccp;
	int nevents = 0;
	/*
	 * Remember the primary register values; the fault structure is
	 * temporarily loaded with the shadow values below and must be
	 * restored before the primary events are queued.
	 */
	uint64_t primary_afar = aflt->flt_addr, primary_afsr = aflt->flt_stat;
#if defined(CHEETAH_PLUS)
	uint64_t orig_t_afsr_errs;
#endif
	uint64_t primary_afsr_ext = ch_flt->afsr_ext;
	uint64_t primary_afsr_errs = ch_flt->afsr_errs;
	ch_diag_data_t *cdp = NULL;

	/* Only real error bits are considered; ME is handled separately. */
	t_afsr_errs &= ((C_AFSR_ALL_ERRS & ~C_AFSR_ME) | C_AFSR_EXT_ALL_ERRS);

#if defined(CHEETAH_PLUS)
	orig_t_afsr_errs = t_afsr_errs;

	/*
	 * For Cheetah+, log the shadow AFSR/AFAR bits first.
	 */
	if (clop != NULL) {
		/*
		 * Set the AFSR and AFAR fields to the shadow registers.  The
		 * flt_addr and flt_stat fields will be reset to the primaries
		 * below, but the sdw_addr and sdw_stat will stay as the
		 * secondaries.
		 */
		cdp = &clop->clo_sdw_data;
		aflt->flt_addr = ch_flt->flt_sdw_afar = cdp->chd_afar;
		aflt->flt_stat = ch_flt->flt_sdw_afsr = cdp->chd_afsr;
		ch_flt->afsr_ext = ch_flt->flt_sdw_afsr_ext = cdp->chd_afsr_ext;
		ch_flt->afsr_errs = (cdp->chd_afsr_ext & C_AFSR_EXT_ALL_ERRS) |
		    (cdp->chd_afsr & C_AFSR_ALL_ERRS);

		/*
		 * If the primary and shadow AFSR differ, tag the shadow as
		 * the first fault.
		 */
		if ((primary_afar != cdp->chd_afar) ||
		    (primary_afsr_errs != ch_flt->afsr_errs)) {
			aflt->flt_stat |= (1ull << C_AFSR_FIRSTFLT_SHIFT);
		}

		/*
		 * Check AFSR bits as well as AFSR_EXT bits in order of
		 * the AFAR overwrite priority. Our stored AFSR_EXT value
		 * is expected to be zero for those CPUs which do not have
		 * an AFSR_EXT register.
		 */
		for (eccp = ecc_type_to_info; eccp->ec_desc != NULL; eccp++) {
			if ((eccp->ec_afsr_bit &
			    (ch_flt->afsr_errs & t_afsr_errs)) &&
			    ((eccp->ec_flags & aflt->flt_status) != 0)) {
				cpu_queue_one_event(ch_flt, reason, eccp, cdp);
				/*
				 * The diagnostic data is handed to the first
				 * queued event only.
				 */
				cdp = NULL;
				t_afsr_errs &= ~eccp->ec_afsr_bit;
				nevents++;
			}
		}

		/*
		 * If the ME bit is on in the primary AFSR turn all the
		 * error bits on again that may set the ME bit to make
		 * sure we see the ME AFSR error logs.
		 */
		if ((primary_afsr & C_AFSR_ME) != 0)
			t_afsr_errs = (orig_t_afsr_errs & C_AFSR_ALL_ME_ERRS);
	}
#endif	/* CHEETAH_PLUS */

	/* From here on the primary logout data is the default payload. */
	if (clop != NULL)
		cdp = &clop->clo_data;

	/*
	 * Queue expected errors, error bit and fault type must match
	 * in the ecc_type_to_info table.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if ((eccp->ec_afsr_bit & t_afsr_errs) != 0 &&
		    (eccp->ec_flags & aflt->flt_status) != 0) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary register view for this event. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			/* Each error bit is reported at most once. */
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}

	/*
	 * Queue unexpected errors, error bit only match.
	 */
	for (eccp = ecc_type_to_info; t_afsr_errs != 0 && eccp->ec_desc != NULL;
	    eccp++) {
		if (eccp->ec_afsr_bit & t_afsr_errs) {
#if defined(SERRANO)
			/*
			 * For FRC/FRU errors on Serrano the afar2 captures
			 * the address and the associated data is
			 * in the shadow logout area.
			 */
			if (eccp->ec_afsr_bit  & (C_AFSR_FRC | C_AFSR_FRU)) {
				if (clop != NULL)
					cdp = &clop->clo_sdw_data;
				aflt->flt_addr = ch_flt->afar2;
			} else {
				if (clop != NULL)
					cdp = &clop->clo_data;
				aflt->flt_addr = primary_afar;
			}
#else	/* SERRANO */
			aflt->flt_addr = primary_afar;
#endif	/* SERRANO */
			/* Restore the primary register view for this event. */
			aflt->flt_stat = primary_afsr;
			ch_flt->afsr_ext = primary_afsr_ext;
			ch_flt->afsr_errs = primary_afsr_errs;
			cpu_queue_one_event(ch_flt, reason, eccp, cdp);
			cdp = NULL;
			t_afsr_errs &= ~eccp->ec_afsr_bit;
			nevents++;
		}
	}
	return (nevents);
}
4845 
4846 /*
4847  * Return trap type number.
4848  */
4849 uint8_t
4850 flt_to_trap_type(struct async_flt *aflt)
4851 {
4852 	if (aflt->flt_status & ECC_I_TRAP)
4853 		return (TRAP_TYPE_ECC_I);
4854 	if (aflt->flt_status & ECC_D_TRAP)
4855 		return (TRAP_TYPE_ECC_D);
4856 	if (aflt->flt_status & ECC_F_TRAP)
4857 		return (TRAP_TYPE_ECC_F);
4858 	if (aflt->flt_status & ECC_C_TRAP)
4859 		return (TRAP_TYPE_ECC_C);
4860 	if (aflt->flt_status & ECC_DP_TRAP)
4861 		return (TRAP_TYPE_ECC_DP);
4862 	if (aflt->flt_status & ECC_IP_TRAP)
4863 		return (TRAP_TYPE_ECC_IP);
4864 	if (aflt->flt_status & ECC_ITLB_TRAP)
4865 		return (TRAP_TYPE_ECC_ITLB);
4866 	if (aflt->flt_status & ECC_DTLB_TRAP)
4867 		return (TRAP_TYPE_ECC_DTLB);
4868 	return (TRAP_TYPE_UNKNOWN);
4869 }
4870 
/*
 * Decide an error type based on detector and leaky/partner tests.
 * The following array is used for quick translation - it must
 * stay in sync with ce_dispact_t.
 *
 * flt_to_error_type() indexes this table directly with a CE_DISP_* value,
 * so the order of the entries below is significant.
 */

static char *cetypes[] = {
	CE_DISP_DESC_U,
	CE_DISP_DESC_I,
	CE_DISP_DESC_PP,
	CE_DISP_DESC_P,
	CE_DISP_DESC_L,
	CE_DISP_DESC_PS,
	CE_DISP_DESC_S
};
4886 
4887 char *
4888 flt_to_error_type(struct async_flt *aflt)
4889 {
4890 	ce_dispact_t dispact, disp;
4891 	uchar_t dtcrinfo, ptnrinfo, lkyinfo;
4892 
4893 	/*
4894 	 * The memory payload bundle is shared by some events that do
4895 	 * not perform any classification.  For those flt_disp will be
4896 	 * 0 and we will return "unknown".
4897 	 */
4898 	if (!ce_disp_inited || !aflt->flt_in_memory || aflt->flt_disp == 0)
4899 		return (cetypes[CE_DISP_UNKNOWN]);
4900 
4901 	dtcrinfo = CE_XDIAG_DTCRINFO(aflt->flt_disp);
4902 
4903 	/*
4904 	 * It is also possible that no scrub/classification was performed
4905 	 * by the detector, for instance where a disrupting error logged
4906 	 * in the AFSR while CEEN was off in cpu_deferred_error.
4907 	 */
4908 	if (!CE_XDIAG_EXT_ALG_APPLIED(dtcrinfo))
4909 		return (cetypes[CE_DISP_UNKNOWN]);
4910 
4911 	/*
4912 	 * Lookup type in initial classification/action table
4913 	 */
4914 	dispact = CE_DISPACT(ce_disp_table,
4915 	    CE_XDIAG_AFARMATCHED(dtcrinfo),
4916 	    CE_XDIAG_STATE(dtcrinfo),
4917 	    CE_XDIAG_CE1SEEN(dtcrinfo),
4918 	    CE_XDIAG_CE2SEEN(dtcrinfo));
4919 
4920 	/*
4921 	 * A bad lookup is not something to panic production systems for.
4922 	 */
4923 	ASSERT(dispact != CE_DISP_BAD);
4924 	if (dispact == CE_DISP_BAD)
4925 		return (cetypes[CE_DISP_UNKNOWN]);
4926 
4927 	disp = CE_DISP(dispact);
4928 
4929 	switch (disp) {
4930 	case CE_DISP_UNKNOWN:
4931 	case CE_DISP_INTERMITTENT:
4932 		break;
4933 
4934 	case CE_DISP_POSS_PERS:
4935 		/*
4936 		 * "Possible persistent" errors to which we have applied a valid
4937 		 * leaky test can be separated into "persistent" or "leaky".
4938 		 */
4939 		lkyinfo = CE_XDIAG_LKYINFO(aflt->flt_disp);
4940 		if (CE_XDIAG_TESTVALID(lkyinfo)) {
4941 			if (CE_XDIAG_CE1SEEN(lkyinfo) ||
4942 			    CE_XDIAG_CE2SEEN(lkyinfo))
4943 				disp = CE_DISP_LEAKY;
4944 			else
4945 				disp = CE_DISP_PERS;
4946 		}
4947 		break;
4948 
4949 	case CE_DISP_POSS_STICKY:
4950 		/*
4951 		 * Promote "possible sticky" results that have been
4952 		 * confirmed by a partner test to "sticky".  Unconfirmed
4953 		 * "possible sticky" events are left at that status - we do not
4954 		 * guess at any bad reader/writer etc status here.
4955 		 */
4956 		ptnrinfo = CE_XDIAG_PTNRINFO(aflt->flt_disp);
4957 		if (CE_XDIAG_TESTVALID(ptnrinfo) &&
4958 		    CE_XDIAG_CE1SEEN(ptnrinfo) && CE_XDIAG_CE2SEEN(ptnrinfo))
4959 			disp = CE_DISP_STICKY;
4960 
4961 		/*
4962 		 * Promote "possible sticky" results on a uniprocessor
4963 		 * to "sticky"
4964 		 */
4965 		if (disp == CE_DISP_POSS_STICKY &&
4966 		    CE_XDIAG_SKIPCODE(disp) == CE_XDIAG_SKIP_UNIPROC)
4967 			disp = CE_DISP_STICKY;
4968 		break;
4969 
4970 	default:
4971 		disp = CE_DISP_UNKNOWN;
4972 		break;
4973 	}
4974 
4975 	return (cetypes[disp]);
4976 }
4977 
4978 /*
4979  * Given the entire afsr, the specific bit to check and a prioritized list of
4980  * error bits, determine the validity of the various overwrite priority
4981  * features of the AFSR/AFAR: AFAR, ESYND and MSYND, each of which have
4982  * different overwrite priorities.
4983  *
4984  * Given a specific afsr error bit and the entire afsr, there are three cases:
4985  *   INVALID:	The specified bit is lower overwrite priority than some other
4986  *		error bit which is on in the afsr (or IVU/IVC).
4987  *   VALID:	The specified bit is higher priority than all other error bits
4988  *		which are on in the afsr.
4989  *   AMBIGUOUS: Another error bit (or bits) of equal priority to the specified
4990  *		bit is on in the afsr.
4991  */
4992 int
4993 afsr_to_overw_status(uint64_t afsr, uint64_t afsr_bit, uint64_t *ow_bits)
4994 {
4995 	uint64_t afsr_ow;
4996 
4997 	while ((afsr_ow = *ow_bits++) != 0) {
4998 		/*
4999 		 * If bit is in the priority class, check to see if another
5000 		 * bit in the same class is on => ambiguous.  Otherwise,
5001 		 * the value is valid.  If the bit is not on at this priority
5002 		 * class, but a higher priority bit is on, then the value is
5003 		 * invalid.
5004 		 */
5005 		if (afsr_ow & afsr_bit) {
5006 			/*
5007 			 * If equal pri bit is on, ambiguous.
5008 			 */
5009 			if (afsr & (afsr_ow & ~afsr_bit))
5010 				return (AFLT_STAT_AMBIGUOUS);
5011 			return (AFLT_STAT_VALID);
5012 		} else if (afsr & afsr_ow)
5013 			break;
5014 	}
5015 
5016 	/*
5017 	 * We didn't find a match or a higher priority bit was on.  Not
5018 	 * finding a match handles the case of invalid AFAR for IVC, IVU.
5019 	 */
5020 	return (AFLT_STAT_INVALID);
5021 }
5022 
5023 static int
5024 afsr_to_afar_status(uint64_t afsr, uint64_t afsr_bit)
5025 {
5026 #if defined(SERRANO)
5027 	if (afsr_bit & (C_AFSR_FRC | C_AFSR_FRU))
5028 		return (afsr_to_overw_status(afsr, afsr_bit, afar2_overwrite));
5029 	else
5030 #endif	/* SERRANO */
5031 		return (afsr_to_overw_status(afsr, afsr_bit, afar_overwrite));
5032 }
5033 
5034 static int
5035 afsr_to_esynd_status(uint64_t afsr, uint64_t afsr_bit)
5036 {
5037 	return (afsr_to_overw_status(afsr, afsr_bit, esynd_overwrite));
5038 }
5039 
5040 static int
5041 afsr_to_msynd_status(uint64_t afsr, uint64_t afsr_bit)
5042 {
5043 	return (afsr_to_overw_status(afsr, afsr_bit, msynd_overwrite));
5044 }
5045 
5046 static int
5047 afsr_to_synd_status(uint_t cpuid, uint64_t afsr, uint64_t afsr_bit)
5048 {
5049 #ifdef lint
5050 	cpuid = cpuid;
5051 #endif
5052 	if (afsr_bit & C_AFSR_MSYND_ERRS) {
5053 		return (afsr_to_msynd_status(afsr, afsr_bit));
5054 	} else if (afsr_bit & (C_AFSR_ESYND_ERRS | C_AFSR_EXT_ESYND_ERRS)) {
5055 #if defined(CHEETAH_PLUS)
5056 		/*
5057 		 * The E_SYND overwrite policy is slightly different
5058 		 * for Panther CPUs.
5059 		 */
5060 		if (IS_PANTHER(cpunodes[cpuid].implementation))
5061 			return (afsr_to_pn_esynd_status(afsr, afsr_bit));
5062 		else
5063 			return (afsr_to_esynd_status(afsr, afsr_bit));
5064 #else /* CHEETAH_PLUS */
5065 		return (afsr_to_esynd_status(afsr, afsr_bit));
5066 #endif /* CHEETAH_PLUS */
5067 	} else {
5068 		return (AFLT_STAT_INVALID);
5069 	}
5070 }
5071 
/*
 * Slave CPU stick synchronization.
 *
 * Runs a handshake with sticksync_master() through the shared
 * stick_sync_cmd variable.  Each round takes stick_iter skew samples,
 * averages them, and either adjusts the local STICK by the average or,
 * when the average is within stick_tsk, sets slave_done to end the
 * exchange.  Preemption is disabled for the whole procedure.
 */
void
sticksync_slave(void)
{
	int 		i;
	int		tries = 0;
	int64_t		tskew;
	int64_t		av_tskew;

	kpreempt_disable();
	/* wait for the master side */
	while (stick_sync_cmd != SLAVE_START)
		;
	/*
	 * Synchronization should only take a few tries at most. But in the
	 * odd case where the cpu isn't cooperating we'll keep trying. A cpu
	 * without its stick synchronized wouldn't be a good citizen.
	 */
	while (slave_done == 0) {
		/*
		 * Time skew calculation.
		 */
		av_tskew = tskew = 0;

		for (i = 0; i < stick_iter; i++) {
			/* make location hot */
			timestamp[EV_A_START] = 0;
			stick_timestamp(&timestamp[EV_A_START]);

			/* tell the master we're ready */
			stick_sync_cmd = MASTER_START;

			/* and wait */
			while (stick_sync_cmd != SLAVE_CONT)
				;
			/* Event B end */
			stick_timestamp(&timestamp[EV_B_END]);

			/*
			 * calculate time skew; EV_A_END and EV_B_START
			 * were recorded by the master
			 */
			tskew = ((timestamp[EV_B_END] - timestamp[EV_B_START])
				- (timestamp[EV_A_END] -
				timestamp[EV_A_START])) / 2;

			/* keep running count */
			av_tskew += tskew;
		} /* for */

		/*
		 * Adjust stick for time skew if not within the max allowed;
		 * otherwise we're all done.
		 */
		if (stick_iter != 0)
			av_tskew = av_tskew/stick_iter;
		if (ABS(av_tskew) > stick_tsk) {
			/*
			 * If the skew is 1 (the slave's STICK register
			 * is 1 STICK ahead of the master's), stick_adj
			 * could fail to adjust the slave's STICK register
			 * if the STICK read on the slave happens to
			 * align with the increment of the STICK.
			 * Therefore, we increment the skew to 2.
			 */
			if (av_tskew == 1)
				av_tskew++;
			stick_adj(-av_tskew);
		} else
			slave_done = 1;
#ifdef DEBUG
		/* record the average skew seen on each attempt */
		if (tries < DSYNC_ATTEMPTS)
			stick_sync_stats[CPU->cpu_id].skew_val[tries] =
				av_tskew;
		++tries;
#endif /* DEBUG */
#ifdef lint
		tries = tries;
#endif

	} /* while */

	/* allow the master to finish */
	stick_sync_cmd = EVENT_NULL;
	kpreempt_enable();
}
5157 
/*
 * Master CPU side of stick synchronization.
 *  - timestamp end of Event A
 *  - timestamp beginning of Event B
 *
 * Clears slave_done, releases the slave with SLAVE_START and then
 * services rounds of stick_iter handshakes until the slave sets
 * slave_done.  Preemption is disabled for the whole procedure.
 */
void
sticksync_master(void)
{
	int		i;

	kpreempt_disable();
	/* tell the slave we've started */
	slave_done = 0;
	stick_sync_cmd = SLAVE_START;

	while (slave_done == 0) {
		for (i = 0; i < stick_iter; i++) {
			/* wait for the slave */
			while (stick_sync_cmd != MASTER_START)
				;
			/* Event A end */
			stick_timestamp(&timestamp[EV_A_END]);

			/* make location hot */
			timestamp[EV_B_START] = 0;
			stick_timestamp(&timestamp[EV_B_START]);

			/* tell the slave to continue */
			stick_sync_cmd = SLAVE_CONT;
		} /* for */

		/*
		 * wait while slave calculates time skew; the slave changes
		 * stick_sync_cmd again when it starts another round or
		 * finishes
		 */
		while (stick_sync_cmd == SLAVE_CONT)
			;
	} /* while */
	kpreempt_enable();
}
5195 
/*
 * Cheetah/Cheetah+ have disrupting error for copyback's, so we don't need to
 * do Spitfire hack of xcall'ing all the cpus to ask to check for them.  Also,
 * in cpu_async_panic_callb, each cpu checks for CPU events on its way to
 * panic idle.
 *
 * Consequently this routine is intentionally a no-op; aflt is unused.
 */
/*ARGSUSED*/
void
cpu_check_allcpus(struct async_flt *aflt)
{}
5206 
/*
 * kmem cache that the per-cpu cheetah_private_t structures are returned
 * to by cpu_uninit_private().
 */
struct kmem_cache *ch_private_cache;
5208 
5209 /*
5210  * Cpu private unitialization.  Uninitialize the Ecache scrubber and
5211  * deallocate the scrubber data structures and cpu_private data structure.
5212  */
5213 void
5214 cpu_uninit_private(struct cpu *cp)
5215 {
5216 	cheetah_private_t *chprp = CPU_PRIVATE(cp);
5217 
5218 	ASSERT(chprp);
5219 	cpu_uninit_ecache_scrub_dr(cp);
5220 	CPU_PRIVATE(cp) = NULL;
5221 	ch_err_tl1_paddrs[cp->cpu_id] = NULL;
5222 	kmem_cache_free(ch_private_cache, chprp);
5223 	cmp_delete_cpu(cp->cpu_id);
5224 
5225 }
5226 
5227 /*
5228  * Cheetah Cache Scrubbing
5229  *
5230  * The primary purpose of Cheetah cache scrubbing is to reduce the exposure
5231  * of E$ tags, D$ data, and I$ data to cosmic ray events since they are not
5232  * protected by either parity or ECC.
5233  *
 * We currently default the E$ and D$ scan rate to 100 (scan 10% of the
 * cache per second). Due to the specifics of how the I$ control
 * logic works with respect to the ASI used to scrub I$ lines, the entire
 * I$ is scanned at once.
5238  */
5239 
/*
 * Tuneables to enable and disable the scrubbing of the caches, and to tune
 * scrubbing behavior.  These may be changed via /etc/system or using mdb
 * on a running system.
 */
int dcache_scrub_enable = 1;		/* D$ scrubbing is on by default */

/*
 * The following are the PIL levels that the softints/cross traps will fire at.
 */
uint_t ecache_scrub_pil = PIL_9;	/* E$ scrub PIL for cross traps */
uint_t dcache_scrub_pil = PIL_9;	/* D$ scrub PIL for cross traps */
uint_t icache_scrub_pil = PIL_9;	/* I$ scrub PIL for cross traps */

#if defined(JALAPENO)

/*
 * Due to several errata (82, 85, 86), we don't enable the L2$ scrubber
 * on Jalapeno.
 */
int ecache_scrub_enable = 0;

#else	/* JALAPENO */

/*
 * With all other cpu types, E$ scrubbing is on by default
 */
int ecache_scrub_enable = 1;

#endif	/* JALAPENO */


#if defined(CHEETAH_PLUS) || defined(JALAPENO) || defined(SERRANO)

/*
 * The I$ scrubber tends to cause latency problems for real-time SW, so it
 * is disabled by default on non-Cheetah systems
 */
int icache_scrub_enable = 0;

/*
 * Tuneables specifying the scrub calls per second and the scan rate
 * for each cache
 *
 * The cyclic times are set during boot based on the following values.
 * Changing these values in mdb after this time will have no effect.  If
 * a different value is desired, it must be set in /etc/system before a
 * reboot.
 */
int ecache_calls_a_sec = 1;		/* E$ scrub calls per second */
int dcache_calls_a_sec = 2;		/* D$ scrub calls per second */
int icache_calls_a_sec = 2;		/* I$ scrub calls per second */

/*
 * Scan rates; the scrub interrupt handlers interpret these in units of
 * tenths of a percent of the cache per second.
 */
int ecache_scan_rate_idle = 1;
int ecache_scan_rate_busy = 1;
int dcache_scan_rate_idle = 1;
int dcache_scan_rate_busy = 1;
int icache_scan_rate_idle = 1;
int icache_scan_rate_busy = 1;

#else	/* CHEETAH_PLUS || JALAPENO || SERRANO */

int icache_scrub_enable = 1;		/* I$ scrubbing is on by default */

int ecache_calls_a_sec = 100;		/* E$ scrub calls per seconds */
int dcache_calls_a_sec = 100;		/* D$ scrub calls per seconds */
int icache_calls_a_sec = 100;		/* I$ scrub calls per seconds */

int ecache_scan_rate_idle = 100;	/* E$ scan rate (in tenths of a %) */
int ecache_scan_rate_busy = 100;	/* E$ scan rate (in tenths of a %) */
int dcache_scan_rate_idle = 100;	/* D$ scan rate (in tenths of a %) */
int dcache_scan_rate_busy = 100;	/* D$ scan rate (in tenths of a %) */
int icache_scan_rate_idle = 100;	/* I$ scan rate (in tenths of a %) */
int icache_scan_rate_busy = 100;	/* I$ scan rate (in tenths of a %) */

#endif	/* CHEETAH_PLUS || JALAPENO || SERRANO */
5316 
/*
 * In order to scrub on offline cpus, a cross trap is sent.  The handler will
 * increment the outstanding request counter and schedule a softint to run
 * the scrubber.
 */
extern xcfunc_t cache_scrubreq_tl1;

/*
 * These are the softint functions for each cache scrubber.  They are
 * registered with add_softintr() in cpu_init_cache_scrub().
 */
static uint_t scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2);
static uint_t scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2);
static uint_t scrub_icache_line_intr(caddr_t arg1, caddr_t arg2);
5330 
/*
 * The cache scrub info table contains cache specific information
 * and allows for some of the scrub code to be table driven, reducing
 * duplication of cache similar code.
 *
 * This table keeps a copy of the value in the calls per second variable
 * (?cache_calls_a_sec).  This makes it much more difficult for someone
 * to cause us problems (for example, by setting ecache_calls_a_sec to 0 in
 * mdb in a misguided attempt to disable the scrubber).
 *
 * The csi_freq, csi_inum and cyclic ID fields start out as zero and are
 * filled in by cpu_init_cache_scrub().
 */
struct scrub_info {
	int		*csi_enable;	/* scrubber enable flag */
	int		csi_freq;	/* scrubber calls per second */
	int		csi_index;	/* index to chsm_outstanding[] */
	uint_t		csi_inum;	/* scrubber interrupt number */
	cyclic_id_t	csi_omni_cyc_id;	/* omni cyclic ID */
	cyclic_id_t	csi_offline_cyc_id;	/* offline cyclic ID */
	char		csi_name[3];	/* cache name for this scrub entry */
} cache_scrub_info[] = {
/* csi_name holds exactly two characters plus the terminating NUL */
{ &ecache_scrub_enable, 0, CACHE_SCRUBBER_INFO_E, 0, 0, 0, "E$"},
{ &dcache_scrub_enable, 0, CACHE_SCRUBBER_INFO_D, 0, 0, 0, "D$"},
{ &icache_scrub_enable, 0, CACHE_SCRUBBER_INFO_I, 0, 0, 0, "I$"}
};
5354 
5355 /*
5356  * If scrubbing is enabled, increment the outstanding request counter.  If it
5357  * is 1 (meaning there were no previous requests outstanding), call
5358  * setsoftint_tl1 through xt_one_unchecked, which eventually ends up doing
5359  * a self trap.
5360  */
5361 static void
5362 do_scrub(struct scrub_info *csi)
5363 {
5364 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5365 	int index = csi->csi_index;
5366 	uint32_t *outstanding = &csmp->chsm_outstanding[index];
5367 
5368 	if (*(csi->csi_enable) && (csmp->chsm_enable[index])) {
5369 		if (atomic_add_32_nv(outstanding, 1) == 1) {
5370 			xt_one_unchecked(CPU->cpu_id, setsoftint_tl1,
5371 			    csi->csi_inum, 0);
5372 		}
5373 	}
5374 }
5375 
5376 /*
5377  * Omni cyclics don't fire on offline cpus, so we use another cyclic to
5378  * cross-trap the offline cpus.
5379  */
5380 static void
5381 do_scrub_offline(struct scrub_info *csi)
5382 {
5383 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5384 
5385 	if (CPUSET_ISNULL(cpu_offline_set)) {
5386 		/*
5387 		 * No offline cpus - nothing to do
5388 		 */
5389 		return;
5390 	}
5391 
5392 	if (*(csi->csi_enable) && (csmp->chsm_enable[csi->csi_index])) {
5393 		xt_some(cpu_offline_set, cache_scrubreq_tl1, csi->csi_inum,
5394 		    csi->csi_index);
5395 	}
5396 }
5397 
5398 /*
5399  * This is the initial setup for the scrubber cyclics - it sets the
5400  * interrupt level, frequency, and function to call.
5401  */
5402 /*ARGSUSED*/
5403 static void
5404 cpu_scrub_cyclic_setup(void *arg, cpu_t *cpu, cyc_handler_t *hdlr,
5405     cyc_time_t *when)
5406 {
5407 	struct scrub_info *csi = (struct scrub_info *)arg;
5408 
5409 	ASSERT(csi != NULL);
5410 	hdlr->cyh_func = (cyc_func_t)do_scrub;
5411 	hdlr->cyh_level = CY_LOW_LEVEL;
5412 	hdlr->cyh_arg = arg;
5413 
5414 	when->cyt_when = 0;	/* Start immediately */
5415 	when->cyt_interval = NANOSEC / csi->csi_freq;
5416 }
5417 
5418 /*
5419  * Initialization for cache scrubbing.
5420  * This routine is called AFTER all cpus have had cpu_init_private called
5421  * to initialize their private data areas.
5422  */
5423 void
5424 cpu_init_cache_scrub(void)
5425 {
5426 	int i;
5427 	struct scrub_info *csi;
5428 	cyc_omni_handler_t omni_hdlr;
5429 	cyc_handler_t offline_hdlr;
5430 	cyc_time_t when;
5431 
5432 	/*
5433 	 * save away the maximum number of lines for the D$
5434 	 */
5435 	dcache_nlines = dcache_size / dcache_linesize;
5436 
5437 	/*
5438 	 * register the softints for the cache scrubbing
5439 	 */
5440 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_inum =
5441 	    add_softintr(ecache_scrub_pil, scrub_ecache_line_intr,
5442 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_E]);
5443 	cache_scrub_info[CACHE_SCRUBBER_INFO_E].csi_freq = ecache_calls_a_sec;
5444 
5445 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_inum =
5446 	    add_softintr(dcache_scrub_pil, scrub_dcache_line_intr,
5447 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_D]);
5448 	cache_scrub_info[CACHE_SCRUBBER_INFO_D].csi_freq = dcache_calls_a_sec;
5449 
5450 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_inum =
5451 	    add_softintr(icache_scrub_pil, scrub_icache_line_intr,
5452 	    (caddr_t)&cache_scrub_info[CACHE_SCRUBBER_INFO_I]);
5453 	cache_scrub_info[CACHE_SCRUBBER_INFO_I].csi_freq = icache_calls_a_sec;
5454 
5455 	/*
5456 	 * start the scrubbing for all the caches
5457 	 */
5458 	mutex_enter(&cpu_lock);
5459 	for (i = 0; i < CACHE_SCRUBBER_COUNT; i++) {
5460 
5461 		csi = &cache_scrub_info[i];
5462 
5463 		if (!(*csi->csi_enable))
5464 			continue;
5465 
5466 		/*
5467 		 * force the following to be true:
5468 		 *	1 <= calls_a_sec <= hz
5469 		 */
5470 		if (csi->csi_freq > hz) {
5471 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too high "
5472 				"(%d); resetting to hz (%d)", csi->csi_name,
5473 				csi->csi_freq, hz);
5474 			csi->csi_freq = hz;
5475 		} else if (csi->csi_freq < 1) {
5476 			cmn_err(CE_NOTE, "%s scrub calls_a_sec set too low "
5477 				"(%d); resetting to 1", csi->csi_name,
5478 				csi->csi_freq);
5479 			csi->csi_freq = 1;
5480 		}
5481 
5482 		omni_hdlr.cyo_online = cpu_scrub_cyclic_setup;
5483 		omni_hdlr.cyo_offline = NULL;
5484 		omni_hdlr.cyo_arg = (void *)csi;
5485 
5486 		offline_hdlr.cyh_func = (cyc_func_t)do_scrub_offline;
5487 		offline_hdlr.cyh_arg = (void *)csi;
5488 		offline_hdlr.cyh_level = CY_LOW_LEVEL;
5489 
5490 		when.cyt_when = 0;	/* Start immediately */
5491 		when.cyt_interval = NANOSEC / csi->csi_freq;
5492 
5493 		csi->csi_omni_cyc_id = cyclic_add_omni(&omni_hdlr);
5494 		csi->csi_offline_cyc_id = cyclic_add(&offline_hdlr, &when);
5495 	}
5496 	register_cpu_setup_func(cpu_scrub_cpu_setup, NULL);
5497 	mutex_exit(&cpu_lock);
5498 }
5499 
5500 /*
5501  * Indicate that the specified cpu is idle.
5502  */
5503 void
5504 cpu_idle_ecache_scrub(struct cpu *cp)
5505 {
5506 	if (CPU_PRIVATE(cp) != NULL) {
5507 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5508 		csmp->chsm_ecache_busy = ECACHE_CPU_IDLE;
5509 	}
5510 }
5511 
5512 /*
5513  * Indicate that the specified cpu is busy.
5514  */
5515 void
5516 cpu_busy_ecache_scrub(struct cpu *cp)
5517 {
5518 	if (CPU_PRIVATE(cp) != NULL) {
5519 		ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5520 		csmp->chsm_ecache_busy = ECACHE_CPU_BUSY;
5521 	}
5522 }
5523 
5524 /*
5525  * Initialization for cache scrubbing for the specified cpu.
5526  */
5527 void
5528 cpu_init_ecache_scrub_dr(struct cpu *cp)
5529 {
5530 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5531 	int cpuid = cp->cpu_id;
5532 
5533 	/* initialize the number of lines in the caches */
5534 	csmp->chsm_ecache_nlines = cpunodes[cpuid].ecache_size /
5535 	    cpunodes[cpuid].ecache_linesize;
5536 	csmp->chsm_icache_nlines = CPU_PRIVATE_VAL(cp, chpr_icache_size) /
5537 	    CPU_PRIVATE_VAL(cp, chpr_icache_linesize);
5538 
5539 	/*
5540 	 * do_scrub() and do_scrub_offline() check both the global
5541 	 * ?cache_scrub_enable and this per-cpu enable variable.  All scrubbers
5542 	 * check this value before scrubbing.  Currently, we use it to
5543 	 * disable the E$ scrubber on multi-core cpus or while running at
5544 	 * slowed speed.  For now, just turn everything on and allow
5545 	 * cpu_init_private() to change it if necessary.
5546 	 */
5547 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_E] = 1;
5548 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_D] = 1;
5549 	csmp->chsm_enable[CACHE_SCRUBBER_INFO_I] = 1;
5550 
5551 	cpu_busy_ecache_scrub(cp);
5552 }
5553 
5554 /*
5555  * Un-initialization for cache scrubbing for the specified cpu.
5556  */
5557 static void
5558 cpu_uninit_ecache_scrub_dr(struct cpu *cp)
5559 {
5560 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(cp, chpr_scrub_misc);
5561 
5562 	/*
5563 	 * un-initialize bookkeeping for cache scrubbing
5564 	 */
5565 	bzero(csmp, sizeof (ch_scrub_misc_t));
5566 
5567 	cpu_idle_ecache_scrub(cp);
5568 }
5569 
5570 /*
5571  * Called periodically on each CPU to scrub the D$.
5572  */
5573 static void
5574 scrub_dcache(int how_many)
5575 {
5576 	int i;
5577 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5578 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D];
5579 
5580 	/*
5581 	 * scrub the desired number of lines
5582 	 */
5583 	for (i = 0; i < how_many; i++) {
5584 		/*
5585 		 * scrub a D$ line
5586 		 */
5587 		dcache_inval_line(index);
5588 
5589 		/*
5590 		 * calculate the next D$ line to scrub, assumes
5591 		 * that dcache_nlines is a power of 2
5592 		 */
5593 		index = (index + 1) & (dcache_nlines - 1);
5594 	}
5595 
5596 	/*
5597 	 * set the scrub index for the next visit
5598 	 */
5599 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_D] = index;
5600 }
5601 
5602 /*
5603  * Handler for D$ scrub inum softint. Call scrub_dcache until
5604  * we decrement the outstanding request count to zero.
5605  */
5606 /*ARGSUSED*/
5607 static uint_t
5608 scrub_dcache_line_intr(caddr_t arg1, caddr_t arg2)
5609 {
5610 	int i;
5611 	int how_many;
5612 	int outstanding;
5613 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5614 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_D];
5615 	struct scrub_info *csi = (struct scrub_info *)arg1;
5616 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5617 		dcache_scan_rate_idle : dcache_scan_rate_busy;
5618 
5619 	/*
5620 	 * The scan rates are expressed in units of tenths of a
5621 	 * percent.  A scan rate of 1000 (100%) means the whole
5622 	 * cache is scanned every second.
5623 	 */
5624 	how_many = (dcache_nlines * scan_rate) / (1000 * csi->csi_freq);
5625 
5626 	do {
5627 		outstanding = *countp;
5628 		for (i = 0; i < outstanding; i++) {
5629 			scrub_dcache(how_many);
5630 		}
5631 	} while (atomic_add_32_nv(countp, -outstanding));
5632 
5633 	return (DDI_INTR_CLAIMED);
5634 }
5635 
5636 /*
5637  * Called periodically on each CPU to scrub the I$. The I$ is scrubbed
5638  * by invalidating lines. Due to the characteristics of the ASI which
5639  * is used to invalidate an I$ line, the entire I$ must be invalidated
5640  * vs. an individual I$ line.
5641  */
5642 static void
5643 scrub_icache(int how_many)
5644 {
5645 	int i;
5646 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5647 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I];
5648 	int icache_nlines = csmp->chsm_icache_nlines;
5649 
5650 	/*
5651 	 * scrub the desired number of lines
5652 	 */
5653 	for (i = 0; i < how_many; i++) {
5654 		/*
5655 		 * since the entire I$ must be scrubbed at once,
5656 		 * wait until the index wraps to zero to invalidate
5657 		 * the entire I$
5658 		 */
5659 		if (index == 0) {
5660 			icache_inval_all();
5661 		}
5662 
5663 		/*
5664 		 * calculate the next I$ line to scrub, assumes
5665 		 * that chsm_icache_nlines is a power of 2
5666 		 */
5667 		index = (index + 1) & (icache_nlines - 1);
5668 	}
5669 
5670 	/*
5671 	 * set the scrub index for the next visit
5672 	 */
5673 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_I] = index;
5674 }
5675 
5676 /*
5677  * Handler for I$ scrub inum softint. Call scrub_icache until
5678  * we decrement the outstanding request count to zero.
5679  */
5680 /*ARGSUSED*/
5681 static uint_t
5682 scrub_icache_line_intr(caddr_t arg1, caddr_t arg2)
5683 {
5684 	int i;
5685 	int how_many;
5686 	int outstanding;
5687 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5688 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_I];
5689 	struct scrub_info *csi = (struct scrub_info *)arg1;
5690 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5691 	    icache_scan_rate_idle : icache_scan_rate_busy;
5692 	int icache_nlines = csmp->chsm_icache_nlines;
5693 
5694 	/*
5695 	 * The scan rates are expressed in units of tenths of a
5696 	 * percent.  A scan rate of 1000 (100%) means the whole
5697 	 * cache is scanned every second.
5698 	 */
5699 	how_many = (icache_nlines * scan_rate) / (1000 * csi->csi_freq);
5700 
5701 	do {
5702 		outstanding = *countp;
5703 		for (i = 0; i < outstanding; i++) {
5704 			scrub_icache(how_many);
5705 		}
5706 	} while (atomic_add_32_nv(countp, -outstanding));
5707 
5708 	return (DDI_INTR_CLAIMED);
5709 }
5710 
5711 /*
5712  * Called periodically on each CPU to scrub the E$.
5713  */
5714 static void
5715 scrub_ecache(int how_many)
5716 {
5717 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5718 	int i;
5719 	int cpuid = CPU->cpu_id;
5720 	int index = csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E];
5721 	int nlines = csmp->chsm_ecache_nlines;
5722 	int linesize = cpunodes[cpuid].ecache_linesize;
5723 	int ec_set_size = cpu_ecache_set_size(CPU);
5724 
5725 	/*
5726 	 * scrub the desired number of lines
5727 	 */
5728 	for (i = 0; i < how_many; i++) {
5729 		/*
5730 		 * scrub the E$ line
5731 		 */
5732 		ecache_flush_line(ecache_flushaddr + (index * linesize),
5733 		    ec_set_size);
5734 
5735 		/*
5736 		 * calculate the next E$ line to scrub based on twice
5737 		 * the number of E$ lines (to displace lines containing
5738 		 * flush area data), assumes that the number of lines
5739 		 * is a power of 2
5740 		 */
5741 		index = (index + 1) & ((nlines << 1) - 1);
5742 	}
5743 
5744 	/*
5745 	 * set the ecache scrub index for the next visit
5746 	 */
5747 	csmp->chsm_flush_index[CACHE_SCRUBBER_INFO_E] = index;
5748 }
5749 
5750 /*
5751  * Handler for E$ scrub inum softint. Call the E$ scrubber until
5752  * we decrement the outstanding request count to zero.
5753  *
5754  * Due to interactions with cpu_scrub_cpu_setup(), the outstanding count may
5755  * become negative after the atomic_add_32_nv().  This is not a problem, as
5756  * the next trip around the loop won't scrub anything, and the next add will
5757  * reset the count back to zero.
5758  */
5759 /*ARGSUSED*/
5760 static uint_t
5761 scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
5762 {
5763 	int i;
5764 	int how_many;
5765 	int outstanding;
5766 	ch_scrub_misc_t *csmp = CPU_PRIVATE_PTR(CPU, chpr_scrub_misc);
5767 	uint32_t *countp = &csmp->chsm_outstanding[CACHE_SCRUBBER_INFO_E];
5768 	struct scrub_info *csi = (struct scrub_info *)arg1;
5769 	int scan_rate = (csmp->chsm_ecache_busy == ECACHE_CPU_IDLE) ?
5770 		ecache_scan_rate_idle : ecache_scan_rate_busy;
5771 	int ecache_nlines = csmp->chsm_ecache_nlines;
5772 
5773 	/*
5774 	 * The scan rates are expressed in units of tenths of a
5775 	 * percent.  A scan rate of 1000 (100%) means the whole
5776 	 * cache is scanned every second.
5777 	 */
5778 	how_many = (ecache_nlines * scan_rate) / (1000 * csi->csi_freq);
5779 
5780 	do {
5781 		outstanding = *countp;
5782 		for (i = 0; i < outstanding; i++) {
5783 			scrub_ecache(how_many);
5784 		}
5785 	} while (atomic_add_32_nv(countp, -outstanding));
5786 
5787 	return (DDI_INTR_CLAIMED);
5788 }
5789 
5790 /*
5791  * Timeout function to reenable CE
5792  */
5793 static void
5794 cpu_delayed_check_ce_errors(void *arg)
5795 {
5796 	if (!taskq_dispatch(ch_check_ce_tq, cpu_check_ce_errors, arg,
5797 	    TQ_NOSLEEP)) {
5798 		(void) timeout(cpu_delayed_check_ce_errors, arg,
5799 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5800 	}
5801 }
5802 
5803 /*
5804  * CE Deferred Re-enable after trap.
5805  *
5806  * When the CPU gets a disrupting trap for any of the errors
5807  * controlled by the CEEN bit, CEEN is disabled in the trap handler
5808  * immediately. To eliminate the possibility of multiple CEs causing
5809  * recursive stack overflow in the trap handler, we cannot
5810  * reenable CEEN while still running in the trap handler. Instead,
5811  * after a CE is logged on a CPU, we schedule a timeout function,
5812  * cpu_check_ce_errors(), to trigger after cpu_ceen_delay_secs
5813  * seconds. This function will check whether any further CEs
5814  * have occurred on that CPU, and if none have, will reenable CEEN.
5815  *
5816  * If further CEs have occurred while CEEN is disabled, another
5817  * timeout will be scheduled. This is to ensure that the CPU can
5818  * make progress in the face of CE 'storms', and that it does not
5819  * spend all its time logging CE errors.
5820  */
5821 static void
5822 cpu_check_ce_errors(void *arg)
5823 {
5824 	int	cpuid = (int)(uintptr_t)arg;
5825 	cpu_t	*cp;
5826 
5827 	/*
5828 	 * We acquire cpu_lock.
5829 	 */
5830 	ASSERT(curthread->t_pil == 0);
5831 
5832 	/*
5833 	 * verify that the cpu is still around, DR
5834 	 * could have got there first ...
5835 	 */
5836 	mutex_enter(&cpu_lock);
5837 	cp = cpu_get(cpuid);
5838 	if (cp == NULL) {
5839 		mutex_exit(&cpu_lock);
5840 		return;
5841 	}
5842 	/*
5843 	 * make sure we don't migrate across CPUs
5844 	 * while checking our CE status.
5845 	 */
5846 	kpreempt_disable();
5847 
5848 	/*
5849 	 * If we are running on the CPU that got the
5850 	 * CE, we can do the checks directly.
5851 	 */
5852 	if (cp->cpu_id == CPU->cpu_id) {
5853 		mutex_exit(&cpu_lock);
5854 		cpu_check_ce(TIMEOUT_CEEN_CHECK, 0, 0, 0);
5855 		kpreempt_enable();
5856 		return;
5857 	}
5858 	kpreempt_enable();
5859 
5860 	/*
5861 	 * send an x-call to get the CPU that originally
5862 	 * got the CE to do the necessary checks. If we can't
5863 	 * send the x-call, reschedule the timeout, otherwise we
5864 	 * lose CEEN forever on that CPU.
5865 	 */
5866 	if (CPU_XCALL_READY(cp->cpu_id) && (!(cp->cpu_flags & CPU_QUIESCED))) {
5867 		xc_one(cp->cpu_id, (xcfunc_t *)cpu_check_ce,
5868 		    TIMEOUT_CEEN_CHECK, 0);
5869 		mutex_exit(&cpu_lock);
5870 	} else {
5871 		/*
5872 		 * When the CPU is not accepting xcalls, or
5873 		 * the processor is offlined, we don't want to
5874 		 * incur the extra overhead of trying to schedule the
5875 		 * CE timeout indefinitely. However, we don't want to lose
5876 		 * CE checking forever.
5877 		 *
5878 		 * Keep rescheduling the timeout, accepting the additional
5879 		 * overhead as the cost of correctness in the case where we get
5880 		 * a CE, disable CEEN, offline the CPU during the
5881 		 * the timeout interval, and then online it at some
5882 		 * point in the future. This is unlikely given the short
5883 		 * cpu_ceen_delay_secs.
5884 		 */
5885 		mutex_exit(&cpu_lock);
5886 		(void) timeout(cpu_delayed_check_ce_errors,
5887 		    (void *)(uintptr_t)cp->cpu_id,
5888 		    drv_usectohz((clock_t)cpu_ceen_delay_secs * MICROSEC));
5889 	}
5890 }
5891 
5892 /*
5893  * This routine will check whether CEs have occurred while
5894  * CEEN is disabled. Any CEs detected will be logged and, if
5895  * possible, scrubbed.
5896  *
5897  * The memscrubber will also use this routine to clear any errors
5898  * caused by its scrubbing with CEEN disabled.
5899  *
5900  * flag == SCRUBBER_CEEN_CHECK
5901  *		called from memscrubber, just check/scrub, no reset
5902  *		paddr 	physical addr. for start of scrub pages
5903  *		vaddr 	virtual addr. for scrub area
5904  *		psz	page size of area to be scrubbed
5905  *
5906  * flag == TIMEOUT_CEEN_CHECK
5907  *		timeout function has triggered, reset timeout or CEEN
5908  *
5909  * Note: We must not migrate cpus during this function.  This can be
5910  * achieved by one of:
5911  *    - invoking as target of an x-call in which case we're at XCALL_PIL
5912  *	The flag value must be first xcall argument.
5913  *    - disabling kernel preemption.  This should be done for very short
5914  *	periods so is not suitable for SCRUBBER_CEEN_CHECK where we might
5915  *	scrub an extended area with cpu_check_block.  The call for
5916  *	TIMEOUT_CEEN_CHECK uses this so cpu_check_ce must be kept
5917  *	brief for this case.
5918  *    - binding to a cpu, eg with thread_affinity_set().  This is used
5919  *	in the SCRUBBER_CEEN_CHECK case, but is not practical for
5920  *	the TIMEOUT_CEEN_CHECK because both need cpu_lock.
5921  */
5922 void
5923 cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t psz)
5924 {
5925 	ch_cpu_errors_t	cpu_error_regs;
5926 	uint64_t	ec_err_enable;
5927 	uint64_t	page_offset;
5928 
5929 	/* Read AFSR */
5930 	get_cpu_error_state(&cpu_error_regs);
5931 
5932 	/*
5933 	 * If no CEEN errors have occurred during the timeout
5934 	 * interval, it is safe to re-enable CEEN and exit.
5935 	 */
5936 	if ((cpu_error_regs.afsr & C_AFSR_CECC_ERRS) == 0) {
5937 		if (flag == TIMEOUT_CEEN_CHECK &&
5938 		    !((ec_err_enable = get_error_enable()) & EN_REG_CEEN))
5939 			set_error_enable(ec_err_enable | EN_REG_CEEN);
5940 		return;
5941 	}
5942 
5943 	/*
5944 	 * Ensure that CEEN was not reenabled (maybe by DR) before
5945 	 * we log/clear the error.
5946 	 */
5947 	if ((ec_err_enable = get_error_enable()) & EN_REG_CEEN)
5948 	    set_error_enable(ec_err_enable & ~EN_REG_CEEN);
5949 
5950 	/*
5951 	 * log/clear the CE. If CE_CEEN_DEFER is passed, the
5952 	 * timeout will be rescheduled when the error is logged.
5953 	 */
5954 	if (!(cpu_error_regs.afsr & cpu_ce_not_deferred))
5955 	    cpu_ce_detected(&cpu_error_regs,
5956 		CE_CEEN_DEFER | CE_CEEN_TIMEOUT);
5957 	else
5958 	    cpu_ce_detected(&cpu_error_regs, CE_CEEN_TIMEOUT);
5959 
5960 	/*
5961 	 * If the memory scrubber runs while CEEN is
5962 	 * disabled, (or if CEEN is disabled during the
5963 	 * scrub as a result of a CE being triggered by
5964 	 * it), the range being scrubbed will not be
5965 	 * completely cleaned. If there are multiple CEs
5966 	 * in the range at most two of these will be dealt
5967 	 * with, (one by the trap handler and one by the
5968 	 * timeout). It is also possible that none are dealt
5969 	 * with, (CEEN disabled and another CE occurs before
5970 	 * the timeout triggers). So to ensure that the
5971 	 * memory is actually scrubbed, we have to access each
5972 	 * memory location in the range and then check whether
5973 	 * that access causes a CE.
5974 	 */
5975 	if (flag == SCRUBBER_CEEN_CHECK && va) {
5976 		if ((cpu_error_regs.afar >= pa) &&
5977 		    (cpu_error_regs.afar < (pa + psz))) {
5978 			/*
5979 			 * Force a load from physical memory for each
5980 			 * 64-byte block, then check AFSR to determine
5981 			 * whether this access caused an error.
5982 			 *
5983 			 * This is a slow way to do a scrub, but as it will
5984 			 * only be invoked when the memory scrubber actually
5985 			 * triggered a CE, it should not happen too
5986 			 * frequently.
5987 			 *
5988 			 * cut down what we need to check as the scrubber
5989 			 * has verified up to AFAR, so get it's offset
5990 			 * into the page and start there.
5991 			 */
5992 			page_offset = (uint64_t)(cpu_error_regs.afar &
5993 			    (psz - 1));
5994 			va = (caddr_t)(va + (P2ALIGN(page_offset, 64)));
5995 			psz -= (uint_t)(P2ALIGN(page_offset, 64));
5996 			cpu_check_block((caddr_t)(P2ALIGN((uint64_t)va, 64)),
5997 			    psz);
5998 		}
5999 	}
6000 
6001 	/*
6002 	 * Reset error enable if this CE is not masked.
6003 	 */
6004 	if ((flag == TIMEOUT_CEEN_CHECK) &&
6005 	    (cpu_error_regs.afsr & cpu_ce_not_deferred))
6006 	    set_error_enable(ec_err_enable | EN_REG_CEEN);
6007 
6008 }
6009 
6010 /*
6011  * Attempt a cpu logout for an error that we did not trap for, such
6012  * as a CE noticed with CEEN off.  It is assumed that we are still running
6013  * on the cpu that took the error and that we cannot migrate.  Returns
6014  * 0 on success, otherwise nonzero.
6015  */
6016 static int
6017 cpu_ce_delayed_ec_logout(uint64_t afar)
6018 {
6019 	ch_cpu_logout_t *clop;
6020 
6021 	if (CPU_PRIVATE(CPU) == NULL)
6022 		return (0);
6023 
6024 	clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6025 	if (cas64(&clop->clo_data.chd_afar, LOGOUT_INVALID, afar) !=
6026 	    LOGOUT_INVALID)
6027 		return (0);
6028 
6029 	cpu_delayed_logout(afar, clop);
6030 	return (1);
6031 }
6032 
6033 /*
6034  * We got an error while CEEN was disabled. We
6035  * need to clean up after it and log whatever
6036  * information we have on the CE.
6037  */
6038 void
6039 cpu_ce_detected(ch_cpu_errors_t *cpu_error_regs, int flag)
6040 {
6041 	ch_async_flt_t 	ch_flt;
6042 	struct async_flt *aflt;
6043 	char 		pr_reason[MAX_REASON_STRING];
6044 
6045 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6046 	ch_flt.flt_trapped_ce = flag;
6047 	aflt = (struct async_flt *)&ch_flt;
6048 	aflt->flt_stat = cpu_error_regs->afsr & C_AFSR_MASK;
6049 	ch_flt.afsr_ext = cpu_error_regs->afsr_ext;
6050 	ch_flt.afsr_errs = (cpu_error_regs->afsr_ext & C_AFSR_EXT_ALL_ERRS) |
6051 	    (cpu_error_regs->afsr & C_AFSR_ALL_ERRS);
6052 	aflt->flt_addr = cpu_error_regs->afar;
6053 #if defined(SERRANO)
6054 	ch_flt.afar2 = cpu_error_regs->afar2;
6055 #endif	/* SERRANO */
6056 	aflt->flt_pc = NULL;
6057 	aflt->flt_priv = ((cpu_error_regs->afsr & C_AFSR_PRIV) != 0);
6058 	aflt->flt_tl = 0;
6059 	aflt->flt_panic = 0;
6060 	cpu_log_and_clear_ce(&ch_flt);
6061 
6062 	/*
6063 	 * check if we caused any errors during cleanup
6064 	 */
6065 	if (clear_errors(&ch_flt)) {
6066 		pr_reason[0] = '\0';
6067 		(void) cpu_queue_events(&ch_flt, pr_reason, ch_flt.afsr_errs,
6068 		    NULL);
6069 	}
6070 }
6071 
6072 /*
6073  * Log/clear CEEN-controlled disrupting errors
6074  */
6075 static void
6076 cpu_log_and_clear_ce(ch_async_flt_t *ch_flt)
6077 {
6078 	struct async_flt *aflt;
6079 	uint64_t afsr, afsr_errs;
6080 	ch_cpu_logout_t *clop;
6081 	char 		pr_reason[MAX_REASON_STRING];
6082 	on_trap_data_t	*otp = curthread->t_ontrap;
6083 
6084 	aflt = (struct async_flt *)ch_flt;
6085 	afsr = aflt->flt_stat;
6086 	afsr_errs = ch_flt->afsr_errs;
6087 	aflt->flt_id = gethrtime_waitfree();
6088 	aflt->flt_bus_id = getprocessorid();
6089 	aflt->flt_inst = CPU->cpu_id;
6090 	aflt->flt_prot = AFLT_PROT_NONE;
6091 	aflt->flt_class = CPU_FAULT;
6092 	aflt->flt_status = ECC_C_TRAP;
6093 
6094 	pr_reason[0] = '\0';
6095 	/*
6096 	 * Get the CPU log out info for Disrupting Trap.
6097 	 */
6098 	if (CPU_PRIVATE(CPU) == NULL) {
6099 		clop = NULL;
6100 		ch_flt->flt_diag_data.chd_afar = LOGOUT_INVALID;
6101 	} else {
6102 		clop = CPU_PRIVATE_PTR(CPU, chpr_cecc_logout);
6103 	}
6104 
6105 	if (clop && ch_flt->flt_trapped_ce & CE_CEEN_TIMEOUT) {
6106 		ch_cpu_errors_t cpu_error_regs;
6107 
6108 		get_cpu_error_state(&cpu_error_regs);
6109 		(void) cpu_ce_delayed_ec_logout(cpu_error_regs.afar);
6110 		clop->clo_data.chd_afsr = cpu_error_regs.afsr;
6111 		clop->clo_data.chd_afar = cpu_error_regs.afar;
6112 		clop->clo_data.chd_afsr_ext = cpu_error_regs.afsr_ext;
6113 		clop->clo_sdw_data.chd_afsr = cpu_error_regs.shadow_afsr;
6114 		clop->clo_sdw_data.chd_afar = cpu_error_regs.shadow_afar;
6115 		clop->clo_sdw_data.chd_afsr_ext =
6116 		    cpu_error_regs.shadow_afsr_ext;
6117 #if defined(SERRANO)
6118 		clop->clo_data.chd_afar2 = cpu_error_regs.afar2;
6119 #endif	/* SERRANO */
6120 		ch_flt->flt_data_incomplete = 1;
6121 
6122 		/*
6123 		 * The logging/clear code expects AFSR/AFAR to be cleared.
6124 		 * The trap handler does it for CEEN enabled errors
6125 		 * so we need to do it here.
6126 		 */
6127 		set_cpu_error_state(&cpu_error_regs);
6128 	}
6129 
6130 #if defined(JALAPENO) || defined(SERRANO)
6131 	/*
6132 	 * FRC: Can't scrub memory as we don't have AFAR for Jalapeno.
6133 	 * For Serrano, even thou we do have the AFAR, we still do the
6134 	 * scrub on the RCE side since that's where the error type can
6135 	 * be properly classified as intermittent, persistent, etc.
6136 	 *
6137 	 * CE/RCE:  If error is in memory and AFAR is valid, scrub the memory.
6138 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6139 	 * the flt_status bits.
6140 	 */
6141 	if ((afsr & (C_AFSR_CE|C_AFSR_RCE)) &&
6142 	    (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6143 	    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_RCE)))) {
6144 		cpu_ce_scrub_mem_err(aflt, B_TRUE);
6145 	}
6146 #else /* JALAPENO || SERRANO */
6147 	/*
6148 	 * CE/EMC:  If error is in memory and AFAR is valid, scrub the memory.
6149 	 * Must scrub memory before cpu_queue_events, as scrubbing memory sets
6150 	 * the flt_status bits.
6151 	 */
6152 	if (afsr & (C_AFSR_CE|C_AFSR_EMC)) {
6153 		if (cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_CE)) ||
6154 		    cpu_flt_in_memory(ch_flt, (afsr & C_AFSR_EMC))) {
6155 			cpu_ce_scrub_mem_err(aflt, B_TRUE);
6156 		}
6157 	}
6158 
6159 #endif /* JALAPENO || SERRANO */
6160 
6161 	/*
6162 	 * Update flt_prot if this error occurred under on_trap protection.
6163 	 */
6164 	if (otp != NULL && (otp->ot_prot & OT_DATA_EC))
6165 		aflt->flt_prot = AFLT_PROT_EC;
6166 
6167 	/*
6168 	 * Queue events on the async event queue, one event per error bit.
6169 	 */
6170 	if (cpu_queue_events(ch_flt, pr_reason, afsr_errs, clop) == 0 ||
6171 	    (afsr_errs & (C_AFSR_CECC_ERRS | C_AFSR_EXT_CECC_ERRS)) == 0) {
6172 		ch_flt->flt_type = CPU_INV_AFSR;
6173 		cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_INVALID_AFSR,
6174 		    (void *)ch_flt, sizeof (ch_async_flt_t), ue_queue,
6175 		    aflt->flt_panic);
6176 	}
6177 
6178 	/*
6179 	 * Zero out + invalidate CPU logout.
6180 	 */
6181 	if (clop) {
6182 		bzero(clop, sizeof (ch_cpu_logout_t));
6183 		clop->clo_data.chd_afar = LOGOUT_INVALID;
6184 	}
6185 
6186 	/*
6187 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6188 	 * was disabled, we need to flush either the entire
6189 	 * E$ or an E$ line.
6190 	 */
6191 #if defined(JALAPENO) || defined(SERRANO)
6192 	if (afsr & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_CPU | C_AFSR_WDC))
6193 #else	/* JALAPENO || SERRANO */
6194 	if (afsr_errs & (C_AFSR_EDC | C_AFSR_CPC | C_AFSR_WDC | C_AFSR_L3_EDC |
6195 	    C_AFSR_L3_CPC | C_AFSR_L3_WDC))
6196 #endif	/* JALAPENO || SERRANO */
6197 		cpu_error_ecache_flush(ch_flt);
6198 
6199 }
6200 
6201 /*
6202  * depending on the error type, we determine whether we
6203  * need to flush the entire ecache or just a line.
6204  */
6205 static int
6206 cpu_error_ecache_flush_required(ch_async_flt_t *ch_flt)
6207 {
6208 	struct async_flt *aflt;
6209 	uint64_t	afsr;
6210 	uint64_t	afsr_errs = ch_flt->afsr_errs;
6211 
6212 	aflt = (struct async_flt *)ch_flt;
6213 	afsr = aflt->flt_stat;
6214 
6215 	/*
6216 	 * If we got multiple errors, no point in trying
6217 	 * the individual cases, just flush the whole cache
6218 	 */
6219 	if (afsr & C_AFSR_ME) {
6220 		return (ECACHE_FLUSH_ALL);
6221 	}
6222 
6223 	/*
6224 	 * If either a CPC, WDC or EDC error has occurred while CEEN
6225 	 * was disabled, we need to flush entire E$. We can't just
6226 	 * flush the cache line affected as the ME bit
6227 	 * is not set when multiple correctable errors of the same
6228 	 * type occur, so we might have multiple CPC or EDC errors,
6229 	 * with only the first recorded.
6230 	 */
6231 #if defined(JALAPENO) || defined(SERRANO)
6232 	if (afsr & (C_AFSR_CPC | C_AFSR_CPU | C_AFSR_EDC | C_AFSR_WDC)) {
6233 #else	/* JALAPENO || SERRANO */
6234 	if (afsr_errs & (C_AFSR_CPC | C_AFSR_EDC | C_AFSR_WDC | C_AFSR_L3_CPC |
6235 	    C_AFSR_L3_EDC | C_AFSR_L3_WDC)) {
6236 #endif	/* JALAPENO || SERRANO */
6237 		return (ECACHE_FLUSH_ALL);
6238 	}
6239 
6240 #if defined(JALAPENO) || defined(SERRANO)
6241 	/*
6242 	 * If only UE or RUE is set, flush the Ecache line, otherwise
6243 	 * flush the entire Ecache.
6244 	 */
6245 	if (afsr & (C_AFSR_UE|C_AFSR_RUE)) {
6246 		if ((afsr & C_AFSR_ALL_ERRS) == C_AFSR_UE ||
6247 		    (afsr & C_AFSR_ALL_ERRS) == C_AFSR_RUE) {
6248 			return (ECACHE_FLUSH_LINE);
6249 		} else {
6250 			return (ECACHE_FLUSH_ALL);
6251 		}
6252 	}
6253 #else /* JALAPENO || SERRANO */
6254 	/*
6255 	 * If UE only is set, flush the Ecache line, otherwise
6256 	 * flush the entire Ecache.
6257 	 */
6258 	if (afsr_errs & C_AFSR_UE) {
6259 		if ((afsr_errs & (C_AFSR_ALL_ERRS | C_AFSR_EXT_ALL_ERRS)) ==
6260 		    C_AFSR_UE) {
6261 			return (ECACHE_FLUSH_LINE);
6262 		} else {
6263 			return (ECACHE_FLUSH_ALL);
6264 		}
6265 	}
6266 #endif /* JALAPENO || SERRANO */
6267 
6268 	/*
6269 	 * EDU: If EDU only is set, flush the ecache line, otherwise
6270 	 * flush the entire Ecache.
6271 	 */
6272 	if (afsr_errs & (C_AFSR_EDU | C_AFSR_L3_EDU)) {
6273 		if (((afsr_errs & ~C_AFSR_EDU) == 0) ||
6274 		    ((afsr_errs & ~C_AFSR_L3_EDU) == 0)) {
6275 			return (ECACHE_FLUSH_LINE);
6276 		} else {
6277 			return (ECACHE_FLUSH_ALL);
6278 		}
6279 	}
6280 
6281 	/*
6282 	 * BERR: If BERR only is set, flush the Ecache line, otherwise
6283 	 * flush the entire Ecache.
6284 	 */
6285 	if (afsr_errs & C_AFSR_BERR) {
6286 		if ((afsr_errs & ~C_AFSR_BERR) == 0) {
6287 			return (ECACHE_FLUSH_LINE);
6288 		} else {
6289 			return (ECACHE_FLUSH_ALL);
6290 		}
6291 	}
6292 
6293 	return (0);
6294 }
6295 
6296 void
6297 cpu_error_ecache_flush(ch_async_flt_t *ch_flt)
6298 {
6299 	int	ecache_flush_flag =
6300 	    cpu_error_ecache_flush_required(ch_flt);
6301 
6302 	/*
6303 	 * Flush Ecache line or entire Ecache based on above checks.
6304 	 */
6305 	if (ecache_flush_flag == ECACHE_FLUSH_ALL)
6306 		cpu_flush_ecache();
6307 	else if (ecache_flush_flag == ECACHE_FLUSH_LINE) {
6308 		cpu_flush_ecache_line(ch_flt);
6309 	}
6310 
6311 }
6312 
6313 /*
6314  * Extract the PA portion from the E$ tag.
6315  */
6316 uint64_t
6317 cpu_ectag_to_pa(int setsize, uint64_t tag)
6318 {
6319 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6320 		return (JG_ECTAG_TO_PA(setsize, tag));
6321 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6322 		return (PN_L3TAG_TO_PA(tag));
6323 	else
6324 		return (CH_ECTAG_TO_PA(setsize, tag));
6325 }
6326 
6327 /*
6328  * Convert the E$ tag PA into an E$ subblock index.
6329  */
6330 static int
6331 cpu_ectag_pa_to_subblk(int cachesize, uint64_t subaddr)
6332 {
6333 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6334 		return (JG_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6335 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6336 		/* Panther has only one subblock per line */
6337 		return (0);
6338 	else
6339 		return (CH_ECTAG_PA_TO_SUBBLK(cachesize, subaddr));
6340 }
6341 
6342 /*
6343  * All subblocks in an E$ line must be invalid for
6344  * the line to be invalid.
6345  */
6346 int
6347 cpu_ectag_line_invalid(int cachesize, uint64_t tag)
6348 {
6349 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6350 		return (JG_ECTAG_LINE_INVALID(cachesize, tag));
6351 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6352 		return (PN_L3_LINE_INVALID(tag));
6353 	else
6354 		return (CH_ECTAG_LINE_INVALID(cachesize, tag));
6355 }
6356 
6357 /*
6358  * Extract state bits for a subblock given the tag.  Note that for Panther
6359  * this works on both l2 and l3 tags.
6360  */
6361 static int
6362 cpu_ectag_pa_to_subblk_state(int cachesize, uint64_t subaddr, uint64_t tag)
6363 {
6364 	if (IS_JAGUAR(cpunodes[CPU->cpu_id].implementation))
6365 		return (JG_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6366 	else if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation))
6367 		return (tag & CH_ECSTATE_MASK);
6368 	else
6369 		return (CH_ECTAG_PA_TO_SUBBLK_STATE(cachesize, subaddr, tag));
6370 }
6371 
/*
 * Cpu specific initialization.  When built with CHEETAHPLUS_ERRATUM_25
 * and cheetah_sendmondo_recover is set, this calls cheetah_nudge_init();
 * otherwise it does nothing.
 */
void
cpu_mp_init(void)
{
#ifdef	CHEETAHPLUS_ERRATUM_25
	if (!cheetah_sendmondo_recover)
		return;

	cheetah_nudge_init();
#endif	/* CHEETAHPLUS_ERRATUM_25 */
}
6384 
6385 void
6386 cpu_ereport_post(struct async_flt *aflt)
6387 {
6388 	char *cpu_type, buf[FM_MAX_CLASS];
6389 	char sbuf[21]; /* sizeof (UINT64_MAX) + '\0' */
6390 	nv_alloc_t *nva = NULL;
6391 	nvlist_t *ereport, *detector, *resource;
6392 	errorq_elem_t *eqep;
6393 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6394 	char unum[UNUM_NAMLEN];
6395 	int len = 0;
6396 	uint8_t msg_type, mask;
6397 	plat_ecc_ch_async_flt_t	plat_ecc_ch_flt;
6398 
6399 	if (aflt->flt_panic || panicstr) {
6400 		eqep = errorq_reserve(ereport_errorq);
6401 		if (eqep == NULL)
6402 			return;
6403 		ereport = errorq_elem_nvl(ereport_errorq, eqep);
6404 		nva = errorq_elem_nva(ereport_errorq, eqep);
6405 	} else {
6406 		ereport = fm_nvlist_create(nva);
6407 	}
6408 
6409 	/*
6410 	 * Create the scheme "cpu" FMRI.
6411 	 */
6412 	detector = fm_nvlist_create(nva);
6413 	resource = fm_nvlist_create(nva);
6414 	switch (cpunodes[aflt->flt_inst].implementation) {
6415 	case CHEETAH_IMPL:
6416 		cpu_type = FM_EREPORT_CPU_USIII;
6417 		break;
6418 	case CHEETAH_PLUS_IMPL:
6419 		cpu_type = FM_EREPORT_CPU_USIIIplus;
6420 		break;
6421 	case JALAPENO_IMPL:
6422 		cpu_type = FM_EREPORT_CPU_USIIIi;
6423 		break;
6424 	case SERRANO_IMPL:
6425 		cpu_type = FM_EREPORT_CPU_USIIIiplus;
6426 		break;
6427 	case JAGUAR_IMPL:
6428 		cpu_type = FM_EREPORT_CPU_USIV;
6429 		break;
6430 	case PANTHER_IMPL:
6431 		cpu_type = FM_EREPORT_CPU_USIVplus;
6432 		break;
6433 	default:
6434 		cpu_type = FM_EREPORT_CPU_UNSUPPORTED;
6435 		break;
6436 	}
6437 	mask = cpunodes[aflt->flt_inst].version;
6438 	(void) snprintf(sbuf, sizeof (sbuf), "%llX",
6439 	    (u_longlong_t)cpunodes[aflt->flt_inst].device_id);
6440 	(void) fm_fmri_cpu_set(detector, FM_CPU_SCHEME_VERSION, NULL,
6441 	    aflt->flt_inst, &mask, (const char *)sbuf);
6442 
6443 	/*
6444 	 * Encode all the common data into the ereport.
6445 	 */
6446 	(void) snprintf(buf, FM_MAX_CLASS, "%s.%s.%s",
6447 		FM_ERROR_CPU, cpu_type, aflt->flt_erpt_class);
6448 
6449 	fm_ereport_set(ereport, FM_EREPORT_VERSION, buf,
6450 	    fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst, FM_ENA_FMT1),
6451 	    detector, NULL);
6452 
6453 	/*
6454 	 * Encode the error specific data that was saved in
6455 	 * the async_flt structure into the ereport.
6456 	 */
6457 	cpu_payload_add_aflt(aflt, ereport, resource,
6458 	    &plat_ecc_ch_flt.ecaf_afar_status,
6459 	    &plat_ecc_ch_flt.ecaf_synd_status);
6460 
6461 	if (aflt->flt_panic || panicstr) {
6462 		errorq_commit(ereport_errorq, eqep, ERRORQ_SYNC);
6463 	} else {
6464 		(void) fm_ereport_post(ereport, EVCH_TRYHARD);
6465 		fm_nvlist_destroy(ereport, FM_NVA_FREE);
6466 		fm_nvlist_destroy(detector, FM_NVA_FREE);
6467 		fm_nvlist_destroy(resource, FM_NVA_FREE);
6468 	}
6469 	/*
6470 	 * Send the enhanced error information (plat_ecc_error2_data_t)
6471 	 * to the SC olny if it can process it.
6472 	 */
6473 
6474 	if (&plat_ecc_capability_sc_get &&
6475 	    plat_ecc_capability_sc_get(PLAT_ECC_ERROR2_MESSAGE)) {
6476 		msg_type = cpu_flt_bit_to_plat_error(aflt);
6477 		if (msg_type != PLAT_ECC_ERROR2_NONE) {
6478 			/*
6479 			 * If afar status is not invalid do a unum lookup.
6480 			 */
6481 			if (plat_ecc_ch_flt.ecaf_afar_status !=
6482 			    AFLT_STAT_INVALID) {
6483 				(void) cpu_get_mem_unum_aflt(
6484 				    plat_ecc_ch_flt.ecaf_synd_status, aflt,
6485 				    unum, UNUM_NAMLEN, &len);
6486 			} else {
6487 				unum[0] = '\0';
6488 			}
6489 			plat_ecc_ch_flt.ecaf_sdw_afar = ch_flt->flt_sdw_afar;
6490 			plat_ecc_ch_flt.ecaf_sdw_afsr = ch_flt->flt_sdw_afsr;
6491 			plat_ecc_ch_flt.ecaf_afsr_ext = ch_flt->afsr_ext;
6492 			plat_ecc_ch_flt.ecaf_sdw_afsr_ext =
6493 			    ch_flt->flt_sdw_afsr_ext;
6494 
6495 			if (&plat_log_fruid_error2)
6496 				plat_log_fruid_error2(msg_type, unum, aflt,
6497 				    &plat_ecc_ch_flt);
6498 		}
6499 	}
6500 }
6501 
6502 void
6503 cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
6504 {
6505 	int status;
6506 	ddi_fm_error_t de;
6507 
6508 	bzero(&de, sizeof (ddi_fm_error_t));
6509 
6510 	de.fme_version = DDI_FME_VERSION;
6511 	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
6512 	    FM_ENA_FMT1);
6513 	de.fme_flag = expected;
6514 	de.fme_bus_specific = (void *)aflt->flt_addr;
6515 	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);
6516 	if ((aflt->flt_prot == AFLT_PROT_NONE) && (status == DDI_FM_FATAL))
6517 		aflt->flt_panic = 1;
6518 }
6519 
6520 void
6521 cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
6522     errorq_t *eqp, uint_t flag)
6523 {
6524 	struct async_flt *aflt = (struct async_flt *)payload;
6525 
6526 	aflt->flt_erpt_class = error_class;
6527 	errorq_dispatch(eqp, payload, payload_sz, flag);
6528 }
6529 
/*
 * Intentionally empty.  This routine may be called by the IO module,
 * but there is nothing for it to do in this cpu module: the SERD
 * algorithm is handled by the cpumem-diagnosis engine instead.
 */
/*ARGSUSED*/
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
}
6539 
6540 void
6541 adjust_hw_copy_limits(int ecache_size)
6542 {
6543 	/*
6544 	 * Set hw copy limits.
6545 	 *
6546 	 * /etc/system will be parsed later and can override one or more
6547 	 * of these settings.
6548 	 *
6549 	 * At this time, ecache size seems only mildly relevant.
6550 	 * We seem to run into issues with the d-cache and stalls
6551 	 * we see on misses.
6552 	 *
6553 	 * Cycle measurement indicates that 2 byte aligned copies fare
6554 	 * little better than doing things with VIS at around 512 bytes.
6555 	 * 4 byte aligned shows promise until around 1024 bytes. 8 Byte
6556 	 * aligned is faster whenever the source and destination data
6557 	 * in cache and the total size is less than 2 Kbytes.  The 2K
6558 	 * limit seems to be driven by the 2K write cache.
6559 	 * When more than 2K of copies are done in non-VIS mode, stores
6560 	 * backup in the write cache.  In VIS mode, the write cache is
6561 	 * bypassed, allowing faster cache-line writes aligned on cache
6562 	 * boundaries.
6563 	 *
6564 	 * In addition, in non-VIS mode, there is no prefetching, so
6565 	 * for larger copies, the advantage of prefetching to avoid even
6566 	 * occasional cache misses is enough to justify using the VIS code.
6567 	 *
6568 	 * During testing, it was discovered that netbench ran 3% slower
6569 	 * when hw_copy_limit_8 was 2K or larger.  Apparently for server
6570 	 * applications, data is only used once (copied to the output
6571 	 * buffer, then copied by the network device off the system).  Using
6572 	 * the VIS copy saves more L2 cache state.  Network copies are
6573 	 * around 1.3K to 1.5K in size for historical reasons.
6574 	 *
6575 	 * Therefore, a limit of 1K bytes will be used for the 8 byte
6576 	 * aligned copy even for large caches and 8 MB ecache.  The
6577 	 * infrastructure to allow different limits for different sized
6578 	 * caches is kept to allow further tuning in later releases.
6579 	 */
6580 
6581 	if (min_ecache_size == 0 && use_hw_bcopy) {
6582 		/*
6583 		 * First time through - should be before /etc/system
6584 		 * is read.
6585 		 * Could skip the checks for zero but this lets us
6586 		 * preserve any debugger rewrites.
6587 		 */
6588 		if (hw_copy_limit_1 == 0) {
6589 			hw_copy_limit_1 = VIS_COPY_THRESHOLD;
6590 			priv_hcl_1 = hw_copy_limit_1;
6591 		}
6592 		if (hw_copy_limit_2 == 0) {
6593 			hw_copy_limit_2 = 2 * VIS_COPY_THRESHOLD;
6594 			priv_hcl_2 = hw_copy_limit_2;
6595 		}
6596 		if (hw_copy_limit_4 == 0) {
6597 			hw_copy_limit_4 = 4 * VIS_COPY_THRESHOLD;
6598 			priv_hcl_4 = hw_copy_limit_4;
6599 		}
6600 		if (hw_copy_limit_8 == 0) {
6601 			hw_copy_limit_8 = 4 * VIS_COPY_THRESHOLD;
6602 			priv_hcl_8 = hw_copy_limit_8;
6603 		}
6604 		min_ecache_size = ecache_size;
6605 	} else {
6606 		/*
6607 		 * MP initialization. Called *after* /etc/system has
6608 		 * been parsed. One CPU has already been initialized.
6609 		 * Need to cater for /etc/system having scragged one
6610 		 * of our values.
6611 		 */
6612 		if (ecache_size == min_ecache_size) {
6613 			/*
6614 			 * Same size ecache. We do nothing unless we
6615 			 * have a pessimistic ecache setting. In that
6616 			 * case we become more optimistic (if the cache is
6617 			 * large enough).
6618 			 */
6619 			if (hw_copy_limit_8 == 4 * VIS_COPY_THRESHOLD) {
6620 				/*
6621 				 * Need to adjust hw_copy_limit* from our
6622 				 * pessimistic uniprocessor value to a more
6623 				 * optimistic UP value *iff* it hasn't been
6624 				 * reset.
6625 				 */
6626 				if ((ecache_size > 1048576) &&
6627 				    (priv_hcl_8 == hw_copy_limit_8)) {
6628 					if (ecache_size <= 2097152)
6629 						hw_copy_limit_8 = 4 *
6630 						    VIS_COPY_THRESHOLD;
6631 					else if (ecache_size <= 4194304)
6632 						hw_copy_limit_8 = 4 *
6633 						    VIS_COPY_THRESHOLD;
6634 					else
6635 						hw_copy_limit_8 = 4 *
6636 						    VIS_COPY_THRESHOLD;
6637 					priv_hcl_8 = hw_copy_limit_8;
6638 				}
6639 			}
6640 		} else if (ecache_size < min_ecache_size) {
6641 			/*
6642 			 * A different ecache size. Can this even happen?
6643 			 */
6644 			if (priv_hcl_8 == hw_copy_limit_8) {
6645 				/*
6646 				 * The previous value that we set
6647 				 * is unchanged (i.e., it hasn't been
6648 				 * scragged by /etc/system). Rewrite it.
6649 				 */
6650 				if (ecache_size <= 1048576)
6651 					hw_copy_limit_8 = 8 *
6652 					    VIS_COPY_THRESHOLD;
6653 				else if (ecache_size <= 2097152)
6654 					hw_copy_limit_8 = 8 *
6655 					    VIS_COPY_THRESHOLD;
6656 				else if (ecache_size <= 4194304)
6657 					hw_copy_limit_8 = 8 *
6658 					    VIS_COPY_THRESHOLD;
6659 				else
6660 					hw_copy_limit_8 = 10 *
6661 					    VIS_COPY_THRESHOLD;
6662 				priv_hcl_8 = hw_copy_limit_8;
6663 				min_ecache_size = ecache_size;
6664 			}
6665 		}
6666 	}
6667 }
6668 
6669 /*
6670  * Called from illegal instruction trap handler to see if we can attribute
6671  * the trap to a fpras check.
6672  */
6673 int
6674 fpras_chktrap(struct regs *rp)
6675 {
6676 	int op;
6677 	struct fpras_chkfngrp *cgp;
6678 	uintptr_t tpc = (uintptr_t)rp->r_pc;
6679 
6680 	if (fpras_chkfngrps == NULL)
6681 		return (0);
6682 
6683 	cgp = &fpras_chkfngrps[CPU->cpu_id];
6684 	for (op = 0; op < FPRAS_NCOPYOPS; ++op) {
6685 		if (tpc >= (uintptr_t)&cgp->fpras_fn[op].fpras_blk0 &&
6686 		    tpc < (uintptr_t)&cgp->fpras_fn[op].fpras_chkresult)
6687 			break;
6688 	}
6689 	if (op == FPRAS_NCOPYOPS)
6690 		return (0);
6691 
6692 	/*
6693 	 * This is an fpRAS failure caught through an illegal
6694 	 * instruction - trampoline.
6695 	 */
6696 	rp->r_pc = (uintptr_t)&cgp->fpras_fn[op].fpras_trampoline;
6697 	rp->r_npc = rp->r_pc + 4;
6698 	return (1);
6699 }
6700 
6701 /*
6702  * fpras_failure is called when a fpras check detects a bad calculation
6703  * result or an illegal instruction trap is attributed to an fpras
6704  * check.  In all cases we are still bound to CPU.
6705  */
6706 int
6707 fpras_failure(int op, int how)
6708 {
6709 	int use_hw_bcopy_orig, use_hw_bzero_orig;
6710 	uint_t hcl1_orig, hcl2_orig, hcl4_orig, hcl8_orig;
6711 	ch_async_flt_t ch_flt;
6712 	struct async_flt *aflt = (struct async_flt *)&ch_flt;
6713 	struct fpras_chkfn *sfp, *cfp;
6714 	uint32_t *sip, *cip;
6715 	int i;
6716 
6717 	/*
6718 	 * We're running on a sick CPU.  Avoid further FPU use at least for
6719 	 * the time in which we dispatch an ereport and (if applicable) panic.
6720 	 */
6721 	use_hw_bcopy_orig = use_hw_bcopy;
6722 	use_hw_bzero_orig = use_hw_bzero;
6723 	hcl1_orig = hw_copy_limit_1;
6724 	hcl2_orig = hw_copy_limit_2;
6725 	hcl4_orig = hw_copy_limit_4;
6726 	hcl8_orig = hw_copy_limit_8;
6727 	use_hw_bcopy = use_hw_bzero = 0;
6728 	hw_copy_limit_1 = hw_copy_limit_2 = hw_copy_limit_4 =
6729 	    hw_copy_limit_8 = 0;
6730 
6731 	bzero(&ch_flt, sizeof (ch_async_flt_t));
6732 	aflt->flt_id = gethrtime_waitfree();
6733 	aflt->flt_class = CPU_FAULT;
6734 	aflt->flt_inst = CPU->cpu_id;
6735 	aflt->flt_status = (how << 8) | op;
6736 	aflt->flt_payload = FM_EREPORT_PAYLOAD_FPU_HWCOPY;
6737 	ch_flt.flt_type = CPU_FPUERR;
6738 
6739 	/*
6740 	 * We must panic if the copy operation had no lofault protection -
6741 	 * ie, don't panic for copyin, copyout, kcopy and bcopy called
6742 	 * under on_fault and do panic for unprotected bcopy and hwblkpagecopy.
6743 	 */
6744 	aflt->flt_panic = (curthread->t_lofault == NULL);
6745 
6746 	/*
6747 	 * XOR the source instruction block with the copied instruction
6748 	 * block - this will show us which bit(s) are corrupted.
6749 	 */
6750 	sfp = (struct fpras_chkfn *)fpras_chkfn_type1;
6751 	cfp = &fpras_chkfngrps[CPU->cpu_id].fpras_fn[op];
6752 	if (op == FPRAS_BCOPY || op == FPRAS_COPYOUT) {
6753 		sip = &sfp->fpras_blk0[0];
6754 		cip = &cfp->fpras_blk0[0];
6755 	} else {
6756 		sip = &sfp->fpras_blk1[0];
6757 		cip = &cfp->fpras_blk1[0];
6758 	}
6759 	for (i = 0; i < 16; ++i, ++sip, ++cip)
6760 		ch_flt.flt_fpdata[i] = *sip ^ *cip;
6761 
6762 	cpu_errorq_dispatch(FM_EREPORT_CPU_USIII_FPU_HWCOPY, (void *)&ch_flt,
6763 	    sizeof (ch_async_flt_t), ue_queue, aflt->flt_panic);
6764 
6765 	if (aflt->flt_panic)
6766 		fm_panic("FPU failure on CPU %d", CPU->cpu_id);
6767 
6768 	/*
6769 	 * We get here for copyin/copyout and kcopy or bcopy where the
6770 	 * caller has used on_fault.  We will flag the error so that
6771 	 * the process may be killed  The trap_async_hwerr mechanism will
6772 	 * take appropriate further action (such as a reboot, contract
6773 	 * notification etc).  Since we may be continuing we will
6774 	 * restore the global hardware copy acceleration switches.
6775 	 *
6776 	 * When we return from this function to the copy function we want to
6777 	 * avoid potentially bad data being used, ie we want the affected
6778 	 * copy function to return an error.  The caller should therefore
6779 	 * invoke its lofault handler (which always exists for these functions)
6780 	 * which will return the appropriate error.
6781 	 */
6782 	ttolwp(curthread)->lwp_pcb.pcb_flags |= ASYNC_HWERR;
6783 	aston(curthread);
6784 
6785 	use_hw_bcopy = use_hw_bcopy_orig;
6786 	use_hw_bzero = use_hw_bzero_orig;
6787 	hw_copy_limit_1 = hcl1_orig;
6788 	hw_copy_limit_2 = hcl2_orig;
6789 	hw_copy_limit_4 = hcl4_orig;
6790 	hw_copy_limit_8 = hcl8_orig;
6791 
6792 	return (1);
6793 }
6794 
6795 #define	VIS_BLOCKSIZE		64
6796 
6797 int
6798 dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
6799 {
6800 	int ret, watched;
6801 
6802 	watched = watch_disable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6803 	ret = dtrace_blksuword32(addr, data, 0);
6804 	if (watched)
6805 		watch_enable_addr((void *)addr, VIS_BLOCKSIZE, S_WRITE);
6806 
6807 	return (ret);
6808 }
6809 
6810 /*
6811  * Called when a cpu enters the CPU_FAULTED state (by the cpu placing the
6812  * faulted cpu into that state).  Cross-trap to the faulted cpu to clear
6813  * CEEN from the EER to disable traps for further disrupting error types
6814  * on that cpu.  We could cross-call instead, but that has a larger
6815  * instruction and data footprint than cross-trapping, and the cpu is known
6816  * to be faulted.
6817  */
6818 
6819 void
6820 cpu_faulted_enter(struct cpu *cp)
6821 {
6822 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_CLRBITS);
6823 }
6824 
6825 /*
6826  * Called when a cpu leaves the CPU_FAULTED state to return to one of
6827  * offline, spare, or online (by the cpu requesting this state change).
6828  * First we cross-call to clear the AFSR (and AFSR_EXT on Panther) of
6829  * disrupting error bits that have accumulated without trapping, then
6830  * we cross-trap to re-enable CEEN controlled traps.
6831  */
6832 void
6833 cpu_faulted_exit(struct cpu *cp)
6834 {
6835 	ch_cpu_errors_t cpu_error_regs;
6836 
6837 	cpu_error_regs.afsr = C_AFSR_CECC_ERRS;
6838 	if (IS_PANTHER(cpunodes[cp->cpu_id].implementation))
6839 		cpu_error_regs.afsr_ext &= C_AFSR_EXT_CECC_ERRS;
6840 	xc_one(cp->cpu_id, (xcfunc_t *)set_cpu_error_state,
6841 	    (uint64_t)&cpu_error_regs, 0);
6842 
6843 	xt_one(cp->cpu_id, set_error_enable_tl1, EN_REG_CEEN, EER_SET_SETBITS);
6844 }
6845 
6846 /*
6847  * Return 1 if the errors in ch_flt's AFSR are secondary errors caused by
6848  * the errors in the original AFSR, 0 otherwise.
6849  *
6850  * For all procs if the initial error was a BERR or TO, then it is possible
6851  * that we may have caused a secondary BERR or TO in the process of logging the
6852  * inital error via cpu_run_bus_error_handlers().  If this is the case then
6853  * if the request was protected then a panic is still not necessary, if not
6854  * protected then aft_panic is already set - so either way there's no need
6855  * to set aft_panic for the secondary error.
6856  *
6857  * For Cheetah and Jalapeno, if the original error was a UE which occurred on
6858  * a store merge, then the error handling code will call cpu_deferred_error().
6859  * When clear_errors() is called, it will determine that secondary errors have
6860  * occurred - in particular, the store merge also caused a EDU and WDU that
6861  * weren't discovered until this point.
6862  *
6863  * We do three checks to verify that we are in this case.  If we pass all three
6864  * checks, we return 1 to indicate that we should not panic.  If any unexpected
6865  * errors occur, we return 0.
6866  *
6867  * For Cheetah+ and derivative procs, the store merge causes a DUE, which is
6868  * handled in cpu_disrupting_errors().  Since this function is not even called
6869  * in the case we are interested in, we just return 0 for these processors.
6870  */
6871 /*ARGSUSED*/
6872 static int
6873 cpu_check_secondary_errors(ch_async_flt_t *ch_flt, uint64_t t_afsr_errs,
6874     uint64_t t_afar)
6875 {
6876 #if defined(CHEETAH_PLUS)
6877 #else	/* CHEETAH_PLUS */
6878 	struct async_flt *aflt = (struct async_flt *)ch_flt;
6879 #endif	/* CHEETAH_PLUS */
6880 
6881 	/*
6882 	 * Was the original error a BERR or TO and only a BERR or TO
6883 	 * (multiple errors are also OK)
6884 	 */
6885 	if ((t_afsr_errs & ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0) {
6886 		/*
6887 		 * Is the new error a BERR or TO and only a BERR or TO
6888 		 * (multiple errors are also OK)
6889 		 */
6890 		if ((ch_flt->afsr_errs &
6891 		    ~(C_AFSR_BERR | C_AFSR_TO | C_AFSR_ME)) == 0)
6892 			return (1);
6893 	}
6894 
6895 #if defined(CHEETAH_PLUS)
6896 	return (0);
6897 #else	/* CHEETAH_PLUS */
6898 	/*
6899 	 * Now look for secondary effects of a UE on cheetah/jalapeno
6900 	 *
6901 	 * Check the original error was a UE, and only a UE.  Note that
6902 	 * the ME bit will cause us to fail this check.
6903 	 */
6904 	if (t_afsr_errs != C_AFSR_UE)
6905 		return (0);
6906 
6907 	/*
6908 	 * Check the secondary errors were exclusively an EDU and/or WDU.
6909 	 */
6910 	if ((ch_flt->afsr_errs & ~(C_AFSR_EDU|C_AFSR_WDU)) != 0)
6911 		return (0);
6912 
6913 	/*
6914 	 * Check the AFAR of the original error and secondary errors
6915 	 * match to the 64-byte boundary
6916 	 */
6917 	if (P2ALIGN(aflt->flt_addr, 64) != P2ALIGN(t_afar, 64))
6918 		return (0);
6919 
6920 	/*
6921 	 * We've passed all the checks, so it's a secondary error!
6922 	 */
6923 	return (1);
6924 #endif	/* CHEETAH_PLUS */
6925 }
6926 
6927 /*
6928  * Translate the flt_bit or flt_type into an error type.  First, flt_bit
6929  * is checked for any valid errors.  If found, the error type is
6930  * returned. If not found, the flt_type is checked for L1$ parity errors.
6931  */
6932 /*ARGSUSED*/
6933 static uint8_t
6934 cpu_flt_bit_to_plat_error(struct async_flt *aflt)
6935 {
6936 #if defined(JALAPENO)
6937 	/*
6938 	 * Currently, logging errors to the SC is not supported on Jalapeno
6939 	 */
6940 	return (PLAT_ECC_ERROR2_NONE);
6941 #else
6942 	ch_async_flt_t *ch_flt = (ch_async_flt_t *)aflt;
6943 
6944 	switch (ch_flt->flt_bit) {
6945 	case C_AFSR_CE:
6946 		return (PLAT_ECC_ERROR2_CE);
6947 	case C_AFSR_UCC:
6948 	case C_AFSR_EDC:
6949 	case C_AFSR_WDC:
6950 	case C_AFSR_CPC:
6951 		return (PLAT_ECC_ERROR2_L2_CE);
6952 	case C_AFSR_EMC:
6953 		return (PLAT_ECC_ERROR2_EMC);
6954 	case C_AFSR_IVC:
6955 		return (PLAT_ECC_ERROR2_IVC);
6956 	case C_AFSR_UE:
6957 		return (PLAT_ECC_ERROR2_UE);
6958 	case C_AFSR_UCU:
6959 	case C_AFSR_EDU:
6960 	case C_AFSR_WDU:
6961 	case C_AFSR_CPU:
6962 		return (PLAT_ECC_ERROR2_L2_UE);
6963 	case C_AFSR_IVU:
6964 		return (PLAT_ECC_ERROR2_IVU);
6965 	case C_AFSR_TO:
6966 		return (PLAT_ECC_ERROR2_TO);
6967 	case C_AFSR_BERR:
6968 		return (PLAT_ECC_ERROR2_BERR);
6969 #if defined(CHEETAH_PLUS)
6970 	case C_AFSR_L3_EDC:
6971 	case C_AFSR_L3_UCC:
6972 	case C_AFSR_L3_CPC:
6973 	case C_AFSR_L3_WDC:
6974 		return (PLAT_ECC_ERROR2_L3_CE);
6975 	case C_AFSR_IMC:
6976 		return (PLAT_ECC_ERROR2_IMC);
6977 	case C_AFSR_TSCE:
6978 		return (PLAT_ECC_ERROR2_L2_TSCE);
6979 	case C_AFSR_THCE:
6980 		return (PLAT_ECC_ERROR2_L2_THCE);
6981 	case C_AFSR_L3_MECC:
6982 		return (PLAT_ECC_ERROR2_L3_MECC);
6983 	case C_AFSR_L3_THCE:
6984 		return (PLAT_ECC_ERROR2_L3_THCE);
6985 	case C_AFSR_L3_CPU:
6986 	case C_AFSR_L3_EDU:
6987 	case C_AFSR_L3_UCU:
6988 	case C_AFSR_L3_WDU:
6989 		return (PLAT_ECC_ERROR2_L3_UE);
6990 	case C_AFSR_DUE:
6991 		return (PLAT_ECC_ERROR2_DUE);
6992 	case C_AFSR_DTO:
6993 		return (PLAT_ECC_ERROR2_DTO);
6994 	case C_AFSR_DBERR:
6995 		return (PLAT_ECC_ERROR2_DBERR);
6996 #endif	/* CHEETAH_PLUS */
6997 	default:
6998 		switch (ch_flt->flt_type) {
6999 #if defined(CPU_IMP_L1_CACHE_PARITY)
7000 		case CPU_IC_PARITY:
7001 			return (PLAT_ECC_ERROR2_IPE);
7002 		case CPU_DC_PARITY:
7003 			if (IS_PANTHER(cpunodes[CPU->cpu_id].implementation)) {
7004 				if (ch_flt->parity_data.dpe.cpl_cache ==
7005 				    CPU_PC_PARITY) {
7006 					return (PLAT_ECC_ERROR2_PCACHE);
7007 				}
7008 			}
7009 			return (PLAT_ECC_ERROR2_DPE);
7010 #endif /* CPU_IMP_L1_CACHE_PARITY */
7011 		case CPU_ITLB_PARITY:
7012 			return (PLAT_ECC_ERROR2_ITLB);
7013 		case CPU_DTLB_PARITY:
7014 			return (PLAT_ECC_ERROR2_DTLB);
7015 		default:
7016 			return (PLAT_ECC_ERROR2_NONE);
7017 		}
7018 	}
7019 #endif	/* JALAPENO */
7020 }
7021