sun4u/cpu/spitfire.c

/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/types.h>
#include <sys/systm.h>
#include <sys/archsystm.h>
#include <sys/machparam.h>
#include <sys/machsystm.h>
#include <sys/cpu.h>
#include <sys/elf_SPARC.h>
#include <vm/hat_sfmmu.h>
#include <vm/page.h>
#include <sys/cpuvar.h>
#include <sys/spitregs.h>
#include <sys/async.h>
#include <sys/cmn_err.h>
#include <sys/debug.h>
#include <sys/dditypes.h>
#include <sys/sunddi.h>
#include <sys/cpu_module.h>
#include <sys/prom_debug.h>
#include <sys/vmsystm.h>
#include <sys/prom_plat.h>
#include <sys/sysmacros.h>
#include <sys/intreg.h>
#include <sys/machtrap.h>
#include <sys/ontrap.h>
#include <sys/ivintr.h>
#include <sys/atomic.h>
#include <sys/panic.h>
#include <sys/ndifm.h>
#include <sys/fm/protocol.h>
#include <sys/fm/util.h>
#include <sys/fm/cpu/UltraSPARC-II.h>
#include <sys/ddi.h>
#include <sys/ecc_kstat.h>
#include <sys/watchpoint.h>
#include <sys/dtrace.h>
#include <sys/errclassify.h>

/*
 * NOTE(review): defined as NULL here and not assigned anywhere in the part
 * of the file reviewed; presumably consumed by common MMU code to mean
 * "no per-context page size array" on this CPU -- confirm against callers.
 */
uchar_t	*ctx_pgsz_array = NULL;

/*
 * Structure for the 8 byte ecache data dump and the associated AFSR state.
 * There will be 8 of these structures used to dump an ecache line (64 bytes).
 */
typedef struct sf_ec_data_elm {
	uint64_t ec_d8;		/* one 8-byte chunk of the dumped E$ line */
	uint64_t ec_afsr;	/* AFSR state associated with that chunk */
} ec_data_t;

/*
 * Define spitfire (Ultra I/II) specific asynchronous error structure
 */
typedef struct spitfire_async_flt {
	/*
	 * cmn_asyncflt must stay the first member: code in this file casts
	 * a spitf_async_flt pointer directly to a struct async_flt pointer.
	 */
	struct async_flt cmn_asyncflt;	/* common - see sun4u/sys/async.h */
	ushort_t flt_type;		/* types of faults - cpu specific */
	ec_data_t flt_ec_data[8];	/* for E$ or mem dump/state */
	uint64_t flt_ec_tag;		/* E$ tag info */
	int flt_ec_lcnt;		/* number of bad E$ lines */
	ushort_t flt_sdbh;		/* UDBH reg */
	ushort_t flt_sdbl;		/* UDBL reg */
} spitf_async_flt;

/*
 * Prototypes for support routines in spitfire_asm.s:
 *
 * Every routine is declared with a full prototype (including "(void)" for
 * routines that take no arguments) so that the compiler checks the argument
 * list at every call site.
 */
extern void flush_ecache(uint64_t physaddr, size_t size, size_t linesize);
extern uint64_t get_lsu(void);
extern void set_lsu(uint64_t ncc);
extern void get_ecache_dtag(uint32_t ecache_idx, uint64_t *data, uint64_t *tag,
				uint64_t *oafsr, uint64_t *acc_afsr);
extern uint64_t check_ecache_line(uint32_t id, uint64_t *acc_afsr);
extern uint64_t get_ecache_tag(uint32_t id, uint64_t *nafsr,
				uint64_t *acc_afsr);
extern uint64_t read_and_clear_afsr(void);
extern void write_ec_tag_parity(uint32_t id);
extern void write_hb_ec_tag_parity(uint32_t id);

/*
 * Spitfire module routines:
 */
static void cpu_async_log_err(void *flt);
/*
 * Common [AFT#] message logger.  logflags is a mask of the CPU_* print
 * flags defined later in this file; fmt and the trailing arguments are
 * printf-like.
 */
/*PRINTFLIKE6*/
static void cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt,
    uint_t logflags, const char *endstr, const char *fmt, ...);

static void cpu_read_paddr(struct async_flt *aflt, short verbose, short ce_err);
static void cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum);
static void cpu_log_ecmem_info(spitf_async_flt *spf_flt);

/* log_ce_err() is installed as the flt_func callback by cpu_ce_error(). */
static void log_ce_err(struct async_flt *aflt, char *unum);
static void log_ue_err(struct async_flt *aflt, char *unum);
static void check_misc_err(spitf_async_flt *spf_flt);
static ushort_t ecc_gen(uint_t high_bytes, uint_t low_bytes);
static int check_ecc(struct async_flt *aflt);
static uint_t get_cpu_status(uint64_t arg);
static uint64_t clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr);
static void scan_ecache(uint64_t *afar, ec_data_t *data, uint64_t *tag,
		int *m, uint64_t *afsr);
static void ecache_kstat_init(struct cpu *cp);
static void ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag,
		uint64_t paddr, int mpb, uint64_t);
static uint64_t ecache_scrub_misc_err(int, uint64_t);
static void ecache_scrub_tag_err(uint64_t, uchar_t, uint32_t);
static void ecache_page_retire(void *);
static int ecc_kstat_update(kstat_t *ksp, int rw);
static int ce_count_unum(int status, int len, char *unum);
static void add_leaky_bucket_timeout(void);
static int synd_to_synd_code(int synd_status, ushort_t synd);

/*
 * NOTE(review): memory scrubber hooks declared here but not used in the
 * part of the file reviewed -- confirm where they are consumed.
 */
extern uint_t read_all_memscrub;
extern void memscrub_run(void);

/* Implementation flags; both are set (at most one of them) in cpu_aflt_size(). */
static uchar_t	isus2i;			/* set if sabre */
static uchar_t	isus2e;			/* set if hummingbird */

/*
 * Default ecache mask and shift settings for Spitfire.  If we detect a
 * different CPU implementation, we will modify these values at boot time
 * (cpu_aflt_size() installs the Sabre or Hummingbird values).
 */
static uint64_t cpu_ec_tag_mask		= S_ECTAG_MASK;
static uint64_t cpu_ec_state_mask	= S_ECSTATE_MASK;
static uint64_t cpu_ec_par_mask		= S_ECPAR_MASK;
static int cpu_ec_par_shift		= S_ECPAR_SHIFT;
static int cpu_ec_tag_shift		= S_ECTAG_SHIFT;
static int cpu_ec_state_shift		= S_ECSTATE_SHIFT;
static uchar_t cpu_ec_state_exl		= S_ECSTATE_EXL;
static uchar_t cpu_ec_state_mod		= S_ECSTATE_MOD;
/* The next two are forced to 0xFF on Sabre and Hummingbird (no such state). */
static uchar_t cpu_ec_state_shr		= S_ECSTATE_SHR;
static uchar_t cpu_ec_state_own		= S_ECSTATE_OWN;

/*
 * Default ecache state bits for Spitfire.  These individual bits indicate if
 * the given line is in any of the valid or modified states, respectively.
 * Again, we modify these at boot if we detect a different CPU.
 */
static uchar_t cpu_ec_state_valid	= S_ECSTATE_VALID;
static uchar_t cpu_ec_state_dirty	= S_ECSTATE_DIRTY;
static uchar_t cpu_ec_parity		= S_EC_PARITY;
static uchar_t cpu_ec_state_parity	= S_ECSTATE_PARITY;

/*
 * This table is used to determine which bit(s) is(are) bad when an ECC
 * error occurs.  The array is indexed by an 8-bit syndrome.  The entries
 * of this array have the following semantics:
 *
 *      00-63   The number of the bad bit, when only one bit is bad.
 *      64      ECC bit C0 is bad.
 *      65      ECC bit C1 is bad.
 *      66      ECC bit C2 is bad.
 *      67      ECC bit C3 is bad.
 *      68      ECC bit C4 is bad.
 *      69      ECC bit C5 is bad.
 *      70      ECC bit C6 is bad.
 *      71      ECC bit C7 is bad.
 *      72      Two bits are bad.
 *      73      Three bits are bad.
 *      74      Four bits are bad.
 *      75      More than Four bits are bad.
 *      76      NO bits are bad.
 * Based on "Galaxy Memory Subsystem SPECIFICATION" rev 0.6, pg. 28.
 */

/*
 * Syndrome codes stored in ecc_syndrome_tab[].  Codes 0-63 name a single
 * bad data bit; the symbolic codes below cover check bits and multi-bit
 * errors.
 */
#define	C0	64
#define	C1	65
#define	C2	66
#define	C3	67
#define	C4	68
#define	C5	69
#define	C6	70
#define	C7	71
#define	M2	72
#define	M3	73
#define	M4	74
#define	MX	75
#define	NA	76

/*
 * Classify a syndrome code as a single-bit data error (0 .. C0-1) or a
 * single-bit check-bit error (C0 .. C7).  The argument is parenthesized in
 * the expansion so that any expression may be passed; note that it is
 * evaluated twice, so it must not have side effects.
 */
#define	SYND_IS_SINGLE_BIT_DATA(synd_code)	(((synd_code) >= 0) && \
						    ((synd_code) < C0))
#define	SYND_IS_SINGLE_BIT_CHK(synd_code)	(((synd_code) >= C0) && \
						    ((synd_code) <= C7))

/*
 * Maps an 8-bit ECC syndrome (the array index) to a syndrome code:
 * 0-63 for a single bad data bit, C0-C7 for a single bad check bit,
 * M2/M3/M4/MX for multi-bit errors and NA when no bit is bad.
 */
static char ecc_syndrome_tab[] =
{
	NA, C0, C1, M2, C2, M2, M2, M3, C3, M2, M2, M3, M2, M3, M3, M4,
	C4, M2, M2, 32, M2, 57, MX, M2, M2, 37, 49, M2, 40, M2, M2, 44,
	C5, M2, M2, 33, M2, 61,  4, M2, M2, MX, 53, M2, 45, M2, M2, 41,
	M2,  0,  1, M2, 10, M2, M2, MX, 15, M2, M2, MX, M2, M3, M3, M2,
	C6, M2, M2, 42, M2, 59, 39, M2, M2, MX, 51, M2, 34, M2, M2, 46,
	M2, 25, 29, M2, 27, M4, M2, MX, 31, M2, M4, MX, M2, MX, MX, M2,
	M2, MX, 36, M2,  7, M2, M2, 54, MX, M2, M2, 62, M2, 48, 56, M2,
	M3, M2, M2, MX, M2, MX, 22, M2, M2, 18, MX, M2, M3, M2, M2, MX,
	C7, M2, M2, 47, M2, 63, MX, M2, M2,  6, 55, M2, 35, M2, M2, 43,
	M2,  5, MX, M2, MX, M2, M2, 50, 38, M2, M2, 58, M2, 52, 60, M2,
	M2, 17, 21, M2, 19, M4, M2, MX, 23, M2, M4, MX, M2, MX, MX, M2,
	M3, M2, M2, MX, M2, MX, 30, M2, M2, 26, MX, M2, M3, M2, M2, MX,
	M2,  8, 13, M2,  2, M2, M2, M3,  3, M2, M2, M3, M2, MX, MX, M2,
	M3, M2, M2, M3, M2, MX, 16, M2, M2, 20, MX, M2, MX, M2, M2, MX,
	M3, M2, M2, M3, M2, MX, 24, M2, M2, 28, MX, M2, MX, M2, M2, MX,
	M4, 12,  9, M2, 14, M2, M2, MX, 11, M2, M2, MX, M2, MX, MX, M4
};

/* Number of entries in ecc_syndrome_tab[]: one per possible 8-bit syndrome. */
#define	SYND_TBL_SIZE 256

/*
 * Hack for determining UDBH/UDBL, for later cpu-specific error reporting.
 * Cannot use bit 3 in afar, because it is a valid bit on a Sabre/Hummingbird.
 *
 * Bit 15 (UDBL_REG) of a stored syndrome marks it as coming from UDBL;
 * UDBL() extracts that flag as 0 or 1 and SYND() strips it, leaving the
 * low 15 bits.  The macro argument is parenthesized in the expansion so
 * that expressions such as (a | b) are handled correctly.
 */
#define	UDBL_REG	0x8000
#define	UDBL(synd)	(((synd) & UDBL_REG) >> 15)
#define	SYND(synd)	((synd) & 0x7FFF)

/*
 * These error types are specific to Spitfire and are used internally for the
 * spitfire fault structure flt_type field (spitf_async_flt.flt_type).
 */
#define	CPU_UE_ERR		0	/* uncorrectable errors - UEs */
#define	CPU_EDP_LDP_ERR		1	/* LDP or EDP parity error */
#define	CPU_WP_ERR		2	/* WP parity error */
#define	CPU_BTO_BERR_ERR	3	/* bus timeout errors */
#define	CPU_PANIC_CP_ERR	4	/* cp error from panic polling */
#define	CPU_TRAPPING_CP_ERR	5	/* for sabre/hbird only, cp error */
#define	CPU_BADLINE_CI_ERR	6	/* E$ clean_bad line when idle */
#define	CPU_BADLINE_CB_ERR	7	/* E$ clean_bad line when busy */
#define	CPU_BADLINE_DI_ERR	8	/* E$ dirty_bad line when idle */
#define	CPU_BADLINE_DB_ERR	9	/* E$ dirty_bad line when busy */
#define	CPU_ORPHAN_CP_ERR	10	/* Orphan CP error */
#define	CPU_ECACHE_ADDR_PAR_ERR	11	/* Ecache Address parity error */
#define	CPU_ECACHE_STATE_ERR	12	/* Ecache state error */
#define	CPU_ECACHE_ETP_ETS_ERR	13	/* ETP set but ETS is zero */
#define	CPU_ECACHE_TAG_ERR	14	/* Scrub the E$ tag, if state clean */
#define	CPU_ADDITIONAL_ERR	15	/* Additional errors occurred */

/*
 * Macro to access the "Spitfire cpu private" data structure.
 * Evaluates to a pointer to member x of the spitfire_private_t obtained
 * from CPU_PRIVATE(cp); x must be a member name, not an expression.
 */
#define	CPU_PRIVATE_PTR(cp, x)	(&(((spitfire_private_t *)CPU_PRIVATE(cp))->x))

/*
 * Set to 0 to disable automatic retiring of pages on
 * DIMMs that have excessive soft errors.  Enabled (1) by default.
 */
int automatic_page_removal = 1;

/*
 * Heuristic for figuring out which module to replace.
 * Relative likelihood that this P_SYND indicates that this module is bad.
 * We call it a "score", though, not a relative likelihood.
 *
 * Step 1.
 * Assign a score to each byte of P_SYND according to the following rules:
 * If no bits on (0x00) or all bits on (0xFF), then give it a 5.
 * If one bit on, give it a 95.
 * If seven bits on, give it a 10.
 * If two bits on:
 *   in different nybbles, a 90
 *   in same nybble, but unaligned, 85
 *   in same nybble and as an aligned pair, 80
 * If six bits on, look at the bits that are off:
 *   in same nybble and as an aligned pair, 15
 *   in same nybble, but unaligned, 20
 *   in different nybbles, a 25
 * If three bits on:
 *   in different nybbles, no aligned pairs, 75
 *   in different nybbles, one aligned pair, 70
 *   in the same nybble, 65
 * If five bits on, look at the bits that are off:
 *   in the same nybble, 30
 *   in different nybbles, one aligned pair, 35
 *   in different nybbles, no aligned pairs, 40
 * If four bits on:
 *   all in one nybble, 45
 *   as two aligned pairs, 50
 *   one aligned pair, 55
 *   no aligned pairs, 60
 *
 * Step 2:
 * Take the higher of the two scores (one for each byte) as the score
 * for the module.
 *
 * Print the score for each module, and field service should replace the
 * module with the highest score.
 */

/*
 * In the table below, the first row/column comment indicates the
 * number of bits on in that nybble; the second row/column comment is
 * the hex digit.
 */

/*
 * Score for one byte of P_SYND, indexed by the byte value: the row is the
 * high nybble and the column is the low nybble of the index.
 */
static int
p_synd_score_table[256] = {
	/* 0   1   1   2   1   2   2   3   1   2   2   3   2   3   3   4 */
	/* 0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  A,  B,  C,  D,  E,  F */
/* 0 0 */  5, 95, 95, 80, 95, 85, 85, 65, 95, 85, 85, 65, 80, 65, 65, 45,
/* 1 1 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 1 2 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 3 */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 1 4 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 5 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 6 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 7 */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 1 8 */ 95, 90, 90, 70, 90, 75, 75, 55, 90, 75, 75, 55, 70, 55, 55, 30,
/* 2 9 */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 2 A */ 85, 75, 75, 55, 75, 60, 60, 40, 75, 60, 60, 40, 55, 40, 40, 20,
/* 3 B */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 2 C */ 80, 70, 70, 50, 70, 55, 55, 35, 70, 55, 55, 35, 50, 35, 35, 15,
/* 3 D */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 3 E */ 65, 55, 55, 35, 55, 40, 40, 25, 55, 40, 40, 25, 35, 25, 25, 10,
/* 4 F */ 45, 30, 30, 15, 30, 20, 20, 10, 30, 20, 20, 10, 15, 10, 10,  5,
};

/*
 * Return the module score for a 16-bit P_SYND: each byte is scored via
 * p_synd_score_table[] and the larger of the two scores is returned.
 */
int
ecc_psynd_score(ushort_t p_synd)
{
	int lo_score = p_synd_score_table[p_synd & 0xFF];
	int hi_score = p_synd_score_table[(p_synd >> 8) & 0xFF];

	if (lo_score > hi_score)
		return (lo_score);

	return (hi_score);
}

/*
 * Async Fault Logging
 *
 * To ease identifying, reading, and filtering async fault log messages, the
 * label [AFT#] is now prepended to each async fault message.  These messages
 * and the logging rules are implemented by cpu_aflt_log(), below.
 *
 * [AFT0] - Tag for log messages that are associated with corrected ECC errors.
 *          This includes both corrected ECC memory and ecache faults.
 *
 * [AFT1] - Tag for log messages that are not ECC corrected (i.e. everything
 *          else except CE errors) with a priority of 1 (highest).  This tag
 *          is also used for panic messages that result from an async fault.
 *
 * [AFT2] - These are lower priority diagnostic messages for uncorrected ECC
 * [AFT3]   or parity errors.  For example, AFT2 is used for the actual dump
 *          of the E-$ data and tags.
 *
 * In a non-DEBUG kernel, AFT > 1 logs will be sent to the system log but not
 * printed on the console.  To send all AFT logs to both the log and the
 * console, set aft_verbose = 1.
 */

/*
 * Individual print flags: each bit selects one fault field for the
 * logflags argument of cpu_aflt_log().
 */
#define	CPU_FLTCPU		0x0001	/* print flt_inst as a CPU id */
#define	CPU_SPACE		0x0002	/* print flt_status (data or instr) */
#define	CPU_ERRID		0x0004	/* print flt_id */
#define	CPU_TL			0x0008	/* print flt_tl */
#define	CPU_ERRID_FIRST 	0x0010	/* print flt_id first in message */
#define	CPU_AFSR		0x0020	/* print flt_stat as decoded %afsr */
#define	CPU_AFAR		0x0040	/* print flt_addr as %afar */
#define	CPU_AF_PSYND		0x0080	/* print flt_stat %afsr.PSYND */
#define	CPU_AF_ETS		0x0100	/* print flt_stat %afsr.ETS */
#define	CPU_UDBH		0x0200	/* print flt_sdbh and syndrome */
#define	CPU_UDBL		0x0400	/* print flt_sdbl and syndrome */
#define	CPU_FAULTPC		0x0800	/* print flt_pc */
#define	CPU_SYND		0x1000	/* print flt_synd and unum */

/*
 * Pre-built flag sets, one per error class.  CMN_LFLAGS is every flag
 * except CPU_ERRID_FIRST and CPU_SYND; the others add to or mask bits out
 * of it.
 */
#define	CMN_LFLAGS	(CPU_FLTCPU | CPU_SPACE | CPU_ERRID | CPU_TL |	\
				CPU_AFSR | CPU_AFAR | CPU_AF_PSYND |	\
				CPU_AF_ETS | CPU_UDBH | CPU_UDBL |	\
				CPU_FAULTPC)
#define	UE_LFLAGS	(CMN_LFLAGS | CPU_SYND)
#define	CE_LFLAGS	(UE_LFLAGS & ~CPU_UDBH & ~CPU_UDBL & ~CPU_TL &	\
				~CPU_SPACE)
#define	PARERR_LFLAGS	(CMN_LFLAGS)
#define	WP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL)
#define	CP_LFLAGS	(CMN_LFLAGS & ~CPU_SPACE & ~CPU_TL &		\
				~CPU_FLTCPU & ~CPU_FAULTPC)
#define	BERRTO_LFLAGS	(CMN_LFLAGS)
#define	NO_LFLAGS	(0)

/*
 * NOTE(review): these look like bit-name strings for the kernel's "%b"
 * format (leading \020 = hex output, then <bit number><name> pairs) --
 * confirm against their use in cpu_aflt_log().
 */
#define	AFSR_FMTSTR0	"\020\1ME"
#define	AFSR_FMTSTR1	"\020\040PRIV\037ISAP\036ETP\035IVUE\034TO"	\
			"\033BERR\032LDP\031CP\030WP\027EDP\026UE\025CE"
#define	UDB_FMTSTR	"\020\012UE\011CE"

/*
 * Maximum number of contexts for Spitfire.
 */
#define	MAX_NCTXS	(1 << 13)

/*
 * Save the cache bootup state for use when internal
 * caches are to be re-enabled after an error occurs.
 * Set by cpu_setup() to the LSU_IC and LSU_DC bits returned by get_lsu().
 */
uint64_t	cache_boot_state = 0;

/*
 * PA[31:0] represent Displacement in UPA configuration space.
 */
uint_t	root_phys_addr_lo_mask = 0xffffffff;

/*
 * Spitfire legacy globals
 * Filled in by cpu_fiximp() from the "#itlb-entries" and "#dtlb-entries"
 * PROM properties when those properties are present.
 */
int	itlb_entries;
int	dtlb_entries;

/*
 * Boot-time setup of the Spitfire specific tunables: cache attribute flags,
 * ELF and ISA capability information, page retire policy, saved cache
 * enable state, context count, page coloring, the virtual address hole,
 * the kpm window and the default large page restrictions.
 */
void
cpu_setup(void)
{
	extern int at_flags;
	extern int page_retire_first_ue;
	extern int page_retire_messages;
	extern int disable_initdata_largepages;
	extern int disable_text_largepages;
#if defined(SF_ERRATA_57)
	extern caddr_t errata57_limit;
#endif

	cache |= CACHE_IOCOHERENT | CACHE_PTAG | CACHE_VAC;

	at_flags = EF_SPARC_SUN_US1 | EF_SPARC_32PLUS;

	/*
	 * Spitfire is not FMA-aware today, so page retirement messages
	 * must be turned on.  The UE retirement default is also relaxed
	 * so that transient errors can be cleared.
	 */
	page_retire_messages = 1;
	page_retire_first_ue = 0;

	/*
	 * Remember which internal caches were enabled at boot.
	 */
	cache_boot_state = (LSU_IC | LSU_DC) & get_lsu();

	/*
	 * Default to the largest context count Spitfire supports.  A
	 * non-zero value means it was patched while booting (for
	 * debugging) and is left alone.  Patching through /etc/system
	 * does not work: it happens too late and the system panics.
	 */
	if (!nctxs)
		nctxs = MAX_NCTXS;

	if (use_page_coloring)
		do_pg_coloring = 1;
	if (use_page_coloring && use_virtual_coloring)
		do_virtual_coloring = 1;

	/*
	 * Tune pp_slots to use up to 1/8th of the tlb entries.
	 */
	pp_slots = MIN(8, MAXPP_SLOTS);

	/*
	 * Block stores invalidate every page of the d$, so pagecopy and
	 * friends do not need virtual translations that take virtual
	 * coloring into account.
	 */
	pp_consistent_coloring = 0;

	isa_list =
	    "sparcv9+vis sparcv9 "
	    "sparcv8plus+vis sparcv8plus "
	    "sparcv8 sparcv8-fsmuld sparcv7 sparc";

	cpu_hwcap_flags = AV_SPARC_VIS;

	/*
	 * Spitfire hardware only supports 44 bits of virtual address,
	 * which leaves a hole in the address space that must never be
	 * mapped (see the address map on p23 of the UltraSPARC 1 user's
	 * manual; later CPUs are expected to support wider ranges).
	 *
	 * A spitfire call bug (Spitfire Errata #21) additionally requires
	 * a further 4Gbytes of firewall on each side of the hole defined
	 * by the spec, which is folded into the values below.
	 */
	hole_start = (caddr_t)((uintptr_t)0x80000000000ull - (1ul << 32));
	hole_end = (caddr_t)((uintptr_t)0xfffff80000000000ull + (1ul << 32));

	/*
	 * The kpm mapping window.
	 * kpm_size:
	 *	Size of one kpm range (2TB, i.e. 1 << kpm_size_shift).
	 *	The overall size is kpm_size * vac_colors.
	 * kpm_vbase:
	 *	Virtual start address of the kpm range inside the kernel
	 *	virtual address space; must be kpm_size aligned.
	 */
	kpm_size_shift = 41;
	kpm_size = (size_t)(1ull << 41);		/* 2TB */
	kpm_vbase = (caddr_t)0xfffffa0000000000ull;	/* 16EB - 6TB */

#if defined(SF_ERRATA_57)
	errata57_limit = (caddr_t)0x80000000ul;
#endif

	/*
	 * By default text may use only 8K, 64K and 4M pages, and
	 * initialized data segments only 8K and 64K pages.
	 */
	disable_text_largepages = (1 << TTE256M) | (1 << TTE32M) |
	    (1 << TTE512K);
	disable_initdata_largepages = (1 << TTE256M) | (1 << TTE32M) |
	    (1 << TTE4M) | (1 << TTE512K);
}

/*
 * Fetch an integer PROM property.  A zero-length property is treated as a
 * boolean and yields 1; a property exactly sizeof (int) long yields its
 * value; anything else (including a missing property) yields deflt.
 */
static int
getintprop(pnode_t node, char *name, int deflt)
{
	int	proplen = prom_getproplen(node, name);
	int	result;

	if (proplen == 0)
		return (1);	/* boolean properties */

	if (proplen != (int)sizeof (int))
		return (deflt);

	(void) prom_getprop(node, name, (caddr_t)&result);
	return (result);
}

/*
 * Set the magic constants of the implementation: cache and TLB geometry
 * taken from the PROM node, the VAC parameters, and the hardware copy
 * limits derived from the ecache size.
 */
void
cpu_fiximp(pnode_t dnode)
{
	extern int dcache_line_mask;
	extern uint_t vac_mask;
	extern int vac_size, vac_shift;
	static struct {
		char	*name;
		int	*var;
	} prop[] = {
		{ "dcache-size",		&dcache_size },
		{ "dcache-line-size",		&dcache_linesize },
		{ "icache-size",		&icache_size },
		{ "icache-line-size",		&icache_linesize },
		{ "ecache-size",		&ecache_size },
		{ "ecache-line-size",		&ecache_alignsize },
		{ "ecache-associativity",	&ecache_associativity },
		{ "#itlb-entries",		&itlb_entries },
		{ "#dtlb-entries",		&dtlb_entries },
	};
	const int nprops = sizeof (prop) / sizeof (prop[0]);
	int idx, val, shift, remaining;

	/* Properties that are absent (or have a value of -1) are skipped. */
	for (idx = 0; idx < nprops; idx++) {
		val = getintprop(dnode, prop[idx].name, -1);
		if (val == -1)
			continue;
		*prop[idx].var = val;
	}

	ecache_setsize = ecache_size / ecache_associativity;

	vac_size = S_VAC_SIZE;
	vac_mask = MMU_PAGEMASK & (vac_size - 1);

	/* vac_shift is the number of right shifts before vac_size hits 0 */
	shift = 0;
	for (remaining = vac_size >> 1; remaining != 0; remaining >>= 1)
		shift++;
	vac_shift = shift;

	shm_alignment = vac_size;
	vac = 1;

	dcache_line_mask = (dcache_size - 1) & ~(dcache_linesize - 1);

	/*
	 * UltraSPARC I & II ship with ecaches of .25 MB, .5 MB, 1 MB,
	 * 2 MB, 4 MB and 8 MB.  The copyin/copyout limits are scaled with
	 * the cache size.  VIS_COPY_THRESHOLD comes from the
	 * copyin/copyout code, which will not use VIS instructions below
	 * that many bytes.
	 *
	 * All CPUs in the system are assumed to have the same ecache
	 * size.  This runs very early; /etc/system is parsed afterwards,
	 * so these values can still be overridden from there.
	 */
	hw_copy_limit_1 = VIS_COPY_THRESHOLD;

	if (ecache_size <= 512 * 1024) {
		hw_copy_limit_2 = VIS_COPY_THRESHOLD;
		hw_copy_limit_4 = VIS_COPY_THRESHOLD;
		hw_copy_limit_8 = VIS_COPY_THRESHOLD;
		return;
	}

	switch (ecache_size) {
	case 1024 * 1024:
		hw_copy_limit_2 = 1024;
		hw_copy_limit_4 = 1280;
		hw_copy_limit_8 = 1536;
		break;

	case 2 * 1024 * 1024:
		hw_copy_limit_2 = 1536;
		hw_copy_limit_4 = 2048;
		hw_copy_limit_8 = 2560;
		break;

	case 4 * 1024 * 1024:
		hw_copy_limit_2 = 2048;
		hw_copy_limit_4 = 2560;
		hw_copy_limit_8 = 3072;
		break;

	default:
		hw_copy_limit_2 = 2560;
		hw_copy_limit_4 = 3072;
		hw_copy_limit_8 = 3584;
		break;
	}
}

/*
 * Called by setcpudelay
 *
 * Establishes sys_tick_freq.  The tod module's tod_get_cpufrequency
 * routine is used when it is available; until the tod module is
 * initialized and loaded, an approximate value is used instead.
 */
void
cpu_init_tick_freq(void)
{
	if (tod_ops.tod_get_cpufrequency == NULL) {
#if defined(HUMMINGBIRD)
		/*
		 * On hummingbird %stick is the basis for low level timing.
		 * It is an independent constant-rate clock for general
		 * system use, which leaves power management free to
		 * change the cpu clock speed.
		 */
		if (system_clock_freq == 0)
			cmn_err(CE_PANIC, "invalid system_clock_freq 0x%lx",
			    system_clock_freq);
		sys_tick_freq = system_clock_freq;
#else /* SPITFIRE */
		/* fall back to the frequency recorded in cpunodes[] */
		sys_tick_freq = cpunodes[CPU->cpu_id].clock_freq;
#endif
		return;
	}

	mutex_enter(&tod_lock);
	sys_tick_freq = tod_ops.tod_get_cpufrequency();
	mutex_exit(&tod_lock);
}


/* Issues a mondo to the given UPA id; used by send_one_mondo(). */
void shipit(int upaid);
/* Ticks send_one_mondo() waits for delivery before declaring a timeout. */
extern uint64_t xc_tick_limit;
/* Tick delta between polls above which the timeout window is restarted. */
extern uint64_t xc_tick_jump_limit;

#ifdef SEND_MONDO_STATS
/* Delivery time histogram: [sending cpu][highbit(elapsed ticks) - 1]. */
uint64_t x_early[NCPU][64];
#endif

/*
 * Send a mondo to the CPU identified by cpuid and wait until getidsr()
 * reads back 0.  While the status is non-zero the routine either spins
 * (IDSR_BUSY set) or waits 1us and re-sends (counted as a NACK).  If
 * xc_tick_limit ticks pass without success it panics, unless
 * panic_quiesce is set, in which case it returns silently.
 *
 * Note: A version of this function is used by the debugger via the KDI,
 * and must be kept in sync with this version.  Any changes made to this
 * function to support new chips or to accommodate errata must also be
 * included in the KDI-specific version.  See spitfire_kdi.c.
 */
void
send_one_mondo(int cpuid)
{
	uint64_t idsr, starttick, endtick;
	int upaid, busy, nack;
	uint64_t tick, tick_prev;
	ulong_t ticks;

	CPU_STATS_ADDQ(CPU, sys, xcalls, 1);
	upaid = CPUID_TO_UPAID(cpuid);
	tick = starttick = gettick();
	shipit(upaid);
	endtick = starttick + xc_tick_limit;
	busy = nack = 0;
	for (;;) {
		idsr = getidsr();
		/* zero status: nothing pending any more, we are done */
		if (idsr == 0)
			break;
		/*
		 * When we detect an irregular tick jump, we adjust
		 * the timer window to the current tick value.
		 */
		tick_prev = tick;
		tick = gettick();
		ticks = tick - tick_prev;
		if (ticks > xc_tick_jump_limit) {
			endtick = tick + xc_tick_limit;
		} else if (tick > endtick) {
			/* timed out; stay quiet if a panic is in progress */
			if (panic_quiesce)
				return;
			cmn_err(CE_PANIC,
			"send mondo timeout (target 0x%x) [%d NACK %d BUSY]",
			upaid, nack, busy);
		}
		/* still busy: keep polling without re-sending */
		if (idsr & IDSR_BUSY) {
			busy++;
			continue;
		}
		/* not busy but not clear either: back off 1us and re-send */
		drv_usecwait(1);
		shipit(upaid);
		nack++;
		busy = 0;
	}
#ifdef SEND_MONDO_STATS
	x_early[getprocessorid()][highbit(gettick() - starttick) - 1]++;
#endif
}

/*
 * Send a mondo to every CPU in set, in ascending cpu id order.  The local
 * copy of the set is drained as we go so the scan can stop as soon as the
 * last member has been handled.
 */
void
send_mondo_set(cpuset_t set)
{
	int cpuid = 0;

	while (cpuid < NCPU) {
		if (CPU_IN_SET(set, cpuid)) {
			send_one_mondo(cpuid);
			CPUSET_DEL(set, cpuid);
			if (CPUSET_ISNULL(set))
				return;
		}
		cpuid++;
	}
}

/*
 * Intentionally empty: this CPU module performs no work in syncfpu().
 * NOTE(review): presumably required by the common cpu module interface --
 * confirm against the callers.
 */
void
syncfpu(void)
{
}

/*
 * Determine the size of the CPU module's error structure in bytes.  This is
 * called once during boot to initialize the error queues.
 */
int
cpu_aflt_size(void)
{
	/*
	 * We need to determine whether this is a sabre, Hummingbird or a
	 * Spitfire/Blackbird impl and set the appropriate state variables for
	 * ecache tag manipulation.  We can't do this in cpu_setup() as it is
	 * too early in the boot flow and the cpunodes are not initialized.
	 * This routine will be called once after cpunodes[] is ready, so do
	 * it here.
	 */
	if (cpunodes[CPU->cpu_id].implementation == SABRE_IMPL) {
		isus2i = 1;
		cpu_ec_tag_mask = SB_ECTAG_MASK;
		cpu_ec_state_mask = SB_ECSTATE_MASK;
		cpu_ec_par_mask = SB_ECPAR_MASK;
		cpu_ec_par_shift = SB_ECPAR_SHIFT;
		cpu_ec_tag_shift = SB_ECTAG_SHIFT;
		cpu_ec_state_shift = SB_ECSTATE_SHIFT;
		cpu_ec_state_exl = SB_ECSTATE_EXL;
		cpu_ec_state_mod = SB_ECSTATE_MOD;

		/* These states do not exist in sabre - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = SB_ECSTATE_VALID;
		cpu_ec_state_dirty = SB_ECSTATE_DIRTY;
		cpu_ec_state_parity = SB_ECSTATE_PARITY;
		cpu_ec_parity = SB_EC_PARITY;
	} else if (cpunodes[CPU->cpu_id].implementation == HUMMBRD_IMPL) {
		isus2e = 1;
		cpu_ec_tag_mask = HB_ECTAG_MASK;
		cpu_ec_state_mask = HB_ECSTATE_MASK;
		cpu_ec_par_mask = HB_ECPAR_MASK;
		cpu_ec_par_shift = HB_ECPAR_SHIFT;
		cpu_ec_tag_shift = HB_ECTAG_SHIFT;
		cpu_ec_state_shift = HB_ECSTATE_SHIFT;
		cpu_ec_state_exl = HB_ECSTATE_EXL;
		cpu_ec_state_mod = HB_ECSTATE_MOD;

		/* These states do not exist in hummingbird - set to 0xFF */
		cpu_ec_state_shr = 0xFF;
		cpu_ec_state_own = 0xFF;

		cpu_ec_state_valid = HB_ECSTATE_VALID;
		cpu_ec_state_dirty = HB_ECSTATE_DIRTY;
		cpu_ec_state_parity = HB_ECSTATE_PARITY;
		cpu_ec_parity = HB_EC_PARITY;
	}

	return (sizeof (spitf_async_flt));
}


/*
 * Correctable ecc error trap handler
 *
 * Builds a spitf_async_flt on the stack from the trap-time AFSR/AFAR,
 * logs with CE_PANIC if the AFSR or UDB state is inconsistent with a CE,
 * then, for each UDB half whose CE bit is set, calls ce_scrub() and
 * (unless queueing was suppressed, see below) dispatches an ereport to
 * ce_queue.  Error trapping is re-enabled before returning.
 *
 *	rp	- trap-time registers; only r_pc is used here
 *	p_afar	- trap-time AFAR
 *	p_afsr	- trap-time AFSR with the UDBH/UDBL values or'ed into
 *		  its upper word
 *	p_afsr_high, p_afar_high - not used by this routine
 */
/*ARGSUSED*/
void
cpu_ce_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl;
	ushort_t e_syndh, e_syndl;
	spitf_async_flt spf_flt;
	struct async_flt *ecc;
	int queue = 1;		/* cleared to suppress the ereport(s) */

	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by ce_err().
	 * UDBH is taken from bits 33-42 and UDBL from bits 43-52.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	e_syndh = (uchar_t)(sdbh & (uint_t)P_DER_E_SYND);
	e_syndl = (uchar_t)(sdbl & (uint_t)P_DER_E_SYND);

	/* strip the UDB bits again so only the real AFSR bits remain */
	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/* Setup the async fault structure */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	ecc = (struct async_flt *)&spf_flt;
	ecc->flt_id = gethrtime_waitfree();
	ecc->flt_stat = t_afsr;
	ecc->flt_addr = t_afar;
	ecc->flt_status = ECC_C_TRAP;
	ecc->flt_bus_id = getprocessorid();
	ecc->flt_inst = CPU->cpu_id;
	ecc->flt_pc = (caddr_t)rp->r_pc;
	ecc->flt_func = log_ce_err;
	ecc->flt_in_memory =
		(pf_is_memory(ecc->flt_addr >> MMU_PAGESHIFT)) ? 1: 0;
	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal conditions.
	 */
	check_misc_err(&spf_flt);

	/*
	 * Paranoid checks for valid AFSR and UDBs
	 */
	if ((t_afsr & P_AFSR_CE) == 0) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bit not set in the AFSR",
			"  Corrected Memory Error on");
	}

	/*
	 * We want to skip logging only if ALL the following
	 * conditions are true:
	 *
	 *	1. There is only one error
	 *	2. That error is a correctable memory error
	 *	3. The error is caused by the memory scrubber (in which case
	 *	    the error will have occurred under on_trap protection)
	 *	4. The error is on a retired page
	 *
	 * Note: OT_DATA_EC is used places other than the memory scrubber.
	 * However, none of those errors should occur on a retired page.
	 */
	if ((ecc->flt_stat & (S_AFSR_ALL_ERRS & ~P_AFSR_ME)) == P_AFSR_CE &&
	    curthread->t_ontrap != NULL) {

		if (curthread->t_ontrap->ot_prot & OT_DATA_EC) {
			if (page_retire_check(ecc->flt_addr, NULL) == 0) {
				queue = 0;
			}
		}
	}

	if (((sdbh & P_DER_CE) == 0) && ((sdbl & P_DER_CE) == 0)) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CMN_LFLAGS,
			"** Panic due to CE bits not set in the UDBs",
			" Corrected Memory Error on");
	}

	/*
	 * Bit 8 of each UDB value is tested below.
	 * NOTE(review): presumably the same bit as P_DER_CE -- confirm.
	 */
	if ((sdbh >> 8) & 1) {
		ecc->flt_synd = e_syndh;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	if ((sdbl >> 8) & 1) {
		ecc->flt_addr = t_afar | 0x8;	/* Sabres do not have a UDBL */
		/* UDBL_REG tags the syndrome as coming from the lower half */
		ecc->flt_synd = e_syndl | UDBL_REG;
		ce_scrub(ecc);
		if (queue) {
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CE, ecc,
			    sizeof (*ecc), ce_queue, ERRORQ_ASYNC);
		}
	}

	/*
	 * Re-enable all error trapping (CEEN currently cleared).
	 */
	clr_datapath();
	set_asyncflt(P_AFSR_CE);
	set_error_enable(EER_ENABLE);
}

/*
 * Cpu specific CE logging routine
 */
static void
log_ce_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt spf_flt;

	if ((aflt->flt_stat & P_AFSR_CE) && (ce_verbose_memory == 0)) {
		return;
	}

	spf_flt.cmn_asyncflt = *aflt;
	cpu_aflt_log(CE_CONT, 0, &spf_flt, CE_LFLAGS, unum,
	    " Corrected Memory Error detected by");
}

/*
 * Spitfire does not perform any further CE classification refinement
 *
 * All arguments are ignored and 0 is always returned.
 */
/*ARGSUSED*/
int
ce_scrub_xdiag_recirc(struct async_flt *ecc, errorq_t *eqp, errorq_elem_t *eqep,
    size_t afltoffset)
{
	return (0);
}

/*
 * Translate the ECC classification bits in aflt->flt_status into the
 * matching ERR_TYPE_DESC_* string.  Intermittent takes precedence over
 * persistent, which takes precedence over sticky; if none of the three
 * bits is set the "unknown" description is returned.
 */
char *
flt_to_error_type(struct async_flt *aflt)
{
	char *desc = ERR_TYPE_DESC_UNKNOWN;

	if (aflt->flt_status & ECC_INTERMITTENT)
		desc = ERR_TYPE_DESC_INTERMITTENT;
	else if (aflt->flt_status & ECC_PERSISTENT)
		desc = ERR_TYPE_DESC_PERSISTENT;
	else if (aflt->flt_status & ECC_STICKY)
		desc = ERR_TYPE_DESC_STICKY;

	return (desc);
}

/*
 * Called by the correctable ecc error logging code to report whether the
 * error was sticky, persistent or intermittent, and whether it was seen
 * in the ecache or in memory.
 */
static void
cpu_ce_log_status(spitf_async_flt *spf_flt, char *unum)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	ushort_t flags = aflt->flt_status;
	char *where;
	char *persistence;

	where = (flags & ECC_ECACHE) ? "Ecache" : "Memory";

	/* sticky wins over persistent; intermittent is the fallback */
	if (flags & ECC_STICKY)
		persistence = "Sticky";
	else if (flags & ECC_PERSISTENT)
		persistence = "Persistent";
	else
		persistence = "Intermittent";

	cpu_aflt_log(CE_CONT, 0, spf_flt, CPU_ERRID_FIRST,
	    NULL, " Corrected %s Error on %s is %s",
	    where, unum, persistence);
}

/*
 * Scrub the faulting address with the displacement flush scrubbing code
 * and classify the error from what the scrub turns up:
 *
 *	intermittent	the scrub did not trip over the error again
 *	persistent	the scrub tripped over it, but a re-read is clean
 *	sticky		the error is still present after the scrub
 *
 * The classification replaces any previous ECC_INTERMITTENT,
 * ECC_PERSISTENT or ECC_STICKY bit in ecc->flt_status.  If the scrub
 * raises a TIMEOUT/BERR the address is taken to be invalid and
 * flt_status is left unchanged.
 */
/*ARGSUSED1*/
void
cpu_ce_scrub_mem_err(struct async_flt *ecc, boolean_t triedcpulogout)
{
	uint64_t saved_eer, afsr;
	uint64_t ce_nce_on;
	ushort_t verdict;

	ASSERT(getpil() > LOCK_LEVEL);

	/*
	 * flt_addr may not be a valid physical address.  To cope with
	 * that, CEEN and NCEEN are turned off while the address is
	 * scrubbed; a TIMEOUT/BERR recorded in the AFSR afterwards tells
	 * us the location is not valid memory.
	 */
	kpreempt_disable();
	saved_eer = get_error_enable();
	ce_nce_on = saved_eer & (EER_CEEN | EER_NCEEN);
	if (ce_nce_on)
		set_error_enable(saved_eer & ~(EER_CEEN | EER_NCEEN));

	/*
	 * For an error detected by IO, force flt_stat to CE so that the
	 * persistent/sticky/intermittent check below applies to it.
	 */
	if (ecc->flt_status & ECC_IOBUS)
		ecc->flt_stat = P_AFSR_CE;

	scrubphys(P2ALIGN(ecc->flt_addr, 64),
	    cpunodes[CPU->cpu_id].ecache_size);

	get_asyncflt(&afsr);
	if (afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		/*
		 * Clear the TIMEOUT/BERR from the AFSR first so that it
		 * is not taken as soon as NCEEN is re-enabled.
		 */
		set_asyncflt(afsr & (P_AFSR_TO | P_AFSR_BERR));
		if (ce_nce_on)
			set_error_enable(saved_eer);
		kpreempt_enable();
		return;
	}

	if (saved_eer & EER_NCEEN)
		set_error_enable(saved_eer & ~EER_CEEN);

	/*
	 * Check and clear any ECC errors from the scrub.  Start from
	 * "intermittent" and upgrade only if the scrub tripped over the
	 * error again: sticky if it is still there after a re-read,
	 * persistent otherwise.
	 */
	verdict = ECC_INTERMITTENT;
	if (check_ecc(ecc) != 0) {
		cpu_read_paddr(ecc, 0, 1);
		verdict = (check_ecc(ecc) != 0) ? ECC_STICKY : ECC_PERSISTENT;
	}

	if (ce_nce_on)
		set_error_enable(saved_eer);
	kpreempt_enable();

	ecc->flt_status &= ~(ECC_INTERMITTENT | ECC_PERSISTENT | ECC_STICKY);
	ecc->flt_status |= verdict;
}

/*
 * Log a correctable error.
 *
 * Looks up the unum for the fault, calls the fault's own logging routine
 * (flt_func), counts the error against its unum (retiring the page when
 * ce_count_unum() asks for it and automatic_page_removal is set), and, when
 * verbose logging applies, prints the error status and a decoding of the
 * syndrome.  Finally the cache line is displayed if ce_show_data is set and
 * the fault address is valid.
 */
/*ARGSUSED1*/
void
cpu_ce_log_err(struct async_flt *ecc, errorq_elem_t *eqep)
{
	char unum[UNUM_NAMLEN];
	int unum_len = 0;
	int verbose;
	int retire_code;

	ASSERT(ecc->flt_func != NULL);

	/* Fetch the unum string used in the log messages */
	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, ecc, unum,
	    UNUM_NAMLEN, &unum_len);

	/* Hand off to the fault-specific logging routine */
	(void) (*ecc->flt_func)(ecc, unum);

	/*
	 * Count errors per unum.
	 * Non-memory errors are all counted via a special unum string.
	 */
	retire_code = ce_count_unum(ecc->flt_status, unum_len, unum);
	if (retire_code != PR_OK && automatic_page_removal)
		(void) page_retire(ecc->flt_addr, retire_code);

	/*
	 * Bus faults and AFSR CEs that are not panicking follow the
	 * ce_verbose_memory tunable; everything else is always verbose.
	 */
	if (!ecc->flt_panic &&
	    ((ecc->flt_class == BUS_FAULT) || (ecc->flt_stat & P_AFSR_CE)))
		verbose = (ce_verbose_memory > 0);
	else
		verbose = 1;

	if (verbose) {
		spitf_async_flt log_flt;
		int code;

		log_flt.cmn_asyncflt = *ecc;	/* for cpu_aflt_log() */

		cpu_ce_log_status(&log_flt, unum);

		code = synd_to_synd_code(AFLT_STAT_VALID,
		    SYND(ecc->flt_synd));

		if (SYND_IS_SINGLE_BIT_DATA(code)) {
			cpu_aflt_log(CE_CONT, 0, &log_flt, CPU_ERRID_FIRST,
			    NULL, " ECC Data Bit %2d was in error "
			    "and corrected", code);
		} else if (SYND_IS_SINGLE_BIT_CHK(code)) {
			cpu_aflt_log(CE_CONT, 0, &log_flt, CPU_ERRID_FIRST,
			    NULL, " ECC Check Bit %2d was in error "
			    "and corrected", code - C0);
		} else if (code == M2) {
			/*
			 * This and the remaining cases are UE errors - we
			 * shouldn't be getting CE traps for these; handle
			 * them in case of bad h/w.
			 */
			cpu_aflt_log(CE_CONT, 0, &log_flt, CPU_ERRID_FIRST,
			    NULL, " Two ECC Bits were in error");
		} else if (code == M3) {
			cpu_aflt_log(CE_CONT, 0, &log_flt, CPU_ERRID_FIRST,
			    NULL, " Three ECC Bits were in error");
		} else if (code == M4) {
			cpu_aflt_log(CE_CONT, 0, &log_flt, CPU_ERRID_FIRST,
			    NULL, " Four ECC Bits were in error");
		} else if (code == MX) {
			cpu_aflt_log(CE_CONT, 0, &log_flt, CPU_ERRID_FIRST,
			    NULL, " More than Four ECC bits were "
			    "in error");
		} else {
			cpu_aflt_log(CE_CONT, 0, &log_flt, CPU_ERRID_FIRST,
			    NULL, " Unknown fault syndrome %d", code);
		}
	}

	/* Display entire cache line, if valid address */
	if (ce_show_data && ecc->flt_addr != AFLT_INV_ADDR)
		read_ecc_data(ecc, 1, 1);
}

/*
 * Single dispatch point for logging errors: CPU faults go to
 * cpu_async_log_err(), bus faults go to bus_async_log_err(), and a fault
 * with any other class is dropped with a warning.
 */
void
cpu_ue_log_err(struct async_flt *aflt)
{
	if (aflt->flt_class == CPU_FAULT) {
		cpu_async_log_err(aflt);
	} else if (aflt->flt_class == BUS_FAULT) {
		bus_async_log_err(aflt);
	} else {
		cmn_err(CE_WARN, "discarding async error 0x%p with invalid "
		    "fault class (0x%x)", (void *)aflt, aflt->flt_class);
	}
}

/*
 * Values for action variable in cpu_async_error().  The variable records
 * how the handler arranged for the error to be dealt with once the trap
 * returns; the handler asserts that it is not ACTION_NONE unless it is
 * panicking or the error included WP.
 */
/* Initial value: nothing has been arranged. */
#define	ACTION_NONE		0
/* The saved pc/npc were redirected to an on_trap or t_lofault handler. */
#define	ACTION_TRAMPOLINE	1
/* Async error flags were set in the lwp's pcb and an AST was posted. */
#define	ACTION_AST_FLAGS	2

/*
 * Access error trap handler for asynchronous cpu errors.  This routine is
 * called to handle a data or instruction access error.  All fatal errors are
 * completely handled by this routine (by panicking).  Non fatal error logging
 * is queued for later processing either via AST or softint at a lower PIL.
 * In case of panic, the error log queue will also be processed as part of the
 * panic flow to ensure all errors are logged.  This routine is called with all
 * errors disabled at PIL15.  The AFSR bits are cleared and the UDBL and UDBH
 * error bits are also cleared.  The hardware has also disabled the I and
 * D-caches for us, so we must re-enable them before returning.
 *
 * A summary of the handling of tl=0 UE/LDP/EDP/TO/BERR/WP/CP:
 *
 *		_______________________________________________________________
 *		|        Privileged tl0		|         Unprivileged	      |
 *		| Protected	| Unprotected	| Protected	| Unprotected |
 *		|on_trap|lofault|		|		|	      |
 * -------------|-------|-------+---------------+---------------+-------------|
 *		|	|	|		|		|	      |
 * UE/LDP/EDP	| L,T,p	| L,R,p	| L,P		| n/a		| L,R,p	      |
 *		|	|	|		|		|	      |
 * TO/BERR	| T	| S	| L,P		| n/a		| S	      |
 *		|	|	|		|		|	      |
 * WP		| L,M,p | L,M,p	| L,M,p		| n/a		| L,M,p       |
 *		|	|	|		|		|	      |
 * CP (IIi/IIe)	| L,P	| L,P	| L,P		| n/a		| L,P	      |
 * ____________________________________________________________________________
 *
 *
 * Action codes:
 *
 * L - log
 * M - kick off memscrubber if flt_in_memory
 * P - panic
 * p - panic if US-IIi or US-IIe (Sabre); overrides R and M
 * R - i)  if aft_panic is set, panic
 *     ii) otherwise, send hwerr event to contract and SIGKILL to process
 * S - send SIGBUS to process
 * T - trampoline
 *
 * Special cases:
 *
 * 1) if aft_testfatal is set, all faults result in a panic regardless
 *    of type (even WP), protection (even on_trap), or privilege.
 */
/*
 * Arguments to cpu_async_error():
 *
 *	rp		saved registers of the trapped context; r_pc, r_npc
 *			and r_g1 may be rewritten to redirect the return
 *	p_afar		AFAR value at the time of the trap
 *	p_afsr		AFSR value; the upper bits additionally carry the
 *			UDBH/UDBL error registers, the trap type and TL,
 *			which are decoded at the top of the function
 *	p_afsr_high,
 *	p_afar_high	not referenced by this implementation
 */
/*ARGSUSED*/
void
cpu_async_error(struct regs *rp, ulong_t p_afar, ulong_t p_afsr,
	uint_t p_afsr_high, uint_t p_afar_high)
{
	ushort_t sdbh, sdbl, ttype, tl;
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	/*
	 * pr_reason accumulates one tag per error bit seen, appended with
	 * strcat().  Size it for the longest possible combination ("CP "
	 * and "ORPHAN_CP " are mutually exclusive) so that no mix of AFSR
	 * bits can overrun the buffer.
	 */
	char pr_reason[sizeof ("UE EDP LDP BTO BERR WP ORPHAN_CP ")];
	uint64_t oafsr;
	uint64_t acc_afsr = 0;			/* accumulated afsr */
	int action = ACTION_NONE;
	uint64_t t_afar = p_afar;
	uint64_t t_afsr = p_afsr;
	int expected = DDI_FM_ERR_UNEXPECTED;
	ddi_acc_hdl_t *hp;

	/*
	 * We need to look at p_flag to determine if the thread detected an
	 * error while dumping core.  We can't grab p_lock here, but it's ok
	 * because we just need a consistent snapshot and we know that everyone
	 * else will store a consistent set of bits while holding p_lock.  We
	 * don't have to worry about a race because SDOCORE is set once prior
	 * to doing i/o from the process's address space and is never cleared.
	 */
	uint_t pflag = ttoproc(curthread)->p_flag;

	pr_reason[0] = '\0';

	/*
	 * Note: the Spitfire data buffer error registers
	 * (upper and lower halves) are or'ed into the upper
	 * word of the afsr by async_err() if P_AFSR_UE is set.
	 */
	sdbh = (ushort_t)((t_afsr >> 33) & 0x3FF);
	sdbl = (ushort_t)((t_afsr >> 43) & 0x3FF);

	/*
	 * Grab the ttype encoded in <63:53> of the saved
	 * afsr passed from async_err()
	 */
	ttype = (ushort_t)((t_afsr >> 53) & 0x1FF);
	tl = (ushort_t)(t_afsr >> 62);

	t_afsr &= S_AFSR_MASK;
	t_afar &= SABRE_AFAR_PA;	/* must use Sabre AFAR mask */

	/*
	 * Initialize most of the common and CPU-specific structure.  We derive
	 * aflt->flt_priv from %tstate, instead of from the AFSR.PRIV bit.  The
	 * initial setting of aflt->flt_panic is based on TL: we must panic if
	 * the error occurred at TL > 0.  We also set flt_panic if the test/demo
	 * tuneable aft_testfatal is set (not the default).
	 */
	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = (struct async_flt *)&spf_flt;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_stat = t_afsr;
	aflt->flt_addr = t_afar;
	aflt->flt_bus_id = getprocessorid();
	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_pc = (caddr_t)rp->r_pc;
	aflt->flt_prot = AFLT_PROT_NONE;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_priv = (rp->r_tstate & TSTATE_PRIV) ? 1 : 0;
	aflt->flt_tl = (uchar_t)tl;
	aflt->flt_panic = (tl != 0 || aft_testfatal != 0);
	aflt->flt_core = (pflag & SDOCORE) ? 1 : 0;

	/*
	 * Set flt_status based on the trap type.  If we end up here as the
	 * result of a UE detected by the CE handling code, leave status 0.
	 */
	switch (ttype) {
	case T_DATA_ERROR:
		aflt->flt_status = ECC_D_TRAP;
		break;
	case T_INSTR_ERROR:
		aflt->flt_status = ECC_I_TRAP;
		break;
	}

	spf_flt.flt_sdbh = sdbh;
	spf_flt.flt_sdbl = sdbl;

	/*
	 * Check for fatal async errors.
	 */
	check_misc_err(&spf_flt);

	/*
	 * If the trap occurred in privileged mode at TL=0, we need to check to
	 * see if we were executing in the kernel under on_trap() or t_lofault
	 * protection.  If so, modify the saved registers so that we return
	 * from the trap to the appropriate trampoline routine.
	 */
	if (aflt->flt_priv && tl == 0) {
		if (curthread->t_ontrap != NULL) {
			on_trap_data_t *otp = curthread->t_ontrap;

			if (otp->ot_prot & OT_DATA_EC) {
				aflt->flt_prot = AFLT_PROT_EC;
				otp->ot_trap |= OT_DATA_EC;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
			}

			if ((t_afsr & (P_AFSR_TO | P_AFSR_BERR)) &&
			    (otp->ot_prot & OT_DATA_ACCESS)) {
				aflt->flt_prot = AFLT_PROT_ACCESS;
				otp->ot_trap |= OT_DATA_ACCESS;
				rp->r_pc = otp->ot_trampoline;
				rp->r_npc = rp->r_pc + 4;
				action = ACTION_TRAMPOLINE;
				/*
				 * for peeks and caut_gets errors are expected
				 */
				hp = (ddi_acc_hdl_t *)otp->ot_handle;
				if (!hp)
					expected = DDI_FM_ERR_PEEK;
				else if (hp->ah_acc.devacc_attr_access ==
				    DDI_CAUTIOUS_ACC)
					expected = DDI_FM_ERR_EXPECTED;
			}

		} else if (curthread->t_lofault) {
			aflt->flt_prot = AFLT_PROT_COPY;
			rp->r_g1 = EFAULT;
			rp->r_pc = curthread->t_lofault;
			rp->r_npc = rp->r_pc + 4;
			action = ACTION_TRAMPOLINE;
		}
	}

	/*
	 * Determine if this error needs to be treated as fatal.  Note that
	 * multiple errors detected upon entry to this trap handler does not
	 * necessarily warrant a panic.  We only want to panic if the trap
	 * happened in privileged mode and not under t_ontrap or t_lofault
	 * protection.  The exception is WP: if we *only* get WP, it is not
	 * fatal even if the trap occurred in privileged mode, except on Sabre.
	 *
	 * aft_panic, if set, effectively makes us treat usermode
	 * UE/EDP/LDP faults as if they were privileged - so we will
	 * panic instead of sending a contract event.  A lofault-protected
	 * fault will normally follow the contract event; if aft_panic is
	 * set this will be changed to a panic.
	 *
	 * For usermode BERR/BTO errors, eg from processes performing device
	 * control through mapped device memory, we need only deliver
	 * a SIGBUS to the offending process.
	 *
	 * Some additional flt_panic reasons (eg, WP on Sabre) will be
	 * checked later; for now we implement the common reasons.
	 */
	if (aflt->flt_prot == AFLT_PROT_NONE) {
		/*
		 * Beware - multiple bits may be set in AFSR
		 */
		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP)) {
			if (aflt->flt_priv || aft_panic)
				aflt->flt_panic = 1;
		}

		if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
			if (aflt->flt_priv)
				aflt->flt_panic = 1;
		}
	} else if (aflt->flt_prot == AFLT_PROT_COPY && aft_panic) {
		aflt->flt_panic = 1;
	}

	/*
	 * UE/BERR/TO: Call our bus nexus friends to check for
	 * IO errors that may have resulted in this trap.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR | P_AFSR_UE)) {
		cpu_run_bus_error_handlers(aflt, expected);
	}

	/*
	 * Handle UE: If the UE is in memory, we need to flush the bad line from
	 * the E-cache.  We also need to query the bus nexus for fatal errors.
	 * For sabre, we will panic on UEs. Attempts to do diagnostic read on
	 * caches may introduce more parity errors (especially when the module
	 * is bad) and in sabre there is no guarantee that such errors
	 * (if introduced) are written back as poisoned data.
	 */
	if (t_afsr & P_AFSR_UE) {
		int i;

		(void) strcat(pr_reason, "UE ");

		spf_flt.flt_type = CPU_UE_ERR;
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			MMU_PAGESHIFT)) ? 1: 0;

		/*
		 * With UE, we have the PA of the fault.
		 * Let's do a diagnostic read to get the ecache
		 * data and tag info of the bad line for logging.
		 */
		if (aflt->flt_in_memory) {
			uint32_t ec_set_size;
			/*
			 * Start out "not valid" so that the check after the
			 * tag scan is well defined even if the scan loop
			 * body never runs.
			 */
			uchar_t state = 0;
			uint32_t ecache_idx;
			uint64_t faultpa = P2ALIGN(aflt->flt_addr, 64);

			/* touch the line to put it in ecache */
			acc_afsr |= read_and_clear_afsr();
			(void) lddphys(faultpa);
			acc_afsr |= (read_and_clear_afsr() &
				    ~(P_AFSR_EDP | P_AFSR_UE));

			ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
			    ecache_associativity;

			for (i = 0; i < ecache_associativity; i++) {
				ecache_idx = i * ec_set_size +
				    (aflt->flt_addr % ec_set_size);
				get_ecache_dtag(P2ALIGN(ecache_idx, 64),
					(uint64_t *)&spf_flt.flt_ec_data[0],
					&spf_flt.flt_ec_tag, &oafsr, &acc_afsr);
				acc_afsr |= oafsr;

				state = (uchar_t)((spf_flt.flt_ec_tag &
				    cpu_ec_state_mask) >> cpu_ec_state_shift);

				if ((state & cpu_ec_state_valid) &&
				    ((spf_flt.flt_ec_tag & cpu_ec_tag_mask) ==
				    ((uint64_t)aflt->flt_addr >>
				    cpu_ec_tag_shift)))
					break;
			}

			/*
			 * Check to see if the ecache tag is valid for the
			 * fault PA. In the very unlikely event where the
			 * line could be victimized, no ecache info will be
			 * available. If this is the case, capture the line
			 * from memory instead.
			 */
			if ((state & cpu_ec_state_valid) == 0 ||
			    (spf_flt.flt_ec_tag & cpu_ec_tag_mask) !=
			    ((uint64_t)aflt->flt_addr >> cpu_ec_tag_shift)) {
				for (i = 0; i < 8; i++, faultpa += 8) {
					ec_data_t *ecdptr;

					ecdptr = &spf_flt.flt_ec_data[i];
					acc_afsr |= read_and_clear_afsr();
					ecdptr->ec_d8 = lddphys(faultpa);
					acc_afsr |= (read_and_clear_afsr() &
						    ~(P_AFSR_EDP | P_AFSR_UE));
					ecdptr->ec_afsr = 0;
							/* null afsr value */
				}

				/*
				 * Mark tag invalid to indicate mem dump
				 * when we print out the info.
				 */
				spf_flt.flt_ec_tag = AFLT_INV_ADDR;
			}
			spf_flt.flt_ec_lcnt = 1;

			/*
			 * Flush out the bad line
			 */
			flushecacheline(P2ALIGN(aflt->flt_addr, 64),
				cpunodes[CPU->cpu_id].ecache_size);

			acc_afsr |= clear_errors(NULL, NULL);
		}

		/*
		 * Ask our bus nexus friends if they have any fatal errors. If
		 * so, they will log appropriate error messages and panic as a
		 * result. We then queue an event for each UDB that reports a
		 * UE. Each UE reported in a UDB will have its own log message.
		 *
		 * Note from kbn: In the case where there are multiple UEs
		 * (ME bit is set) - the AFAR address is only accurate to
		 * the 16-byte granularity. One cannot tell whether the AFAR
		 * belongs to the UDBH or UDBL syndromes. In this case, we
		 * always report the AFAR address to be 16-byte aligned.
		 *
		 * If we're on a Sabre, there is no SDBL, but it will always
		 * read as zero, so the sdbl test below will safely fail.
		 */
		if (bus_func_invoke(BF_TYPE_UE) == BF_FATAL || isus2i || isus2e)
			aflt->flt_panic = 1;

		if (sdbh & P_DER_UE) {
			aflt->flt_synd = sdbh & P_DER_E_SYND;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
		if (sdbl & P_DER_UE) {
			aflt->flt_synd = sdbl & P_DER_E_SYND;
			aflt->flt_synd |= UDBL_REG;	/* indicates UDBL */
			if (!(aflt->flt_stat & P_AFSR_ME))
				aflt->flt_addr |= 0x8;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UE,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}

		/*
		 * We got a UE and are panicking, save the fault PA in a known
		 * location so that the platform specific panic code can check
		 * for copyback errors.
		 */
		if (aflt->flt_panic && aflt->flt_in_memory) {
			panic_aflt = *aflt;
		}
	}

	/*
	 * Handle EDP and LDP: Locate the line with bad parity and enqueue an
	 * async error for logging. For Sabre, we panic on EDP or LDP.
	 */
	if (t_afsr & (P_AFSR_EDP | P_AFSR_LDP)) {
		spf_flt.flt_type = CPU_EDP_LDP_ERR;

		if (t_afsr & P_AFSR_EDP)
			(void) strcat(pr_reason, "EDP ");

		if (t_afsr & P_AFSR_LDP)
			(void) strcat(pr_reason, "LDP ");

		/*
		 * Here we have no PA to work with.
		 * Scan each line in the ecache to look for
		 * the one with bad parity.
		 */
		aflt->flt_addr = AFLT_INV_ADDR;
		scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
			&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);
		acc_afsr |= (oafsr & ~P_AFSR_WP);

		/*
		 * If we found a bad PA, update the state to indicate if it is
		 * memory or I/O space.  This code will be important if we ever
		 * support cacheable frame buffers.
		 */
		if (aflt->flt_addr != AFLT_INV_ADDR) {
			aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
				MMU_PAGESHIFT)) ? 1 : 0;
		}

		if (isus2i || isus2e)
			aflt->flt_panic = 1;

		cpu_errorq_dispatch((t_afsr & P_AFSR_EDP) ?
		    FM_EREPORT_CPU_USII_EDP : FM_EREPORT_CPU_USII_LDP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Timeout and bus error handling.  There are two cases to consider:
	 *
	 * (1) If we are in the kernel protected by ddi_peek or ddi_poke,we
	 * have already modified the saved registers so that we will return
	 * from the trap to the appropriate trampoline routine; otherwise panic.
	 *
	 * (2) In user mode, we can simply use our AST mechanism to deliver
	 * a SIGBUS.  We do not log the occurrence - processes performing
	 * device control would generate lots of uninteresting messages.
	 */
	if (t_afsr & (P_AFSR_TO | P_AFSR_BERR)) {
		if (t_afsr & P_AFSR_TO)
			(void) strcat(pr_reason, "BTO ");

		if (t_afsr & P_AFSR_BERR)
			(void) strcat(pr_reason, "BERR ");

		spf_flt.flt_type = CPU_BTO_BERR_ERR;
		if (aflt->flt_priv && aflt->flt_prot == AFLT_PROT_NONE) {
			cpu_errorq_dispatch((t_afsr & P_AFSR_TO) ?
			    FM_EREPORT_CPU_USII_TO : FM_EREPORT_CPU_USII_BERR,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		}
	}

	/*
	 * Handle WP: WP happens when the ecache is victimized and a parity
	 * error was detected on a writeback.  The data in question will be
	 * poisoned as a UE will be written back.  The PA is not logged and
	 * it is possible that it doesn't belong to the trapped thread.  The
	 * WP trap is not fatal, but it could be fatal to someone that
	 * subsequently accesses the toxic page.  We set read_all_memscrub
	 * to force the memscrubber to read all of memory when it awakens.
	 * For Sabre/Hummingbird, WP is fatal because the HW doesn't write a
	 * UE back to poison the data.
	 */
	if (t_afsr & P_AFSR_WP) {
		(void) strcat(pr_reason, "WP ");
		if (isus2i || isus2e) {
			aflt->flt_panic = 1;
		} else {
			read_all_memscrub = 1;
		}
		spf_flt.flt_type = CPU_WP_ERR;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_WP,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * Handle trapping CP error: In Sabre/Hummingbird, parity error in
	 * the ecache on a copyout due to a PCI DMA read is signaled as a CP.
	 * This is fatal.
	 */

	if (t_afsr & P_AFSR_CP) {
		if (isus2i || isus2e) {
			(void) strcat(pr_reason, "CP ");
			aflt->flt_panic = 1;
			spf_flt.flt_type = CPU_TRAPPING_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);
		} else {
			/*
			 * Orphan CP: Happens due to signal integrity problem
			 * on a CPU, where a CP is reported, without reporting
			 * its associated UE. This is handled by locating the
			 * bad parity line and would kick off the memscrubber
			 * to find the UE if in memory or in another's cache.
			 */
			spf_flt.flt_type = CPU_ORPHAN_CP_ERR;
			(void) strcat(pr_reason, "ORPHAN_CP ");

			/*
			 * Here we have no PA to work with.
			 * Scan each line in the ecache to look for
			 * the one with bad parity.
			 */
			aflt->flt_addr = AFLT_INV_ADDR;
			scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
				&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt,
				&oafsr);
			acc_afsr |= oafsr;

			/*
			 * If we found a bad PA, update the state to indicate
			 * if it is memory or I/O space.
			 */
			if (aflt->flt_addr != AFLT_INV_ADDR) {
				aflt->flt_in_memory =
					(pf_is_memory(aflt->flt_addr >>
						MMU_PAGESHIFT)) ? 1 : 0;
			}
			read_all_memscrub = 1;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
			    aflt->flt_panic);

		}
	}

	/*
	 * If we queued an error other than WP or CP and we are going to return
	 * from the trap and the error was in user mode or inside of a
	 * copy routine, set AST flag so the queue will be drained before
	 * returning to user mode.
	 *
	 * For UE/LDP/EDP, the AST processing will SIGKILL the process
	 * and send an event to its process contract.
	 *
	 * For BERR/BTO, the AST processing will SIGBUS the process.  There
	 * will have been no error queued in this case.
	 */
	if ((t_afsr &
	    (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP | P_AFSR_BERR | P_AFSR_TO)) &&
	    (!aflt->flt_priv || aflt->flt_prot == AFLT_PROT_COPY)) {
		int pcb_flag = 0;

		if (t_afsr & (P_AFSR_UE | P_AFSR_LDP | P_AFSR_EDP))
			pcb_flag |= ASYNC_HWERR;

		if (t_afsr & P_AFSR_BERR)
			pcb_flag |= ASYNC_BERR;

		if (t_afsr & P_AFSR_TO)
			pcb_flag |= ASYNC_BTO;

		ttolwp(curthread)->lwp_pcb.pcb_flags |= pcb_flag;
		aston(curthread);
		action = ACTION_AST_FLAGS;
	}

	/*
	 * In response to a deferred error, we must do one of three things:
	 * (1) set the AST flags, (2) trampoline, or (3) panic.  action is
	 * set in cases (1) and (2) - check that either action is set or
	 * (3) is true.
	 *
	 * On II, the WP writes poisoned data back to memory, which will
	 * cause a UE and a panic or reboot when read.  In this case, we
	 * don't need to panic at this time.  On IIi and IIe,
	 * aflt->flt_panic is already set above.
	 */
	ASSERT((aflt->flt_panic != 0) || (action != ACTION_NONE) ||
	    (t_afsr & P_AFSR_WP));

	/*
	 * Make a final sanity check to make sure we did not get any more async
	 * errors and accumulate the afsr.
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	(void) clear_errors(&spf_flt, NULL);

	/*
	 * Take care of a special case: If there is a UE in the ecache flush
	 * area, we'll see it in flush_ecache().  This will trigger the
	 * CPU_ADDITIONAL_ERRORS case below.
	 *
	 * This could occur if the original error was a UE in the flush area,
	 * or if the original error was an E$ error that was flushed out of
	 * the E$ in scan_ecache().
	 *
	 * If it's at the same address that we're already logging, then it's
	 * probably one of these cases.  Clear the bit so we don't trip over
	 * it on the additional errors case, which could cause an unnecessary
	 * panic.
	 */
	if ((aflt->flt_stat & P_AFSR_UE) && aflt->flt_addr == t_afar)
		acc_afsr |= aflt->flt_stat & ~P_AFSR_UE;
	else
		acc_afsr |= aflt->flt_stat;

	/*
	 * Check the accumulated afsr for the important bits.
	 * Make sure the spf_flt.flt_type value is set, and
	 * enqueue an error.
	 */
	if (acc_afsr &
	    (P_AFSR_LEVEL1 | P_AFSR_IVUE | P_AFSR_ETP | P_AFSR_ISAP)) {
		if (acc_afsr & (P_AFSR_UE | P_AFSR_EDP | P_AFSR_LDP |
		    P_AFSR_BERR | P_AFSR_TO | P_AFSR_IVUE | P_AFSR_ETP |
		    P_AFSR_ISAP))
			aflt->flt_panic = 1;

		spf_flt.flt_type = CPU_ADDITIONAL_ERR;
		aflt->flt_stat = acc_afsr;
		cpu_errorq_dispatch(FM_EREPORT_CPU_USII_UNKNOWN,
		    (void *)&spf_flt, sizeof (spf_flt), ue_queue,
		    aflt->flt_panic);
	}

	/*
	 * If aflt->flt_panic is set at this point, we need to panic as the
	 * result of a trap at TL > 0, or an error we determined to be fatal.
	 * We've already enqueued the error in one of the if-clauses above,
	 * and it will be dequeued and logged as part of the panic flow.
	 */
	if (aflt->flt_panic) {
		cpu_aflt_log(CE_PANIC, 1, &spf_flt, CPU_ERRID_FIRST,
		    "See previous message(s) for details", " %sError(s)",
		    pr_reason);
	}

	/*
	 * Before returning, we must re-enable errors, and
	 * reset the caches to their boot-up state.
	 */
	set_lsu(get_lsu() | cache_boot_state);
	set_error_enable(EER_ENABLE);
}

/*
 * Check for miscellaneous fatal errors and panic (via a CE_PANIC log
 * call) if any are seen.  Shared by the CE and UE handling code.
 *
 * ISAP, ETP and IVUE are all considered fatal.  ISAP and ETP are supposed
 * to cause a POR from the system, so in theory those messages are never
 * seen.  When more than one bit is set, only the first in the order ISAP,
 * ETP, IVUE is reported.
 */
static void
check_misc_err(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	char *reason;

	if (aflt->flt_stat & P_AFSR_ISAP) {
		reason = " System Address Parity Error on";
	} else if (aflt->flt_stat & P_AFSR_ETP) {
		reason = " Ecache Tag Parity Error on";
	} else if (aflt->flt_stat & P_AFSR_IVUE) {
		reason = " Interrupt Vector Uncorrectable Error on";
	} else {
		/* None of the fatal bits is set; nothing to report. */
		return;
	}

	cpu_aflt_log(CE_PANIC, 1, spf_flt, CMN_LFLAGS, NULL, reason);
}

/*
 * Convert a syndrome into a syndrome code by indexing ecc_syndrome_tab,
 * which yields the code indicating which bit(s) is(are) bad.
 *
 * Returns -1 when synd_status is not AFLT_STAT_VALID, when the syndrome
 * is zero, or when it lies beyond the end of the table.
 */
static int
synd_to_synd_code(int synd_status, ushort_t synd)
{
	if (synd_status != AFLT_STAT_VALID ||
	    synd == 0 || synd >= SYND_TBL_SIZE)
		return (-1);

	return (ecc_syndrome_tab[synd]);
}

/*
 * Return a string identifying the physical name associated with a
 * memory/cache error.
 *
 * Returns:
 *	0	 success; buf and *lenp were filled in by prom_get_unum()
 *	ENOTSUP	 the fault is not in memory
 *	EINVAL	 the syndrome has no valid code, or the name that came
 *		 back has a length of 1 or less
 *	EIO	 prom_get_unum() failed
 *
 * On any failure buf is set to the empty string and *lenp to 0.
 */
/* ARGSUSED */
int
cpu_get_mem_unum(int synd_status, ushort_t synd, uint64_t afsr,
    uint64_t afar, int cpuid, int flt_in_memory, ushort_t flt_status,
    char *buf, int buflen, int *lenp)
{
	int ret = ENOTSUP;

	if (flt_in_memory) {
		short code = synd_to_synd_code(synd_status, synd);

		if (code == -1)
			ret = EINVAL;
		else if (prom_get_unum(code, P2ALIGN(afar, 8),
		    buf, buflen, lenp) != 0)
			ret = EIO;
		else
			ret = (*lenp <= 1) ? EINVAL : 0;
	}

	if (ret != 0) {
		buf[0] = '\0';
		*lenp = 0;
	}

	return (ret);
}

/*
 * Wrapper for cpu_get_mem_unum() routine that takes an
 * async_flt struct rather than explicit arguments.
 */
int
cpu_get_mem_unum_aflt(int synd_status, struct async_flt *aflt,
    char *buf, int buflen, int *lenp)
{
	return (cpu_get_mem_unum(synd_status, SYND(aflt->flt_synd),
		aflt->flt_stat, aflt->flt_addr, aflt->flt_bus_id,
		aflt->flt_in_memory, aflt->flt_status, buf, buflen, lenp));
}

/*
 * A more generic interface to cpu_get_mem_unum() that may be used by
 * other modules (e.g. mm).
 *
 * Returns ENXIO if afar is (uint64_t)-1, any error reported by
 * cpu_get_mem_unum(), ENAMETOOLONG if the name (of length *lenp) does not
 * fit in buflen bytes, and 0 once the name has been copied into buf.
 * A synd of (uint64_t)-1 is passed down as an invalid syndrome.
 */
int
cpu_get_mem_name(uint64_t synd, uint64_t *afsr, uint64_t afar,
		char *buf, int buflen, int *lenp)
{
	char name[UNUM_NAMLEN];
	int synd_status, in_memory, err;

	/* An all-ones AFAR marks an invalid address. */
	if (afar == (uint64_t)-1)
		return (ENXIO);

	synd_status = (synd == (uint64_t)-1) ?
	    AFLT_STAT_INVALID : AFLT_STAT_VALID;

	in_memory = (pf_is_memory(afar >> MMU_PAGESHIFT)) ? 1 : 0;

	err = cpu_get_mem_unum(synd_status, (ushort_t)synd, *afsr, afar,
	    CPU->cpu_id, in_memory, 0, name, UNUM_NAMLEN, lenp);
	if (err != 0)
		return (err);

	if (*lenp >= buflen)
		return (ENAMETOOLONG);

	(void) strncpy(buf, name, buflen);

	return (0);
}

/*
 * Return memory information associated with a physical address and
 * syndrome.  This implementation provides none: every argument is
 * ignored, no output parameter is written, and ENOTSUP is always
 * returned.
 */
/* ARGSUSED */
int
cpu_get_mem_info(uint64_t synd, uint64_t afar, uint64_t *mem_sizep,
    uint64_t *seg_sizep, uint64_t *bank_sizep, int *segsp, int *banksp,
    int *mcidp)
{
	return (ENOTSUP);
}

/*
 * Return a string identifying the physical name associated with a cpuid.
 * This implementation provides none: the arguments are ignored, buf and
 * *lenp are left untouched, and ENOTSUP is always returned.
 */
/* ARGSUSED */
int
cpu_get_cpu_unum(int cpuid, char *buf, int buflen, int *lenp)
{
	return (ENOTSUP);
}

/*
 * Return the size of the kernel's FRU name buffer (UNUM_NAMLEN).
 *
 * The parameter list is spelled (void) so that the definition also serves
 * as a prototype; an empty () list would leave the arguments unchecked.
 */
size_t
cpu_get_name_bufsize(void)
{
	return (UNUM_NAMLEN);
}

/*
 * CPU-specific logging routine for UEs.
 *
 * Looks up the unum for the fault (written into the caller's unum
 * buffer), logs the uncorrectable error, adds a hint when the syndrome is
 * 0x3, and logs the captured ecache/memory data when the fault is in
 * memory.  On DEBUG kernels the PRIV bit saved from the AFSR is first
 * checked against flt_priv and corrected if the two disagree.
 */
static void
log_ue_err(struct async_flt *aflt, char *unum)
{
	spitf_async_flt *spf_flt = (spitf_async_flt *)aflt;
	int unum_len = 0;

#ifdef DEBUG
	int afsr_priv = (aflt->flt_stat & P_AFSR_PRIV) ? 1 : 0;

	/*
	 * Paranoid check for a priv mismatch; only applicable for UEs.
	 */
	if (afsr_priv != aflt->flt_priv) {
		/*
		 * The priv bits in %tstate and %afsr did not match; we expect
		 * this to be very rare, so flag it with a message.
		 */
		cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    ": PRIV bit in TSTATE and AFSR mismatched; "
		    "TSTATE.PRIV=%d used", (aflt->flt_priv) ? 1 : 0);

		/* make the saved afsr agree with flt_priv */
		if (aflt->flt_priv)
			aflt->flt_stat |= P_AFSR_PRIV;
		else
			aflt->flt_stat &= ~P_AFSR_PRIV;
	}
#endif /* DEBUG */

	(void) cpu_get_mem_unum_aflt(AFLT_STAT_VALID, aflt, unum,
	    UNUM_NAMLEN, &unum_len);

	cpu_aflt_log(CE_WARN, 1, spf_flt, UE_LFLAGS, unum,
	    " Uncorrectable Memory Error on");

	if (SYND(aflt->flt_synd) == 0x3)
		cpu_aflt_log(CE_WARN, 1, spf_flt, CPU_ERRID_FIRST, NULL,
		    " Syndrome 0x3 indicates that this may not be a "
		    "memory module problem");

	if (aflt->flt_in_memory)
		cpu_log_ecmem_info(spf_flt);
}


/*
 * The cpu_async_log_err() function is called via the ue_drain() function to
 * handle logging for CPU events that are dequeued.  As such, it can be invoked
 * from softint context, from AST processing in the trap() flow, or from the
 * panic flow.  We decode the CPU-specific data, and log appropriate messages.
 */
static void
cpu_async_log_err(void *flt)
{
	spitf_async_flt *spf_flt = (spitf_async_flt *)flt;
	struct async_flt *aflt = (struct async_flt *)flt;
	char unum[UNUM_NAMLEN];
	char *space;
	char *ecache_scrub_logstr = NULL;

	switch (spf_flt->flt_type) {
	    case CPU_UE_ERR:
		/*
		 * We want to skip logging only if ALL the following
		 * conditions are true:
		 *
		 *	1. We are not panicking
		 *	2. There is only one error
		 *	3. That error is a memory error
		 *	4. The error is caused by the memory scrubber (in
		 *	   which case the error will have occurred under
		 *	   on_trap protection)
		 *	5. The error is on a retired page
		 *
		 * Note 1: AFLT_PROT_EC is used places other than the memory
		 * scrubber.  However, none of those errors should occur
		 * on a retired page.
		 *
		 * Note 2: In the CE case, these errors are discarded before
		 * the errorq.  In the UE case, we must wait until now --
		 * softcall() grabs a mutex, which we can't do at a high PIL.
		 */
		if (!panicstr &&
		    (aflt->flt_stat & S_AFSR_ALL_ERRS) == P_AFSR_UE &&
		    aflt->flt_prot == AFLT_PROT_EC) {
			if (page_retire_check(aflt->flt_addr, NULL) == 0) {
				/* Zero the address to clear the error */
				softcall(ecc_page_zero, (void *)aflt->flt_addr);
				return;
			}
		}

		/*
		 * Log the UE and check for causes of this UE error that
		 * don't cause a trap (Copyback error).  cpu_async_error()
		 * has already checked the i/o buses for us.
		 */
		log_ue_err(aflt, unum);
		if (aflt->flt_in_memory)
			cpu_check_allcpus(aflt);
		break;

	    case CPU_EDP_LDP_ERR:
		if (aflt->flt_stat & P_AFSR_EDP)
			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
			    NULL, " EDP event on");

		if (aflt->flt_stat & P_AFSR_LDP)
			cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS,
			    NULL, " LDP event on");

		/* Log ecache info if exist */
		if (spf_flt->flt_ec_lcnt > 0) {
			cpu_log_ecmem_info(spf_flt);

			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
			    NULL, " AFAR was derived from E$Tag");
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST,
			    NULL, " No error found in ecache (No fault "
			    "PA available)");
		}
		break;

	    case CPU_WP_ERR:
		/*
		 * If the memscrub thread hasn't yet read
		 * all of memory, as we requested in the
		 * trap handler, then give it a kick to
		 * make sure it does.
		 */
		if (!isus2i && !isus2e && read_all_memscrub)
			memscrub_run();

		cpu_aflt_log(CE_WARN, 1, spf_flt, WP_LFLAGS, NULL,
		    " WP event on");
		return;

	    case CPU_BTO_BERR_ERR:
		/*
		 * A bus timeout or error occurred that was in user mode or not
		 * in a protected kernel code region.
		 */
		if (aflt->flt_stat & P_AFSR_BERR) {
			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
			    spf_flt, BERRTO_LFLAGS, NULL,
			    " Bus Error on System Bus in %s mode from",
			    aflt->flt_priv ? "privileged" : "user");
		}

		if (aflt->flt_stat & P_AFSR_TO) {
			cpu_aflt_log(CE_WARN, aflt->flt_panic ? 1 : 2,
			    spf_flt, BERRTO_LFLAGS, NULL,
			    " Timeout on System Bus in %s mode from",
			    aflt->flt_priv ? "privileged" : "user");
		}

		return;

	    case CPU_PANIC_CP_ERR:
		/*
		 * Process the Copyback (CP) error info (if any) obtained from
		 * polling all the cpus in the panic flow. This case is only
		 * entered if we are panicking.
		 */
		ASSERT(panicstr != NULL);
		ASSERT(aflt->flt_id == panic_aflt.flt_id);

		/* See which space - this info may not exist */
		if (panic_aflt.flt_status & ECC_D_TRAP)
			space = "Data ";
		else if (panic_aflt.flt_status & ECC_I_TRAP)
			space = "Instruction ";
		else
			space = "";

		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
		    " AFAR was derived from UE report,"
		    " CP event on CPU%d (caused %saccess error on %s%d)",
		    aflt->flt_inst, space, (panic_aflt.flt_status & ECC_IOBUS) ?
		    "IOBUS" : "CPU", panic_aflt.flt_bus_id);

		if (spf_flt->flt_ec_lcnt > 0)
			cpu_log_ecmem_info(spf_flt);
		else
			cpu_aflt_log(CE_WARN, 2, spf_flt, CPU_ERRID_FIRST,
			    NULL, " No cache dump available");

		return;

	    case CPU_TRAPPING_CP_ERR:
		/*
		 * For sabre only.  This is a copyback ecache parity error due
		 * to a PCI DMA read.  We should be panicking if we get here.
		 */
		ASSERT(panicstr != NULL);
		cpu_aflt_log(CE_WARN, 1, spf_flt, CP_LFLAGS, NULL,
		    " AFAR was derived from UE report,"
		    " CP event on CPU%d (caused Data access error "
		    "on PCIBus)", aflt->flt_inst);
		return;

		/*
		 * We log the ecache lines of the following states,
		 * clean_bad_idle, clean_bad_busy, dirty_bad_idle and
		 * dirty_bad_busy if ecache_scrub_verbose is set and panic
		 * in addition to logging if ecache_scrub_panic is set.
		 */
	    case CPU_BADLINE_CI_ERR:
		ecache_scrub_logstr = "CBI";
		/* FALLTHRU */

	    case CPU_BADLINE_CB_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "CBB";
		/* FALLTHRU */

	    case CPU_BADLINE_DI_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "DBI";
		/* FALLTHRU */

	    case CPU_BADLINE_DB_ERR:
		if (ecache_scrub_logstr == NULL)
			ecache_scrub_logstr = "DBB";

		cpu_aflt_log(CE_NOTE, 2, spf_flt,
			(CPU_ERRID_FIRST | CPU_FLTCPU), NULL,
			" %s event on", ecache_scrub_logstr);
		cpu_log_ecmem_info(spf_flt);

		return;

	    case CPU_ORPHAN_CP_ERR:
		/*
		 * Orphan CPs, where the CP bit is set, but when a CPU
		 * doesn't report a UE.
		 */
		if (read_all_memscrub)
			memscrub_run();

		cpu_aflt_log(CE_NOTE, 2, spf_flt, (CP_LFLAGS | CPU_FLTCPU),
			NULL, " Orphan CP event on");

		/* Log ecache info if exist */
		if (spf_flt->flt_ec_lcnt > 0)
			cpu_log_ecmem_info(spf_flt);
		else
			cpu_aflt_log(CE_NOTE, 2, spf_flt,
				(CP_LFLAGS | CPU_FLTCPU), NULL,
				" No error found in ecache (No fault "
				"PA available");
		return;

	    case CPU_ECACHE_ADDR_PAR_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
				" E$ Tag Address Parity error on");
		cpu_log_ecmem_info(spf_flt);
		return;

	    case CPU_ECACHE_STATE_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
				" E$ Tag State Parity error on");
		cpu_log_ecmem_info(spf_flt);
		return;

	    case CPU_ECACHE_TAG_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
				" E$ Tag scrub event on");
		cpu_log_ecmem_info(spf_flt);
		return;

	    case CPU_ECACHE_ETP_ETS_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, PARERR_LFLAGS, NULL,
				" AFSR.ETP is set and AFSR.ETS is zero on");
		cpu_log_ecmem_info(spf_flt);
		return;


	    case CPU_ADDITIONAL_ERR:
		cpu_aflt_log(CE_WARN, 1, spf_flt, CMN_LFLAGS & ~CPU_SPACE, NULL,
		    " Additional errors detected during error processing on");
		return;

	    default:
		cmn_err(CE_WARN, "cpu_async_log_err: fault %p has unknown "
		    "fault type %x", (void *)spf_flt, spf_flt->flt_type);
		return;
	}

	/* ... fall through from the UE, EDP, or LDP cases */

	if (aflt->flt_addr != AFLT_INV_ADDR && aflt->flt_in_memory) {
		if (!panicstr) {
			(void) page_retire(aflt->flt_addr, PR_UE);
		} else {
			/*
			 * Clear UEs on panic so that we don't
			 * get haunted by them during panic or
			 * after reboot
			 */
			clearphys(P2ALIGN(aflt->flt_addr, 64),
			    cpunodes[CPU->cpu_id].ecache_size,
			    cpunodes[CPU->cpu_id].ecache_linesize);

			(void) clear_errors(NULL, NULL);
		}
	}

	/*
	 * Log final recover message
	 */
	if (!panicstr) {
		if (!aflt->flt_priv) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error is in User Mode"
			    "\n    and is fatal: "
			    "will SIGKILL process and notify contract");
		} else if (aflt->flt_prot == AFLT_PROT_COPY && aflt->flt_core) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error detected while dumping core;"
			    "\n    core file will be truncated");
		} else if (aflt->flt_prot == AFLT_PROT_COPY) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST,
			    NULL, " Above Error is due to Kernel access"
			    "\n    to User space and is fatal: "
			    "will SIGKILL process and notify contract");
		} else if (aflt->flt_prot == AFLT_PROT_EC) {
			cpu_aflt_log(CE_CONT, 3, spf_flt, CPU_ERRID_FIRST, NULL,
			    " Above Error detected by protected Kernel code"
			    "\n    that will try to clear error from system");
		}
	}
}


/*
 * Check all cpus for non-trapping UE-causing errors
 * In Ultra I/II, we look for copyback errors (CPs)
 */
void
cpu_check_allcpus(struct async_flt *aflt)
{
	spitf_async_flt cp;
	spitf_async_flt *spf_cpflt = &cp;
	struct async_flt *cpflt = (struct async_flt *)&cp;
	int pix;

	cpflt->flt_id = aflt->flt_id;
	cpflt->flt_addr = aflt->flt_addr;

	for (pix = 0; pix < NCPU; pix++) {
		if (CPU_XCALL_READY(pix)) {
			xc_one(pix, (xcfunc_t *)get_cpu_status,
			    (uint64_t)cpflt, 0);

			if (cpflt->flt_stat & P_AFSR_CP) {
				char *space;

				/* See which space - this info may not exist */
				if (aflt->flt_status & ECC_D_TRAP)
					space = "Data ";
				else if (aflt->flt_status & ECC_I_TRAP)
					space = "Instruction ";
				else
					space = "";

				cpu_aflt_log(CE_WARN, 1, spf_cpflt, CP_LFLAGS,
				    NULL, " AFAR was derived from UE report,"
				    " CP event on CPU%d (caused %saccess "
				    "error on %s%d)", pix, space,
				    (aflt->flt_status & ECC_IOBUS) ?
				    "IOBUS" : "CPU", aflt->flt_bus_id);

				if (spf_cpflt->flt_ec_lcnt > 0)
					cpu_log_ecmem_info(spf_cpflt);
				else
					cpu_aflt_log(CE_WARN, 2, spf_cpflt,
					    CPU_ERRID_FIRST, NULL,
					    " No cache dump available");
			}
		}
	}
}

#ifdef DEBUG
/*
 * DEBUG-only test knob: when non-zero, get_cpu_status() ORs P_AFSR_CP into
 * the AFSR value it has read, so the copyback (CP) handling path is taken
 * even though the hardware did not report a CP.
 */
int test_mp_cp = 0;
#endif

/*
 * Cross-call callback routine to tell a CPU to read its own %afsr to check
 * for copyback errors and capture relevant information.
 *
 *	arg: address of a fault structure, used both as a struct async_flt
 *	     and as a spitf_async_flt.  flt_addr must already hold the fault
 *	     PA; flt_stat is always written.  When the CP bit is set, the
 *	     UDB values, flt_ec_lcnt and (if a matching E$ line is found)
 *	     flt_ec_tag and flt_ec_data are written as well.
 *
 * Always returns 0.
 */
static uint_t
get_cpu_status(uint64_t arg)
{
	struct async_flt *aflt = (struct async_flt *)arg;
	spitf_async_flt *spf_flt = (spitf_async_flt *)arg;
	uint64_t afsr;
	uint32_t ec_idx;
	uint64_t sdbh, sdbl;
	int i;
	uint32_t ec_set_size;
	uchar_t valid;
	ec_data_t ec_data[8];
	uint64_t ec_tag, flt_addr_tag, oafsr;
	uint64_t *acc_afsr = NULL;

	get_asyncflt(&afsr);
	/*
	 * Fold in, and then reset, the AFSR bits saved in this CPU's
	 * sfpr_scrub_afsr, when the CPU has private data.
	 */
	if (CPU_PRIVATE(CPU) != NULL) {
		acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		afsr |= *acc_afsr;
		*acc_afsr = 0;
	}

#ifdef DEBUG
	/* Test hook: pretend a copyback error was reported. */
	if (test_mp_cp)
		afsr |= P_AFSR_CP;
#endif
	aflt->flt_stat = afsr;

	if (afsr & P_AFSR_CP) {
		/*
		 * Capture the UDBs
		 */
		get_udb_errors(&sdbh, &sdbl);
		spf_flt->flt_sdbh = (ushort_t)(sdbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(sdbl & 0x3FF);

		/*
		 * Clear CP bit before capturing ecache data
		 * and AFSR info.
		 */
		set_asyncflt(P_AFSR_CP);

		/*
		 * See if we can capture the ecache line for the
		 * fault PA.
		 *
		 * Return a valid matching ecache line, if any.
		 * Otherwise, return the first matching ecache
		 * line marked invalid.
		 */
		flt_addr_tag = aflt->flt_addr >> cpu_ec_tag_shift;
		ec_set_size = cpunodes[CPU->cpu_id].ecache_size /
		    ecache_associativity;
		spf_flt->flt_ec_lcnt = 0;

		/*
		 * Probe one E$ index per way: start at the fault PA's offset
		 * within a set and step by the set size.
		 */
		for (i = 0, ec_idx = (aflt->flt_addr % ec_set_size);
		    i < ecache_associativity; i++, ec_idx += ec_set_size) {
			get_ecache_dtag(P2ALIGN(ec_idx, 64),
				(uint64_t *)&ec_data[0], &ec_tag, &oafsr,
				    acc_afsr);

			/* Not the line for our PA; try the next way. */
			if ((ec_tag & cpu_ec_tag_mask) != flt_addr_tag)
				continue;

			valid = cpu_ec_state_valid &
			    (uchar_t)((ec_tag & cpu_ec_state_mask) >>
			    cpu_ec_state_shift);

			/*
			 * Record this line if it is valid, or if nothing has
			 * been recorded yet; a valid line ends the search.
			 */
			if (valid || spf_flt->flt_ec_lcnt == 0) {
				spf_flt->flt_ec_tag = ec_tag;
				bcopy(&ec_data, &spf_flt->flt_ec_data,
				    sizeof (ec_data));
				spf_flt->flt_ec_lcnt = 1;

				if (valid)
					break;
			}
		}
	}
	return (0);
}

/*
 * CPU-module callback for the non-panicking CPUs.  This routine is invoked
 * from panic_idle() as part of the other CPUs stopping themselves when a
 * panic occurs.  We need to be VERY careful what we do here, since panicstr
 * is NOT set yet and we cannot blow through locks.  If panic_aflt is set
 * (panic_aflt.flt_id is non-zero), we need to read our %afsr to look for
 * CP error information.
 *
 * When a CP is found, a CPU_PANIC_CP_ERR fault describing it is dispatched
 * on ue_queue.
 */
void
cpu_async_panic_callb(void)
{
	spitf_async_flt cp;
	struct async_flt *aflt = (struct async_flt *)&cp;
	uint64_t *scrub_afsr;

	if (panic_aflt.flt_id != 0) {
		/*
		 * Zero the on-stack fault before filling it in.  The whole
		 * structure is copied onto the error queue below, and
		 * get_cpu_status() only sets flt_ec_lcnt when it sees CP
		 * itself.  If CP is picked up from the scrub AFSR instead,
		 * the later CPU_PANIC_CP_ERR logging would otherwise read an
		 * uninitialized line count.
		 */
		bzero(&cp, sizeof (cp));

		aflt->flt_addr = panic_aflt.flt_addr;
		(void) get_cpu_status((uint64_t)aflt);

		/* Pick up a CP recorded in the scrub AFSR since the call. */
		if (CPU_PRIVATE(CPU) != NULL) {
			scrub_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
			if (*scrub_afsr & P_AFSR_CP) {
				aflt->flt_stat |= *scrub_afsr;
				*scrub_afsr = 0;
			}
		}
		if (aflt->flt_stat & P_AFSR_CP) {
			aflt->flt_id = panic_aflt.flt_id;
			aflt->flt_panic = 1;
			aflt->flt_inst = CPU->cpu_id;
			aflt->flt_class = CPU_FAULT;
			cp.flt_type = CPU_PANIC_CP_ERR;
			cpu_errorq_dispatch(FM_EREPORT_CPU_USII_CP,
			    (void *)&cp, sizeof (cp), ue_queue,
			    aflt->flt_panic);
		}
	}
}

/*
 * Turn off all cpu error detection, normally only used for panics.
 *
 * Issues set_error_enable_tl1 through xt_all() with EER_DISABLE and
 * EER_SET_ABSOLUTE as its arguments.
 */
void
cpu_disable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_DISABLE, EER_SET_ABSOLUTE);
}

/*
 * Enable errors.
 *
 * Counterpart of cpu_disable_errors(): issues set_error_enable_tl1 through
 * xt_all() with EER_ENABLE and EER_SET_ABSOLUTE as its arguments.
 */
void
cpu_enable_errors(void)
{
	xt_all(set_error_enable_tl1, EER_ENABLE, EER_SET_ABSOLUTE);
}

/*
 * Read physical memory at the fault address with lddphys().
 *
 *	ecc: fault whose flt_addr (rounded down to 8 bytes) is the first
 *	     word read
 *	verbose: zero reads that single 8-byte word and logs nothing;
 *	     non-zero reads eight consecutive 8-byte words and logs each
 *	ce_err: when logging, non-zero also prints the ECC value that
 *	     ecc_gen() computes for the word
 */
static void
cpu_read_paddr(struct async_flt *ecc, short verbose, short ce_err)
{
	uint64_t base = P2ALIGN(ecc->flt_addr, 8);
	int nwords = verbose ? 8 : 1;
	int w;

	for (w = 0; w < nwords; w++) {
		uint64_t paddr = base + (w * 8);
		uint64_t data = lddphys(paddr);
		uint32_t hi, lo;

		/* The quiet case only needs the read itself. */
		if (!verbose)
			continue;

		hi = (uint32_t)(data >> 32);
		lo = (uint32_t)data;

		if (ce_err) {
			ushort_t ecc_0 = ecc_gen(hi, lo);

			cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
			    NULL, "    Paddr 0x%" PRIx64 ", "
			    "Data 0x%08x.%08x, ECC 0x%x", paddr,
			    hi, lo, ecc_0);
		} else {
			cpu_aflt_log(CE_CONT, 0, NULL, NO_LFLAGS,
			    NULL, "    Paddr 0x%" PRIx64 ", "
			    "Data 0x%08x.%08x", paddr,
			    hi, lo);
		}
	}
}

/*
 * Check-bit generation masks used by ecc_gen().  There is one row per check
 * bit: row i selects, with its hi mask for the upper 32 data bits and its lo
 * mask for the lower 32 data bits, the data bits whose XOR forms bit i of
 * the generated check byte.
 */
static struct {		/* sec-ded-s4ed ecc code */
	uint_t hi, lo;
} ecc_code[8] = {
	{ 0xee55de23U, 0x16161161U },
	{ 0x55eede93U, 0x61612212U },
	{ 0xbb557b8cU, 0x49494494U },
	{ 0x55bb7b6cU, 0x94948848U },
	{ 0x16161161U, 0xee55de23U },
	{ 0x61612212U, 0x55eede93U },
	{ 0x49494494U, 0xbb557b8cU },
	{ 0x94948848U, 0x55bb7b6cU }
};

/*
 * Generate the 8 check bits for a 64-bit data word using the sec-ded-s4ed
 * masks in ecc_code[].
 *
 *	high_bytes: upper 32 bits of the data word
 *	low_bytes: lower 32 bits of the data word
 *
 * Bit i of the result is the parity (XOR of all bits) of the data bits
 * selected by ecc_code[i].
 */
static ushort_t
ecc_gen(uint_t high_bytes, uint_t low_bytes)
{
	uchar_t checker = 0;
	int row;

	for (row = 0; row < 8; row++) {
		/*
		 * The parity of the two masked halves taken together equals
		 * the parity of their XOR, so combine them first and then
		 * fold the 32 bits down onto bit 0.
		 */
		uint_t fold = (high_bytes & ecc_code[row].hi) ^
		    (low_bytes & ecc_code[row].lo);

		fold ^= fold >> 16;
		fold ^= fold >> 8;
		fold ^= fold >> 4;
		fold ^= fold >> 2;
		fold ^= fold >> 1;

		if (fold & 1)
			checker |= (uchar_t)(1 << row);
	}
	return (checker);
}

/*
 * Flush the entire ecache using displacement flush by reading through a
 * physical address range as large as the ecache.
 *
 * The range starts at ecache_flushaddr.  The length handed to flush_ecache()
 * is twice the current CPU's ecache_size, stepped by its ecache_linesize.
 */
void
cpu_flush_ecache(void)
{
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
}

/*
 * read and display the data in the cache line where the
 * original ce error occurred.
 * This routine is mainly used for debugging new hardware.
 *
 *	ecc: the original fault; supplies the address to re-read
 *	verbose, ce_err: passed through to cpu_read_paddr() to control how
 *	     much is read and what is logged
 *
 * The whole sequence runs with preemption disabled and with ECC error traps
 * turned off; the result of check_ecc() is discarded.
 */
void
read_ecc_data(struct async_flt *ecc, short verbose, short ce_err)
{
	kpreempt_disable();
	/* disable ECC error traps */
	set_error_enable(EER_ECC_DISABLE);

	/*
	 * flush the ecache
	 * read the data
	 * check to see if an ECC error occurred
	 */
	flush_ecache(ecache_flushaddr, cpunodes[CPU->cpu_id].ecache_size * 2,
	    cpunodes[CPU->cpu_id].ecache_linesize);
	/* OR the cache_boot_state bits back into the LSU register */
	set_lsu(get_lsu() | cache_boot_state);
	cpu_read_paddr(ecc, verbose, ce_err);
	(void) check_ecc(ecc);

	/* enable ECC error traps */
	set_error_enable(EER_ENABLE);
	kpreempt_enable();
}

/*
 * Check the AFSR bits for UE/CE persistence.
 * If UE or CE errors are detected, the routine clears
 * all the AFSR sticky bits (except CP for
 * spitfire/blackbird) and the UDBs.
 * If ce_debug or ue_debug is set, log any ue/ce errors detected.
 *
 *	ecc: the original fault; its flt_addr, flt_stat and flt_synd are
 *	     compared against the freshly captured error state.
 *
 * Returns 1 if the error is judged persistent: the newly captured AFAR
 * matches ecc->flt_addr and either a UE is present both originally and now,
 * or a CE is present both times with the same syndrome.  Returns 0
 * otherwise, including when neither UE nor CE is currently set.
 */
static int
check_ecc(struct async_flt *ecc)
{
	uint64_t t_afsr;
	uint64_t t_afar;
	uint64_t udbh;
	uint64_t udbl;
	ushort_t udb;
	int persistent = 0;

	/*
	 * Capture the AFSR, AFAR and UDBs info
	 */
	get_asyncflt(&t_afsr);
	get_asyncaddr(&t_afar);
	t_afar &= SABRE_AFAR_PA;
	get_udb_errors(&udbh, &udbl);

	if ((t_afsr & P_AFSR_UE) || (t_afsr & P_AFSR_CE)) {
		/*
		 * Clear the errors
		 */
		clr_datapath();

		/* CP is written back (cleared) only on the us2i/us2e CPUs */
		if (isus2i || isus2e)
			set_asyncflt(t_afsr);
		else
			set_asyncflt(t_afsr & ~P_AFSR_CP);

		/*
		 * determine whether to check UDBH or UDBL for persistence
		 */
		if (ecc->flt_synd & UDBL_REG) {
			udb = (ushort_t)udbl;
			/* UDBL case: compare against the odd 8-byte word */
			t_afar |= 0x8;
		} else {
			udb = (ushort_t)udbh;
		}

		if (ce_debug || ue_debug) {
			spitf_async_flt spf_flt; /* for logging */
			struct async_flt *aflt =
				(struct async_flt *)&spf_flt;

			/* Package the info nicely in the spf_flt struct */
			bzero(&spf_flt, sizeof (spitf_async_flt));
			aflt->flt_stat = t_afsr;
			aflt->flt_addr = t_afar;
			spf_flt.flt_sdbh = (ushort_t)(udbh & 0x3FF);
			spf_flt.flt_sdbl = (ushort_t)(udbl & 0x3FF);

			cpu_aflt_log(CE_CONT, 0, &spf_flt, (CPU_AFSR |
			    CPU_AFAR | CPU_UDBH | CPU_UDBL), NULL,
			    " check_ecc: Dumping captured error states ...");
		}

		/*
		 * if the fault addresses don't match, not persistent
		 */
		if (t_afar != ecc->flt_addr) {
			return (persistent);
		}

		/*
		 * check for UE persistence
		 * since all DIMMs in the bank are identified for a UE,
		 * there's no reason to check the syndrome
		 */
		if ((ecc->flt_stat & P_AFSR_UE) && (t_afsr & P_AFSR_UE)) {
			persistent = 1;
		}

		/*
		 * check for CE persistence
		 */
		if ((ecc->flt_stat & P_AFSR_CE) && (t_afsr & P_AFSR_CE)) {
			if ((udb & P_DER_E_SYND) ==
			    (ecc->flt_synd & P_DER_E_SYND)) {
				persistent = 1;
			}
		}
	}
	return (persistent);
}

#ifdef HUMMINGBIRD
#define	HB_FULL_DIV		1
#define	HB_HALF_DIV		2
#define	HB_LOWEST_DIV		8
#define	HB_ECLK_INVALID		0xdeadbad
static uint64_t hb_eclk[HB_LOWEST_DIV + 1] = {
	HB_ECLK_INVALID, HB_ECLK_1, HB_ECLK_2, HB_ECLK_INVALID,
	HB_ECLK_4, HB_ECLK_INVALID, HB_ECLK_6, HB_ECLK_INVALID,
	HB_ECLK_8 };

#define	HB_SLOW_DOWN		0
#define	HB_SPEED_UP		1

#define	SET_ESTAR_MODE(mode)					\
	stdphysio(HB_ESTAR_MODE, (mode));			\
	/*							\
	 * PLL logic requires minimum of 16 clock		\
	 * cycles to lock to the new clock speed.		\
	 * Wait 1 usec to satisfy this requirement.		\
	 */							\
	drv_usecwait(1);

#define	CHANGE_REFRESH_COUNT(direction, cur_div, new_div)	\
{								\
	volatile uint64_t data;					\
	uint64_t count, new_count;				\
	clock_t delay;						\
	data = lddphysio(HB_MEM_CNTRL0);			\
	count = (data & HB_REFRESH_COUNT_MASK) >> 		\
	    HB_REFRESH_COUNT_SHIFT;				\
	new_count = (HB_REFRESH_INTERVAL *			\
	    cpunodes[CPU->cpu_id].clock_freq) /			\
	    (HB_REFRESH_CLOCKS_PER_COUNT * (new_div) * NANOSEC);\
	data = (data & ~HB_REFRESH_COUNT_MASK) |		\
	    (new_count << HB_REFRESH_COUNT_SHIFT);		\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);        		\
	/*							\
	 * If we are slowing down the cpu and Memory		\
	 * Self Refresh is not enabled, it is required		\
	 * to wait for old refresh count to count-down and	\
	 * new refresh count to go into effect (let new value	\
	 * counts down once).					\
	 */							\
	if ((direction) == HB_SLOW_DOWN &&			\
	    (data & HB_SELF_REFRESH_MASK) == 0) {		\
		/*						\
		 * Each count takes 64 cpu clock cycles		\
		 * to decrement.  Wait for current refresh	\
		 * count plus new refresh count at current	\
		 * cpu speed to count down to zero.  Round	\
		 * up the delay time.				\
		 */						\
		delay = ((HB_REFRESH_CLOCKS_PER_COUNT *		\
		    (count + new_count) * MICROSEC * (cur_div)) /\
		    cpunodes[CPU->cpu_id].clock_freq) + 1;	\
		drv_usecwait(delay);				\
	}							\
}

#define	SET_SELF_REFRESH(bit)					\
{								\
	volatile uint64_t data;					\
	data = lddphysio(HB_MEM_CNTRL0);			\
	data = (data & ~HB_SELF_REFRESH_MASK) |			\
	    ((bit) << HB_SELF_REFRESH_SHIFT);			\
	stdphysio(HB_MEM_CNTRL0, data);				\
	data = lddphysio(HB_MEM_CNTRL0);			\
}
#endif	/* HUMMINGBIRD */

/*
 * Change the CPU clock divisor.  Compiled to a no-op unless HUMMINGBIRD is
 * defined.
 *
 *	new_divisor: requested divisor; must lie between HB_FULL_DIV and
 *	     HB_LOWEST_DIV and have a valid hb_eclk[] entry, otherwise a
 *	     warning is logged and nothing is changed
 *	arg2: unused
 *
 * The current divisor is recovered from the HB_ESTAR_MODE register; the
 * routine panics if it cannot be matched against hb_eclk[].  Transitions
 * between full speed and anything slower than half speed are made in two
 * steps, via half speed.  On success the new divisor is recorded in
 * CPU->cpu_m.divisor.
 */
/* ARGSUSED */
void
cpu_change_speed(uint64_t new_divisor, uint64_t arg2)
{
#ifdef HUMMINGBIRD
	uint64_t cur_mask, cur_divisor = 0;
	volatile uint64_t reg;
	int index;

	/* The range test must come first: it guards the hb_eclk[] index. */
	if ((new_divisor < HB_FULL_DIV || new_divisor > HB_LOWEST_DIV) ||
	    (hb_eclk[new_divisor] == HB_ECLK_INVALID)) {
		cmn_err(CE_WARN, "cpu_change_speed: bad divisor 0x%lx",
		    new_divisor);
		return;
	}

	/* Map the register's clock field back to a divisor. */
	reg = lddphysio(HB_ESTAR_MODE);
	cur_mask = reg & HB_ECLK_MASK;
	for (index = HB_FULL_DIV; index <= HB_LOWEST_DIV; index++) {
		if (hb_eclk[index] == cur_mask) {
			cur_divisor = index;
			break;
		}
	}

	if (cur_divisor == 0)
		cmn_err(CE_PANIC, "cpu_change_speed: current divisor "
		    "can't be determined!");

	/*
	 * If we are already at the requested divisor speed, just
	 * return.
	 */
	if (cur_divisor == new_divisor)
		return;

	/*
	 * When slowing down, the refresh count is changed before the clock;
	 * when speeding up, the clock is changed first.  Self refresh is
	 * enabled on leaving full speed and disabled before returning to it.
	 */
	if (cur_divisor == HB_FULL_DIV && new_divisor == HB_HALF_DIV) {
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

	} else if (cur_divisor == HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);

	} else if (cur_divisor == HB_FULL_DIV && new_divisor > HB_HALF_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * lower speed.
		 */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, HB_HALF_DIV);
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		SET_SELF_REFRESH(HB_SELF_REFRESH_ENABLE);

		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, HB_HALF_DIV, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > HB_HALF_DIV && new_divisor == HB_FULL_DIV) {
		/*
		 * Transition to 1/2 speed first, then to
		 * full speed.
		 */
		SET_ESTAR_MODE(hb_eclk[HB_HALF_DIV]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, HB_HALF_DIV);

		SET_SELF_REFRESH(HB_SELF_REFRESH_DISABLE);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, HB_HALF_DIV, new_divisor);

	} else if (cur_divisor < new_divisor) {
		/* Slowing down between two reduced speeds. */
		CHANGE_REFRESH_COUNT(HB_SLOW_DOWN, cur_divisor, new_divisor);
		SET_ESTAR_MODE(hb_eclk[new_divisor]);

	} else if (cur_divisor > new_divisor) {
		/* Speeding up between two reduced speeds. */
		SET_ESTAR_MODE(hb_eclk[new_divisor]);
		/* LINTED: E_FALSE_LOGICAL_EXPR */
		CHANGE_REFRESH_COUNT(HB_SPEED_UP, cur_divisor, new_divisor);
	}
	CPU->cpu_m.divisor = (uchar_t)new_divisor;
#endif
}

/*
 * Clear the AFSR sticky bits and the UDBs. For Sabre/Spitfire/Blackbird,
 * we clear all the sticky bits. If a non-null pointer to an async fault
 * structure argument is passed in, the captured error state (AFSR, AFAR, UDBs)
 * info will be returned in the structure.  If a non-null pointer to a
 * uint64_t is passed in, this will be updated if the CP bit is set in the
 * AFSR.  The afsr will be returned.
 *
 *	spf_flt: optional; receives flt_stat, flt_addr (masked with
 *	     SABRE_AFAR_PA) and the low 10 bits of each UDB
 *	acc_afsr: optional; the AFSR is ORed into it, but only when the CP
 *	     bit is set
 *
 * Returns the AFSR value as read before it was cleared.
 */
static uint64_t
clear_errors(spitf_async_flt *spf_flt, uint64_t *acc_afsr)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t afsr;
	uint64_t udbh, udbl;

	get_asyncflt(&afsr);

	if ((acc_afsr != NULL) && (afsr & P_AFSR_CP))
		*acc_afsr |= afsr;

	/* Capture the remaining error state before anything is cleared. */
	if (spf_flt != NULL) {
		aflt->flt_stat = afsr;
		get_asyncaddr(&aflt->flt_addr);
		aflt->flt_addr &= SABRE_AFAR_PA;

		get_udb_errors(&udbh, &udbl);
		spf_flt->flt_sdbh = (ushort_t)(udbh & 0x3FF);
		spf_flt->flt_sdbl = (ushort_t)(udbl & 0x3FF);
	}

	set_asyncflt(afsr);		/* clear afsr */
	clr_datapath();			/* clear udbs */
	return (afsr);
}

/*
 * Scan the ecache to look for bad lines.  If found, the afsr, afar, e$ data
 * tag of the first bad line will be returned. We also return the old-afsr
 * (before clearing the sticky bits). The linecnt data will be updated to
 * indicate the number of bad lines detected.
 *
 *	t_afar: out; PA built for the first bad line, or AFLT_INV_ADDR if
 *	     none was found
 *	ecache_data: out; array of 8 entries that receives the 8 captured
 *	     chunks of the first bad line (untouched if none was found)
 *	ecache_tag: out; E$ tag of the first bad line (untouched if none
 *	     was found)
 *	linecnt: out; number of bad lines seen
 *	t_afsr: out; OR of the AFSR values gathered during the scan
 *
 * Each bad line found is flushed with flushecacheline() and the error
 * registers are cleared with clear_errors() as a side effect.
 */
static void
scan_ecache(uint64_t *t_afar, ec_data_t *ecache_data,
	uint64_t *ecache_tag, int *linecnt, uint64_t *t_afsr)
{
	ec_data_t t_ecdata[8];
	uint64_t t_etag, oafsr;
	uint64_t pa = AFLT_INV_ADDR;
	uint32_t i, j, ecache_sz;
	uint64_t acc_afsr = 0;
	uint64_t *cpu_afsr = NULL;

	if (CPU_PRIVATE(CPU) != NULL)
		cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);

	*linecnt = 0;
	ecache_sz = cpunodes[CPU->cpu_id].ecache_size;

	/* Walk the whole E$, one 64-byte line at a time. */
	for (i = 0; i < ecache_sz; i += 64) {
		get_ecache_dtag(i, (uint64_t *)&t_ecdata[0], &t_etag, &oafsr,
		    cpu_afsr);
		acc_afsr |= oafsr;

		/*
		 * Scan through the whole 64 bytes line in 8 8-byte chunks
		 * looking for the first occurrence of an EDP error.  The AFSR
		 * info is captured for each 8-byte chunk.  Note that for
		 * Spitfire/Blackbird, the AFSR.PSYND is captured by h/w in
		 * 16-byte chunk granularity (i.e. the AFSR will be the same
		 * for the high and low 8-byte words within the 16-byte chunk).
		 * For Sabre/Hummingbird, the AFSR.PSYND is captured in 8-byte
		 * granularity and only PSYND bits [7:0] are used.
		 */
		for (j = 0; j < 8; j++) {
			ec_data_t *ecdptr = &t_ecdata[j];

			if (ecdptr->ec_afsr & P_AFSR_EDP) {
				uint64_t errpa;
				ushort_t psynd;
				uint32_t ec_set_size = ecache_sz /
				    ecache_associativity;

				/*
				 * For Spitfire/Blackbird, we need to look at
				 * the PSYND to make sure that this 8-byte chunk
				 * is the right one.  PSYND bits [15:8] belong
				 * to the upper 8-byte (even) chunk.  Bits
				 * [7:0] belong to the lower 8-byte chunk (odd).
				 */
				psynd = ecdptr->ec_afsr & P_AFSR_P_SYND;
				if (!isus2i && !isus2e) {
					if (j & 0x1)
						psynd = psynd & 0xFF;
					else
						psynd = psynd >> 8;

					if (!psynd)
						continue; /* wrong chunk */
				}

				/*
				 * Construct the PA: tag bits from the E$ tag,
				 * low bits from the line index and chunk
				 * offset within a set.
				 */
				errpa = ((t_etag & cpu_ec_tag_mask) <<
				    cpu_ec_tag_shift) | ((i | (j << 3)) %
				    ec_set_size);

				/* clean up the cache line */
				flushecacheline(P2ALIGN(errpa, 64),
					cpunodes[CPU->cpu_id].ecache_size);

				oafsr = clear_errors(NULL, cpu_afsr);
				acc_afsr |= oafsr;

				(*linecnt)++;

				/*
				 * Capture the PA for the first bad line found.
				 * Return the ecache dump and tag info.
				 */
				if (pa == AFLT_INV_ADDR) {
					int k;

					pa = errpa;
					for (k = 0; k < 8; k++)
						ecache_data[k] = t_ecdata[k];
					*ecache_tag = t_etag;
				}
				/* One bad chunk is enough to count this line. */
				break;
			}
		}
	}
	*t_afar = pa;
	*t_afsr = acc_afsr;
}

/*
 * Log the E$ (or memory) line captured in a fault structure.
 *
 *	spf_flt: fault whose flt_ec_tag, flt_ec_lcnt and flt_ec_data[] are
 *	     logged; its flt_addr supplies the PA shown
 *
 * A header line describes either the E$ tag, state and parity, or, when the
 * tag is AFLT_INV_ADDR, notes that memory was dumped instead.  It is
 * followed by one line for each of the eight captured 8-byte chunks.
 */
static void
cpu_log_ecmem_info(spitf_async_flt *spf_flt)
{
	struct async_flt *aflt = (struct async_flt *)spf_flt;
	uint64_t etag = spf_flt->flt_ec_tag;
	char badlines[30];
	char *state_name;
	int chunk;

	if (spf_flt->flt_ec_tag == AFLT_INV_ADDR) {
		/*
		 * An invalid tag means that a memory dump, not an
		 * ecache dump, was captured.
		 */
		uint64_t line_pa = P2ALIGN(aflt->flt_addr, 64);

		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " E$tag != PA from AFAR; E$line was victimized"
		    "\n    dumping memory from PA 0x%08x.%08x instead",
		    (uint32_t)(line_pa >> 32), (uint32_t)line_pa);
	} else {
		uchar_t eparity = (uchar_t)
		    ((etag & cpu_ec_par_mask) >> cpu_ec_par_shift);
		uchar_t estate = (uchar_t)
		    ((etag & cpu_ec_state_mask) >> cpu_ec_state_shift);

		/* Anything that is not a known state is shown as Invalid. */
		if (estate == cpu_ec_state_shr)
			state_name = "Shared";
		else if (estate == cpu_ec_state_exl)
			state_name = "Exclusive";
		else if (estate == cpu_ec_state_own)
			state_name = "Owner";
		else if (estate == cpu_ec_state_mod)
			state_name = "Modified";
		else
			state_name = "Invalid";

		/* Mention the bad line count only when more than one. */
		badlines[0] = '\0';
		if (spf_flt->flt_ec_lcnt > 1) {
			(void) snprintf(badlines, sizeof (badlines),
			    "Badlines found=%d", spf_flt->flt_ec_lcnt);
		}

		cpu_aflt_log(CE_CONT, 2, spf_flt, CPU_ERRID_FIRST, NULL,
		    " PA=0x%08x.%08x\n    E$tag 0x%08x.%08x E$State: %s "
		    "E$parity 0x%02x %s", (uint32_t)(aflt->flt_addr >> 32),
		    (uint32_t)aflt->flt_addr, (uint32_t)(etag >> 32),
		    (uint32_t)etag, state_name,
		    (uint32_t)eparity, badlines);
	}

	/*
	 * Print every captured 8-byte chunk.  The parity syndrome in the
	 * AFSR saved with each chunk tells us whether that chunk is the
	 * bad one; for a memory dump the saved AFSR values are zero.
	 */
	for (chunk = 0; chunk < 8; chunk++) {
		ec_data_t *ecd = &spf_flt->flt_ec_data[chunk];
		uint_t offset = chunk << 3;	/* byte offset in the line */
		ushort_t psynd = ecd->ec_afsr & P_AFSR_P_SYND;
		uint64_t edp = ecd->ec_afsr & P_AFSR_EDP;
		ushort_t synd_mask;
		ushort_t bad;

		/*
		 * Sabre/Hummingbird record the syndrome per 8-byte chunk
		 * in PSYND [7:0] only.  Spitfire/Blackbird record it per
		 * 16 bytes: [15:8] for the upper (even) chunk and [7:0]
		 * for the lower (odd) one.
		 */
		synd_mask = (isus2i || isus2e || (chunk & 0x1)) ?
		    0xFF : 0xFF00;
		bad = (psynd & synd_mask);

		if (bad && edp) {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x "
			    "*Bad* PSYND=0x%04x", offset,
			    (uint32_t)(ecd->ec_d8 >> 32),
			    (uint32_t)ecd->ec_d8, psynd);
		} else {
			cpu_aflt_log(CE_CONT, 2, spf_flt, NO_LFLAGS, NULL,
			    " E$Data (0x%02x): 0x%08x.%08x", offset,
			    (uint32_t)(ecd->ec_d8 >> 32),
			    (uint32_t)ecd->ec_d8);
		}
	}
}

/*
 * Common logging function for all cpu async errors.  This function allows the
 * caller to generate a single cmn_err() call that logs the appropriate items
 * from the fault structure, and implements our rules for AFT logging levels.
 *
 *	ce_code: cmn_err() code (e.g. CE_PANIC, CE_WARN, CE_CONT)
 *	tagnum: 0, 1, 2, .. generate the [AFT#] tag
 *	spflt: pointer to spitfire async fault structure
 *	logflags: bitflags indicating what to output
 *	endstr: a end string to appear at the end of this log
 *	fmt: a format string to appear at the beginning of the log
 *
 * The logflags allows the construction of predetermined output from the spflt
 * structure.  The individual data items always appear in a consistent order.
 * Note that either or both of the spflt structure pointer and logflags may be
 * NULL or zero respectively, indicating that the predetermined output
 * substrings are not requested in this log.  The output looks like this:
 *
 *	[AFT#] <CPU_ERRID_FIRST><fmt string><CPU_FLTCPU>
 *	<CPU_SPACE><CPU_ERRID>
 *	newline+4spaces<CPU_AFSR><CPU_AFAR>
 *	newline+4spaces<CPU_AF_PSYND><CPU_AF_ETS><CPU_FAULTPC>
 *	newline+4spaces<CPU_UDBH><CPU_UDBL>
 *	newline+4spaces<CPU_SYND>
 *	newline+4spaces<endstr>
 *
 * Note that <endstr> may not start on a newline if we are logging <CPU_PSYND>;
 * it is assumed that <endstr> will be the unum string in this case.  The size
 * of our intermediate formatting buf[] is based on the worst case of all flags
 * being enabled.  We pass the caller's varargs directly to vcmn_err() for
 * formatting so we don't need additional stack space to format them here.
 */
/*PRINTFLIKE6*/
static void
cpu_aflt_log(int ce_code, int tagnum, spitf_async_flt *spflt, uint_t logflags,
	const char *endstr, const char *fmt, ...)
{
	/*
	 * spflt is viewed through the common async_flt header for the
	 * generic fields; spflt itself is used for the UDB registers.
	 */
	struct async_flt *aflt = (struct async_flt *)spflt;
	char buf[400], *p, *q; /* see comments about buf[] size above */
	va_list ap;
	int console_log_flag;

	/*
	 * Decide whether this message goes to the console.  With no fault
	 * structure, a CPU fault with any P_AFSR_LEVEL1 bit set, or a fault
	 * marked for panic, tags below 2 always go to the console and higher
	 * tags only if aft_verbose is set.  Every other fault is governed by
	 * ce_verbose_memory (bus faults and CEs) or ce_verbose_other: zero
	 * suppresses the message entirely, and a value above 1 sends it to
	 * the console.
	 */
	if ((aflt == NULL) || ((aflt->flt_class == CPU_FAULT) &&
				(aflt->flt_stat & P_AFSR_LEVEL1)) ||
	    (aflt->flt_panic)) {
		console_log_flag = (tagnum < 2) || aft_verbose;
	} else {
		int verbose = ((aflt->flt_class == BUS_FAULT) ||
		    (aflt->flt_stat & P_AFSR_CE)) ?
		    ce_verbose_memory : ce_verbose_other;

		if (!verbose)
			return;

		console_log_flag = (verbose > 1);
	}

	/*
	 * Start the message with the [AFT#] tag.  When the console is not
	 * wanted the string is prefixed with '!'
	 * (NOTE(review): relies on the cmn_err() convention that a leading
	 * '!' keeps the message off the console -- confirm).
	 */
	if (console_log_flag)
		(void) sprintf(buf, "[AFT%d]", tagnum);
	else
		(void) sprintf(buf, "![AFT%d]", tagnum);

	p = buf + strlen(buf);	/* current buffer position */
	q = buf + sizeof (buf);	/* pointer past end of buffer */

	/*
	 * From here on each piece is appended with snprintf() bounded by the
	 * space left (q - p), and p is advanced to the new terminating NUL.
	 */
	if (spflt != NULL && (logflags & CPU_ERRID_FIRST)) {
		(void) snprintf(p, (size_t)(q - p), " errID 0x%08x.%08x",
		    (uint32_t)(aflt->flt_id >> 32), (uint32_t)aflt->flt_id);
		p += strlen(p);
	}

	/*
	 * Copy the caller's format string verbatim into buf[].  It will be
	 * formatted by the call to vcmn_err() at the end of this function.
	 */
	if (fmt != NULL && p < q) {
		(void) strncpy(p, fmt, (size_t)(q - p - 1));
		/* strncpy() may not terminate; force a NUL in the last byte */
		buf[sizeof (buf) - 1] = '\0';
		p += strlen(p);
	}

	/*
	 * The remaining predetermined substrings are formatted here, so by
	 * the time buf[] reaches vcmn_err() they are plain text.
	 */
	if (spflt != NULL) {
		if (logflags & CPU_FLTCPU) {
			(void) snprintf(p, (size_t)(q - p), " CPU%d",
			    aflt->flt_inst);
			p += strlen(p);
		}

		if (logflags & CPU_SPACE) {
			/* nothing is appended if neither trap bit is set */
			if (aflt->flt_status & ECC_D_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Data access");
			else if (aflt->flt_status & ECC_I_TRAP)
				(void) snprintf(p, (size_t)(q - p),
				    " Instruction access");
			p += strlen(p);
		}

		if (logflags & CPU_TL) {
			(void) snprintf(p, (size_t)(q - p), " at TL%s",
			    aflt->flt_tl ? ">0" : "=0");
			p += strlen(p);
		}

		if (logflags & CPU_ERRID) {
			(void) snprintf(p, (size_t)(q - p),
			    ", errID 0x%08x.%08x",
			    (uint32_t)(aflt->flt_id >> 32),
			    (uint32_t)aflt->flt_id);
			p += strlen(p);
		}

		if (logflags & CPU_AFSR) {
			/* upper and lower AFSR halves, each with its own %b string */
			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR 0x%08b.%08b",
			    (uint32_t)(aflt->flt_stat >> 32), AFSR_FMTSTR0,
			    (uint32_t)aflt->flt_stat, AFSR_FMTSTR1);
			p += strlen(p);
		}

		if (logflags & CPU_AFAR) {
			(void) snprintf(p, (size_t)(q - p), " AFAR 0x%08x.%08x",
			    (uint32_t)(aflt->flt_addr >> 32),
			    (uint32_t)aflt->flt_addr);
			p += strlen(p);
		}

		if (logflags & CPU_AF_PSYND) {
			ushort_t psynd = (ushort_t)
			    (aflt->flt_stat & P_AFSR_P_SYND);

			(void) snprintf(p, (size_t)(q - p),
			    "\n    AFSR.PSYND 0x%04x(Score %02d)",
			    psynd, ecc_psynd_score(psynd));
			p += strlen(p);
		}

		if (logflags & CPU_AF_ETS) {
			(void) snprintf(p, (size_t)(q - p), " AFSR.ETS 0x%02x",
			    (uchar_t)((aflt->flt_stat & P_AFSR_ETS) >> 16));
			p += strlen(p);
		}

		if (logflags & CPU_FAULTPC) {
			(void) snprintf(p, (size_t)(q - p), " Fault_PC 0x%p",
			    (void *)aflt->flt_pc);
			p += strlen(p);
		}

		if (logflags & CPU_UDBH) {
			/* the low byte of the UDB value is printed as ESYND */
			(void) snprintf(p, (size_t)(q - p),
			    "\n    UDBH 0x%04b UDBH.ESYND 0x%02x",
			    spflt->flt_sdbh, UDB_FMTSTR,
			    spflt->flt_sdbh & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_UDBL) {
			(void) snprintf(p, (size_t)(q - p),
			    " UDBL 0x%04b UDBL.ESYND 0x%02x",
			    spflt->flt_sdbl, UDB_FMTSTR,
			    spflt->flt_sdbl & 0xFF);
			p += strlen(p);
		}

		if (logflags & CPU_SYND) {
			ushort_t synd = SYND(aflt->flt_synd);

			/*
			 * The text deliberately ends in "Memory Module " so
			 * that endstr can follow on the same line (see below).
			 */
			(void) snprintf(p, (size_t)(q - p),
			    "\n    %s Syndrome 0x%x Memory Module ",
			    UDBL(aflt->flt_synd) ? "UDBL" : "UDBH", synd);
			p += strlen(p);
		}
	}

	/*
	 * Append the caller's end string: on its own indented line, unless
	 * CPU_SYND was logged, in which case it continues that line.
	 * NOTE(review): endstr becomes part of the format handed to
	 * vcmn_err(), so a '%' in it would be interpreted -- confirm callers
	 * never pass such a string.
	 */
	if (endstr != NULL) {
		if (!(logflags & CPU_SYND))
			(void) snprintf(p, (size_t)(q - p), "\n    %s", endstr);
		else
			(void) snprintf(p, (size_t)(q - p), "%s", endstr);
		p += strlen(p);
	}

	if (ce_code == CE_CONT && (p < q - 1))
		(void) strcpy(p, "\n"); /* add final \n if needed */

	/*
	 * buf[] now holds pre-formatted text plus the caller's raw fmt; the
	 * caller's varargs are consumed by the conversions that came from fmt.
	 */
	va_start(ap, fmt);
	vcmn_err(ce_code, buf, ap);
	va_end(ap);
}

/*
 * Ecache Scrubbing
 *
 * The basic idea is to prevent lines from sitting in the ecache long enough
 * to build up soft errors which can lead to ecache parity errors.
 *
 * The following rules are observed when flushing the ecache:
 *
 * 1. When the system is busy, flush bad clean lines
 * 2. When the system is idle, flush all clean lines
 * 3. When the system is idle, flush good dirty lines
 * 4. Never flush bad dirty lines.
 *
 *	modify	parity	busy   idle
 *	----------------------------
 *	clean	good		X
 * 	clean	bad	X	X
 * 	dirty	good		X
 *	dirty	bad
 *
 * Bad or good refers to whether a line has an E$ parity error or not.
 * Clean or dirty refers to the state of the modified bit.  We currently
 * default the scan rate to 100 (scan 10% of the cache per second).
 *
 * The following are E$ states and actions.
 *
 * We encode our state as a 3-bit number, consisting of:
 *	ECACHE_STATE_MODIFIED	(0=clean, 1=dirty)
 *	ECACHE_STATE_PARITY	(0=good,  1=bad)
 *	ECACHE_STATE_BUSY	(0=idle,  1=busy)
 *
 * We associate a flushing and a logging action with each state.
 *
 * E$ actions are different for Spitfire and Sabre/Hummingbird modules.
 * MIRROR_FLUSH indicates that an E$ line will be flushed for the mirrored
 * E$ only, in addition to value being set by ec_flush.
 */

#define	ALWAYS_FLUSH		0x1	/* flush E$ line on all E$ types */
#define	NEVER_FLUSH		0x0	/* never flush the E$ line */
#define	MIRROR_FLUSH		0xF	/* flush E$ line on mirrored E$ only */

/*
 * Action table for the scrubber, indexed by the 3-bit state value built
 * from ECACHE_STATE_MODIFIED, ECACHE_STATE_PARITY and ECACHE_STATE_BUSY
 * (the "M P B" column in the per-entry comments).  The entry order must
 * therefore follow that binary encoding.
 */
struct {
	char	ec_flush;		/* whether to flush or not */
	char	ec_log;			/* ecache logging */
	char	ec_log_type;		/* log type info */
} ec_action[] = {	/* states of the E$ line in M P B */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 0 0 0 clean_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 0 0 1 clean_good_busy */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CI_ERR }, /* 0 1 0 clean_bad_idle */
	{ ALWAYS_FLUSH, 1, CPU_BADLINE_CB_ERR }, /* 0 1 1 clean_bad_busy */
	{ ALWAYS_FLUSH, 0, 0 },			 /* 1 0 0 dirty_good_idle */
	{ MIRROR_FLUSH, 0, 0 },			 /* 1 0 1 dirty_good_busy */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DI_ERR },	 /* 1 1 0 dirty_bad_idle */
	{ NEVER_FLUSH, 1, CPU_BADLINE_DB_ERR }	 /* 1 1 1 dirty_bad_busy */
};

/*
 * Offsets into the ec_action[] that determines clean_good_busy and
 * dirty_good_busy lines.
 */
#define	ECACHE_CGB_LINE		1	/* E$ clean_good_busy line */
#define	ECACHE_DGB_LINE		5	/* E$ dirty_good_busy line */

/*
 * CGB(x, m) / DGB(x, m) are true when state index x is the clean_good_busy
 * (respectively dirty_good_busy) entry AND the E$ type m is not mirrored.
 * The scrubber uses them to rate-limit the extra flushing of good lines
 * while busy on non-mirrored E$.
 *
 * Both arguments are fully parenthesized so that an expression argument
 * (e.g. "a | b") is compared as a whole rather than being re-associated by
 * operator precedence.
 */
#define	CGB(x, m)	(((x) == ECACHE_CGB_LINE) && ((m) != ECACHE_CPU_MIRROR))
#define	DGB(x, m)	(((x) == ECACHE_DGB_LINE) && ((m) != ECACHE_CPU_MIRROR))

/* Bit values that make up the 3-bit M/P/B state index. */
#define	ECACHE_STATE_MODIFIED	0x4
#define	ECACHE_STATE_PARITY	0x2
#define	ECACHE_STATE_BUSY	0x1

/*
 * Scrubber settings used when the E$ is mirrored: scrub_ecache_line() takes
 * its lines-per-call and its timeout call rate from these two variables
 * instead of deriving them from ecache_scan_rate and ecache_calls_a_sec.
 */
int ecache_calls_a_sec_mirrored = 1;
int ecache_lines_per_call_mirrored = 1;

int ecache_scrub_enable = 1;	/* ecache scrubbing is on by default */
int ecache_scrub_verbose = 1;		/* log E$ lines found with bad parity */
int ecache_scrub_panic = 0;		/* panic when a bad E$ line is logged */
int ecache_calls_a_sec = 100;		/* scrubber calls per sec */
int ecache_scan_rate = 100;		/* scan rate (in tenths of a percent) */
int ecache_idle_factor = 1;		/* increase the scan rate when idle */
int ecache_flush_clean_good_busy = 50;	/* flush rate (in percent) */
int ecache_flush_dirty_good_busy = 100;	/* flush rate (in percent) */

/*
 * Calls per second currently in effect; written by scrub_ecache_line() and
 * used by do_scrub_ecache_line() as the divisor of hz for the next timeout.
 */
volatile int ec_timeout_calls = 1;	/* timeout calls */

/*
 * Interrupt number and pil for ecache scrubber cross-trap calls.
 */
static uint_t ecache_scrub_inum;
uint_t ecache_scrub_pil = PIL_9;

/*
 * Kstats for the E$ scrubber.
 *
 * The first eight members must stay in M/P/B state order:
 * scrub_ecache_line() treats this structure as an array of kstat_named_t
 * and bumps the element at index mpb.
 */
typedef struct ecache_kstat {
	kstat_named_t clean_good_idle;		/* # of lines scrubbed */
	kstat_named_t clean_good_busy;		/* # of lines skipped */
	kstat_named_t clean_bad_idle;		/* # of lines scrubbed */
	kstat_named_t clean_bad_busy;		/* # of lines scrubbed */
	kstat_named_t dirty_good_idle;		/* # of lines scrubbed */
	kstat_named_t dirty_good_busy;		/* # of lines skipped */
	kstat_named_t dirty_bad_idle;		/* # of lines skipped */
	kstat_named_t dirty_bad_busy;		/* # of lines skipped */
	kstat_named_t invalid_lines;		/* # of invalid lines */
	kstat_named_t clean_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t dirty_good_busy_flush;    /* # of lines scrubbed */
	kstat_named_t tags_cleared;		/* # of E$ tags cleared */
} ecache_kstat_t;

/*
 * Name/type template copied into each CPU's kstat data by
 * ecache_kstat_init().  Entries are positional and must match the member
 * order of ecache_kstat_t (note the last one is published under the name
 * "ecache_tags_cleared").
 */
static ecache_kstat_t ec_kstat_template = {
	{ "clean_good_idle", KSTAT_DATA_ULONG },
	{ "clean_good_busy", KSTAT_DATA_ULONG },
	{ "clean_bad_idle", KSTAT_DATA_ULONG },
	{ "clean_bad_busy", KSTAT_DATA_ULONG },
	{ "dirty_good_idle", KSTAT_DATA_ULONG },
	{ "dirty_good_busy", KSTAT_DATA_ULONG },
	{ "dirty_bad_idle", KSTAT_DATA_ULONG },
	{ "dirty_bad_busy", KSTAT_DATA_ULONG },
	{ "invalid_lines", KSTAT_DATA_ULONG },
	{ "clean_good_busy_flush", KSTAT_DATA_ULONG },
	{ "dirty_good_busy_flush", KSTAT_DATA_ULONG },
	{ "ecache_tags_cleared", KSTAT_DATA_ULONG }
};

/*
 * kmem cache from which cpu_init_private() allocates each CPU's
 * spitfire_private_t.  The same tentative definition is repeated just above
 * cpu_init_private() later in this file.
 */
struct kmem_cache *sf_private_cache;

/*
 * Scrub a batch of E$ lines on the current CPU, starting at the saved
 * per-CPU flush index and storing the updated index back when done.
 * Invoked from scrub_ecache_line_intr(), once per outstanding request.
 *
 * For every line visited the tag is read; an ETP (tag parity) condition is
 * handed to ecache_scrub_tag_err(), invalid lines are only counted, and
 * valid lines are classified into a Modified/Parity/Busy state that selects
 * the flush and logging action from ec_action[].
 */
void
scrub_ecache_line()
{
	/*
	 * NOTE(review): ssmp->ecache_ksp is dereferenced unconditionally
	 * below, yet ecache_kstat_init() leaves it NULL when kstat_create()
	 * fails -- confirm that case cannot reach this function.
	 */
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	int cpuid = CPU->cpu_id;
	uint32_t index = ssmp->ecache_flush_index;
	uint64_t ec_size = cpunodes[cpuid].ecache_size;
	size_t ec_linesize = cpunodes[cpuid].ecache_linesize;
	int nlines = ssmp->ecache_nlines;
	uint32_t ec_set_size = ec_size / ecache_associativity;
	int ec_mirror = ssmp->ecache_mirror;
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;

	int line, scan_lines, flush_clean_busy = 0, flush_dirty_busy = 0;
	int mpb;		/* encode Modified, Parity, Busy for action */
	uchar_t state;
	uint64_t ec_tag, paddr, oafsr, tafsr, nafsr;
	uint64_t *acc_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
	ec_data_t ec_data[8];
	kstat_named_t *ec_knp;

	/*
	 * Work out how many lines to scan in this call and how often the
	 * timeout should fire, depending on whether the E$ is mirrored.
	 */
	switch (ec_mirror) {
		default:
		case ECACHE_CPU_NON_MIRROR:
			/*
			 * The E$ scan rate is expressed in units of tenths of
			 * a percent.  ecache_scan_rate = 1000 (100%) means the
			 * whole cache is scanned every second.
			 *
			 * NOTE(review): this divides by ecache_calls_a_sec
			 * before the zero check a few lines below; it relies
			 * on do_scrub_ecache_line() having clamped the
			 * tunable to at least 1.
			 */
			scan_lines = (nlines * ecache_scan_rate) /
					(1000 * ecache_calls_a_sec);
			if (!(ssmp->ecache_busy)) {
				if (ecache_idle_factor > 0) {
					scan_lines *= ecache_idle_factor;
				}
			} else {
				/*
				 * Busy: budget how many good clean/dirty
				 * lines may still be flushed in this call.
				 */
				flush_clean_busy = (scan_lines *
					ecache_flush_clean_good_busy) / 100;
				flush_dirty_busy = (scan_lines *
					ecache_flush_dirty_good_busy) / 100;
			}

			ec_timeout_calls = (ecache_calls_a_sec ?
						ecache_calls_a_sec : 1);
			break;

		case ECACHE_CPU_MIRROR:
			scan_lines = ecache_lines_per_call_mirrored;
			ec_timeout_calls = (ecache_calls_a_sec_mirrored ?
					ecache_calls_a_sec_mirrored : 1);
			break;
	}

	/*
	 * The ecache scrubber algorithm operates by reading and
	 * decoding the E$ tag to determine whether the corresponding E$ line
	 * can be scrubbed. There is an implicit assumption in the scrubber
	 * logic that the E$ tag is valid. Unfortunately, this assertion is
	 * flawed since the E$ tag may also be corrupted and have parity errors
	 * The scrubber logic is enhanced to check the validity of the E$ tag
	 * before scrubbing. When a parity error is detected in the E$ tag,
	 * it is possible to recover and scrub the tag under certain conditions
	 * so that a ETP error condition can be avoided.
	 */

	/* mpb is rebuilt from zero for every line visited */
	for (mpb = line = 0; line < scan_lines; line++, mpb = 0) {
		/*
		 * We get the old-AFSR before clearing the AFSR sticky bits
		 * in {get_ecache_tag, check_ecache_line, get_ecache_dtag}
		 * If CP bit is set in the old-AFSR, we log an Orphan CP event.
		 */
		ec_tag = get_ecache_tag(index, &nafsr, acc_afsr);
		state = (uchar_t)((ec_tag & cpu_ec_state_mask) >>
				cpu_ec_state_shift);

		/*
		 * ETP is set try to scrub the ecache tag.
		 */
		if (nafsr & P_AFSR_ETP) {
			ecache_scrub_tag_err(nafsr, state, index);
		} else if (state & cpu_ec_state_valid) {
			/*
			 * ETP is not set, E$ tag is valid.
			 * Proceed with the E$ scrubbing.
			 */
			if (state & cpu_ec_state_dirty)
				mpb |= ECACHE_STATE_MODIFIED;

			tafsr = check_ecache_line(index, acc_afsr);

			if (tafsr & P_AFSR_EDP) {
				mpb |= ECACHE_STATE_PARITY;

				/*
				 * Capture the line's data and tag only when
				 * they will be logged; ec_log is set in
				 * ec_action[] only for the parity-bad states,
				 * so ec_data[] is always filled before the
				 * ecache_scrub_log() call further down.
				 */
				if (ecache_scrub_verbose ||
							ecache_scrub_panic) {
					get_ecache_dtag(P2ALIGN(index, 64),
						(uint64_t *)&ec_data[0],
						&ec_tag, &oafsr, acc_afsr);
				}
			}

			if (ssmp->ecache_busy)
				mpb |= ECACHE_STATE_BUSY;

			/*
			 * Count the line: the first eight kstats are laid
			 * out in mpb order, so mpb indexes them directly.
			 */
			ec_knp = (kstat_named_t *)ec_ksp + mpb;
			ec_knp->value.ul++;

			/* rebuild the physical address from tag and index */
			paddr = ((ec_tag & cpu_ec_tag_mask) <<
				cpu_ec_tag_shift) | (index % ec_set_size);

			/*
			 * We flush the E$ lines depending on the ec_flush,
			 * we additionally flush clean_good_busy and
			 * dirty_good_busy lines for mirrored E$.
			 * Lines marked NEVER_FLUSH (dirty with bad parity)
			 * are instead queued for page retirement.
			 */
			if (ec_action[mpb].ec_flush == ALWAYS_FLUSH) {
				flushecacheline(paddr, ec_size);
			} else if ((ec_mirror == ECACHE_CPU_MIRROR) &&
				(ec_action[mpb].ec_flush == MIRROR_FLUSH)) {
					flushecacheline(paddr, ec_size);
			} else if (ec_action[mpb].ec_flush == NEVER_FLUSH) {
				softcall(ecache_page_retire, (void *)paddr);
			}

			/*
			 * Conditionally flush both the clean_good and
			 * dirty_good lines when busy.
			 * CGB()/DGB() only match on non-mirrored E$, and the
			 * budgets computed above cap how many are flushed.
			 */
			if (CGB(mpb, ec_mirror) && (flush_clean_busy > 0)) {
				flush_clean_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->clean_good_busy_flush.value.ul++;
			} else if (DGB(mpb, ec_mirror) &&
						(flush_dirty_busy > 0)) {
				flush_dirty_busy--;
				flushecacheline(paddr, ec_size);
				ec_ksp->dirty_good_busy_flush.value.ul++;
			}

			if (ec_action[mpb].ec_log && (ecache_scrub_verbose ||
						ecache_scrub_panic)) {
				ecache_scrub_log(ec_data, ec_tag, paddr, mpb,
						tafsr);
			}

		} else {
			ec_ksp->invalid_lines.value.ul++;
		}

		/* advance to the next line, wrapping at the end of the E$ */
		if ((index += ec_linesize) >= ec_size)
			index = 0;

	}

	/*
	 * set the ecache scrub index for the next time around
	 */
	ssmp->ecache_flush_index = index;

	/*
	 * A CP bit accumulated in the per-CPU scrub AFSR is reported as an
	 * orphan CP event; the accumulator is cleared only if the AFSR
	 * returned by the handler no longer shows CP.
	 */
	if (*acc_afsr & P_AFSR_CP) {
		uint64_t ret_afsr;

		ret_afsr = ecache_scrub_misc_err(CPU_ORPHAN_CP_ERR, *acc_afsr);
		if ((ret_afsr & P_AFSR_CP) == 0)
			*acc_afsr = 0;
	}
}

/*
 * Soft interrupt handler registered for ecache_scrub_inum.  Every scrub
 * request counted in this CPU's ec_scrub_outstanding produces one call to
 * scrub_ecache_line(); the handler keeps draining batches until the counter,
 * after subtracting the batch just serviced, reads zero.
 */

/*ARGSUSED*/
uint_t
scrub_ecache_line_intr(caddr_t arg1, caddr_t arg2)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	uint32_t *countp = &ssmp->ec_scrub_outstanding;
	int outstanding;
	int remaining;

	for (;;) {
		/* Snapshot the requests queued so far and service them. */
		outstanding = *countp;
		ASSERT(outstanding > 0);

		remaining = outstanding;
		while (remaining > 0) {
			scrub_ecache_line();
			remaining--;
		}

		/* Done once no new requests arrived while we were busy. */
		if (atomic_add_32_nv(countp, -outstanding) == 0)
			break;
	}

	return (DDI_INTR_CLAIMED);
}

/*
 * force each cpu to perform an ecache scrub, called from a timeout
 */
extern xcfunc_t ecache_scrubreq_tl1;

/*
 * Timeout callback that drives the scrubber.  It sanitizes the two
 * calls-per-second tunables, cross-traps every CPU to request a scrub when
 * scrubbing is enabled, and then re-arms itself.
 */
void
do_scrub_ecache_line(void)
{
	long delta = hz;	/* re-arm interval when scrubbing is disabled */

	/* Non-mirrored rate: cap at hz, and never let it drop below 1. */
	if (ecache_calls_a_sec > hz) {
		ecache_calls_a_sec = hz;
	} else if (ecache_calls_a_sec <= 0) {
		ecache_calls_a_sec = 1;
	}

	/* Same treatment for the mirrored-E$ rate. */
	if (ecache_calls_a_sec_mirrored > hz) {
		ecache_calls_a_sec_mirrored = hz;
	} else if (ecache_calls_a_sec_mirrored <= 0) {
		ecache_calls_a_sec_mirrored = 1;
	}

	if (ecache_scrub_enable) {
		/* Ask every CPU to scrub, then pace by the active call rate. */
		xt_all(ecache_scrubreq_tl1, ecache_scrub_inum, 0);
		delta = hz / ec_timeout_calls;
	}

	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
	    delta);
}

/*
 * initialization for ecache scrubbing
 * This routine is called AFTER all cpus have had cpu_init_private called
 * to initialize their private data areas.
 *
 * The ecache_calls_a_sec tunable is validated first because it is used as a
 * divisor for the initial timeout: values above hz are reset to hz and
 * values of zero or below are reset to 1, matching the clamping performed
 * on every tick by do_scrub_ecache_line().
 */
void
cpu_init_cache_scrub(void)
{
	if (ecache_calls_a_sec > hz) {
		cmn_err(CE_NOTE, "ecache_calls_a_sec set too high (%d); "
		    "resetting to hz (%d)", ecache_calls_a_sec, hz);
		ecache_calls_a_sec = hz;
	} else if (ecache_calls_a_sec <= 0) {
		/* a zero divisor below would otherwise trap at boot */
		cmn_err(CE_NOTE, "ecache_calls_a_sec set too low (%d); "
		    "resetting to 1", ecache_calls_a_sec);
		ecache_calls_a_sec = 1;
	}

	/*
	 * Register softint for ecache scrubbing.
	 */
	ecache_scrub_inum = add_softintr(ecache_scrub_pil,
	    scrub_ecache_line_intr, NULL);

	/*
	 * kick off the scrubbing using realtime timeout
	 */
	(void) realtime_timeout((void(*)(void *))do_scrub_ecache_line, 0,
	    hz / ecache_calls_a_sec);
}

/*
 * Mark the given cpu as idle for the purposes of the E$ scrubber.
 * Does nothing if the cpu has no private data area.
 */
void
cpu_idle_ecache_scrub(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp;

	if (CPU_PRIVATE(cp) == NULL)
		return;

	ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	ssmp->ecache_busy = ECACHE_CPU_IDLE;
}

/*
 * Mark the given cpu as busy for the purposes of the E$ scrubber.
 * Does nothing if the cpu has no private data area.
 */
void
cpu_busy_ecache_scrub(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp;

	if (CPU_PRIVATE(cp) == NULL)
		return;

	ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	ssmp->ecache_busy = ECACHE_CPU_BUSY;
}

/*
 * Set up the per-cpu E$ scrubber state for cp: clear the bookkeeping
 * structure, record the number of E$ lines and whether the E$ SRAM is
 * mirrored, mark the cpu busy, and create its scrubber kstats.
 * The global entry point cpu_init_private replaces this entry point.
 */
static void
cpu_init_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	int id = cp->cpu_id;

	/* Start from a clean slate; scanning begins at the first line. */
	bzero(ssmp, sizeof (*ssmp));
	ssmp->ecache_flush_index = 0;

	ssmp->ecache_nlines =
	    cpunodes[id].ecache_size / cpunodes[id].ecache_linesize;

	/* Anything other than an explicit mirror setting is non-mirrored. */
	ssmp->ecache_mirror = (cpunodes[id].msram == ECACHE_CPU_MIRROR) ?
	    ECACHE_CPU_MIRROR : ECACHE_CPU_NON_MIRROR;

	cpu_busy_ecache_scrub(cp);

	/* Publish the per-cpu scrubber kstats. */
	ecache_kstat_init(cp);
}

/*
 * Tear down the per-cpu E$ scrubber state for cp: remove its kstat (if one
 * was created), clear the bookkeeping structure and mark the cpu idle.
 * The global entry point cpu_uninit_private replaces this entry point.
 */
static void
cpu_uninit_ecache_scrub_dr(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	struct kstat *ksp = ssmp->ecache_ksp;

	if (ksp != NULL) {
		kstat_delete(ksp);
		ssmp->ecache_ksp = NULL;
	}

	/* Wipe all remaining scrubber bookkeeping. */
	bzero(ssmp, sizeof (*ssmp));

	cpu_idle_ecache_scrub(cp);
}

/*
 * kmem cache backing the per-cpu spitfire_private_t allocations made by
 * cpu_init_private() and released by cpu_uninit_private().  This repeats the
 * tentative definition that appears earlier in this file.
 */
struct kmem_cache *sf_private_cache;

/*
 * Allocate and initialize the cpu_private area for cp, then initialize the
 * E$ scrubber for it via cpu_init_ecache_scrub_dr().  Called once for EVERY
 * cpu, including CPU 0.
 *
 * The private structure comes from a dedicated kmem cache, created on first
 * use, because it must be aligned on a S_ECACHE_MAX_LSIZE (64) byte
 * boundary.
 */
void
cpu_init_private(struct cpu *cp)
{
	spitfire_private_t *sfprp;

	ASSERT(CPU_PRIVATE(cp) == NULL);

	/* Lazily create the aligned cache the first time through. */
	if (sf_private_cache == NULL) {
		sf_private_cache = kmem_cache_create("sf_private_cache",
		    sizeof (spitfire_private_t), S_ECACHE_MAX_LSIZE,
		    NULL, NULL, NULL, NULL, NULL, 0);
		ASSERT(sf_private_cache);
	}

	/* Attach a zeroed private area to the cpu. */
	sfprp = CPU_PRIVATE(cp) = kmem_cache_alloc(sf_private_cache, KM_SLEEP);
	bzero(sfprp, sizeof (*sfprp));

	cpu_init_ecache_scrub_dr(cp);
}

/*
 * Cpu private uninitialization.  Uninitializes the E$ scrubber for cp via
 * cpu_uninit_ecache_scrub_dr(), returns the cpu_private structure to
 * sf_private_cache, and clears the cpu's private pointer.
 */
void
cpu_uninit_private(struct cpu *cp)
{
	void *priv;

	ASSERT(CPU_PRIVATE(cp));

	cpu_uninit_ecache_scrub_dr(cp);

	priv = CPU_PRIVATE(cp);
	kmem_cache_free(sf_private_cache, priv);
	CPU_PRIVATE(cp) = NULL;
}

/*
 * Create and install the named E$ scrubber kstat for cp, seeding its data
 * from ec_kstat_template.  On failure the cpu's ecache_ksp is left NULL and
 * a note is logged.
 */
static void
ecache_kstat_init(struct cpu *cp)
{
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(cp, sfpr_scrub_misc);
	struct kstat *ksp;

	ASSERT(ssmp != NULL);

	ksp = kstat_create("unix", cp->cpu_id, "ecache_kstat", "misc",
	    KSTAT_TYPE_NAMED,
	    sizeof (ecache_kstat_t) / sizeof (kstat_named_t),
	    KSTAT_FLAG_WRITABLE);

	/* Record the result either way; NULL means "no kstat". */
	ssmp->ecache_ksp = ksp;

	if (ksp == NULL) {
		cmn_err(CE_NOTE, "!ecache_kstat_init(%d) failed\n", cp->cpu_id);
		return;
	}

	bcopy(&ec_kstat_template, ksp->ks_data, sizeof (ecache_kstat_t));
	kstat_install(ksp);
}

/*
 * Log a bad E$ line found by the scrubber.
 *
 *	ec_data: the 8 captured data entries for the line
 *	ec_tag: the E$ tag that was read for the line
 *	paddr: physical address reconstructed for the line
 *	mpb: either an index into ec_action[] (scrubber M/P/B state), or one
 *	    of the CPU_ECACHE_*_ERR codes passed by ecache_scrub_tag_err()
 *	afsr: AFSR value associated with the error
 *
 * A fault structure is built on the stack and dispatched to ue_queue with
 * an ereport class chosen from mpb.  If ecache_scrub_panic is set, the
 * system is panicked after the dispatch.
 */
static void
ecache_scrub_log(ec_data_t *ec_data, uint64_t ec_tag, uint64_t paddr, int mpb,
		uint64_t afsr)
{
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	int i;
	char *class;

	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = &spf_flt.cmn_asyncflt;

	for (i = 0; i < 8; i++) {
		spf_flt.flt_ec_data[i] = ec_data[i];
	}

	spf_flt.flt_ec_tag = ec_tag;

	/*
	 * Values that index ec_action[] are translated to that entry's log
	 * type; anything else is already a fault type and is stored as is.
	 */
	if (mpb >= 0 &&
	    (size_t)mpb < (sizeof (ec_action) / sizeof (ec_action[0]))) {
		spf_flt.flt_type = ec_action[mpb].ec_log_type;
	} else {
		spf_flt.flt_type = (ushort_t)mpb;
	}

	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_id = gethrtime_waitfree();
	aflt->flt_addr = paddr;
	aflt->flt_stat = afsr;
	aflt->flt_panic = (uchar_t)ecache_scrub_panic;

	/* Tag-related errors get the tag ereport class, the rest the data one */
	switch (mpb) {
	case CPU_ECACHE_TAG_ERR:
	case CPU_ECACHE_ADDR_PAR_ERR:
	case CPU_ECACHE_ETP_ETS_ERR:
	case CPU_ECACHE_STATE_ERR:
		class = FM_EREPORT_CPU_USII_ESCRUB_TAG;
		break;
	default:
		class = FM_EREPORT_CPU_USII_ESCRUB_DATA;
		break;
	}

	cpu_errorq_dispatch(class, (void *)&spf_flt, sizeof (spf_flt),
	    ue_queue, aflt->flt_panic);

	/*
	 * The two literal halves are concatenated by the compiler, so the
	 * separating space has to be part of one of them.
	 */
	if (aflt->flt_panic)
		cmn_err(CE_PANIC, "ecache_scrub_panic set and bad E$ "
		    "line detected");
}

/*
 * Process an ecache error that occurred during the E$ scrubbing.
 * We do the ecache scan to find the bad line, flush the bad line
 * and start the memscrubber to find any UE (in memory or in another cache)
 *
 *	type: fault type to record (e.g. CPU_ORPHAN_CP_ERR)
 *	afsr: AFSR value accumulated by the scrubber for this event
 *
 * Returns the AFSR value handed back by scan_ecache(); the caller uses it to
 * decide whether the CP condition is still present.
 */
static uint64_t
ecache_scrub_misc_err(int type, uint64_t afsr)
{
	spitf_async_flt spf_flt;
	struct async_flt *aflt;
	uint64_t oafsr;

	bzero(&spf_flt, sizeof (spitf_async_flt));
	aflt = &spf_flt.cmn_asyncflt;

	/*
	 * Scan each line in the cache to look for the one
	 * with bad parity
	 */
	aflt->flt_addr = AFLT_INV_ADDR;
	scan_ecache(&aflt->flt_addr, &spf_flt.flt_ec_data[0],
		&spf_flt.flt_ec_tag, &spf_flt.flt_ec_lcnt, &oafsr);

	/* Fold a CP seen during the scan into the per-cpu accumulator. */
	if (oafsr & P_AFSR_CP) {
		uint64_t *cp_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
		*cp_afsr |= oafsr;
	}

	/*
	 * If we found a bad PA, update the state to indicate if it is
	 * memory or I/O space.
	 */
	if (aflt->flt_addr != AFLT_INV_ADDR) {
		aflt->flt_in_memory = (pf_is_memory(aflt->flt_addr >>
			MMU_PAGESHIFT)) ? 1 : 0;
	}

	spf_flt.flt_type = (ushort_t)type;

	aflt->flt_inst = CPU->cpu_id;
	aflt->flt_class = CPU_FAULT;
	aflt->flt_id = gethrtime_waitfree();
	/*
	 * The AFSR belongs in flt_stat, which is the field the logging code
	 * prints as the AFSR and which ecache_scrub_log() also fills in.
	 * flt_status holds the ECC_* trap status flags and is left zeroed.
	 */
	aflt->flt_stat = afsr;
	aflt->flt_panic = (uchar_t)ecache_scrub_panic;

	/*
	 * We have the bad line, flush that line and start
	 * the memscrubber.
	 */
	if (spf_flt.flt_ec_lcnt > 0) {
		flushecacheline(P2ALIGN(aflt->flt_addr, 64),
			cpunodes[CPU->cpu_id].ecache_size);
		read_all_memscrub = 1;
		memscrub_run();
	}

	cpu_errorq_dispatch((type == CPU_ORPHAN_CP_ERR) ?
	    FM_EREPORT_CPU_USII_CP : FM_EREPORT_CPU_USII_UNKNOWN,
	    (void *)&spf_flt, sizeof (spf_flt), ue_queue, aflt->flt_panic);

	return (oafsr);
}

/*
 * Handle an E$ tag parity (ETP) condition found by the scrubber at the
 * given E$ index.  The AFSR.ETS bits decide the outcome:
 *
 *   - state parity bad:			log and panic
 *   - state parity good, no tag parity bit:	log and panic (ETP without ETS)
 *   - tag parity bad, line dirty:		log and panic
 *   - tag parity bad, line clean:		rewrite the tag as invalid with
 *						good parity, count and log it
 */
static void
ecache_scrub_tag_err(uint64_t afsr, uchar_t state, uint32_t index)
{
	ushort_t afsr_ets = (afsr & P_AFSR_ETS) >> P_AFSR_ETS_SHIFT;
	spitfire_scrub_misc_t *ssmp = CPU_PRIVATE_PTR(CPU, sfpr_scrub_misc);
	ecache_kstat_t *ec_ksp = (ecache_kstat_t *)ssmp->ecache_ksp->ks_data;
	uint64_t *cpu_afsr = CPU_PRIVATE_PTR(CPU, sfpr_scrub_afsr);
	int cpuid = CPU->cpu_id;
	uint32_t ec_set_size = cpunodes[cpuid].ecache_size /
	    ecache_associativity;
	ec_data_t ec_data[8];
	uint64_t ec_tag, paddr, oafsr;

	/* Capture the line's data and tag for the log entry. */
	get_ecache_dtag(P2ALIGN(index, 64), (uint64_t *)&ec_data[0], &ec_tag,
	    &oafsr, cpu_afsr);
	paddr = ((ec_tag & cpu_ec_tag_mask) << cpu_ec_tag_shift) |
	    (index % ec_set_size);

	/* E$ tag state bits have bad parity. */
	if (afsr_ets & cpu_ec_state_parity) {
		ecache_scrub_log(ec_data, ec_tag, paddr,
		    CPU_ECACHE_STATE_ERR, afsr);
		cmn_err(CE_PANIC, "E$ tag state has bad parity");
		return;
	}

	/* State parity is good; ETP is set but ETS shows no tag parity. */
	if ((afsr_ets & cpu_ec_parity) == 0) {
		ecache_scrub_log(ec_data, ec_tag, paddr,
		    CPU_ECACHE_ETP_ETS_ERR, afsr);
		cmn_err(CE_PANIC, "AFSR.ETP is set and AFSR.ETS is zero");
		return;
	}

	/* Tag address parity is bad and the line is dirty. */
	if (state & cpu_ec_state_dirty) {
		ecache_scrub_log(ec_data, ec_tag, paddr,
		    CPU_ECACHE_ADDR_PAR_ERR, afsr);
		cmn_err(CE_PANIC, " E$ tag address has bad parity");
		return;
	}

	/*
	 * The line is clean: zero the tag and mark the state invalid with
	 * good parity for the tag, then continue.
	 */
	if (isus2i || isus2e)
		write_hb_ec_tag_parity(index);
	else
		write_ec_tag_parity(index);

	/* Sync with the dual tag */
	flushecacheline(0, cpunodes[CPU->cpu_id].ecache_size);
	ec_ksp->tags_cleared.value.ul++;
	ecache_scrub_log(ec_data, ec_tag, paddr, CPU_ECACHE_TAG_ERR, afsr);
}

static void
ecache_page_retire(void *arg)
{
	uint64_t paddr = (uint64_t)arg;
	(void) page_retire(paddr, PR_UE);
}

/*
 * No-op in this CPU module.
 * NOTE(review): presumably present only to satisfy the common cpu-module
 * interface -- confirm.
 */
void
sticksync_slave(void)
{}

/*
 * No-op in this CPU module.
 * NOTE(review): presumably present only to satisfy the common cpu-module
 * interface -- confirm.
 */
void
sticksync_master(void)
{}

/*
 * No-op in this CPU module; all arguments are ignored.
 * NOTE(review): presumably present only to satisfy the common cpu-module
 * interface -- confirm.
 */
/*ARGSUSED*/
void
cpu_check_ce(int flag, uint64_t pa, caddr_t va, uint_t bpp)
{}

/*
 * Dispatch the fault described by aflt to the registered bus error handlers,
 * starting at the root of the device tree.
 *
 *	aflt: the fault; its id, instance and address seed the error record
 *	expected: value stored in the error record's fme_flag
 *
 * If the handlers report the error as fatal and the fault was not protected
 * (AFLT_PROT_NONE), the fault is marked for panic.
 */
void
cpu_run_bus_error_handlers(struct async_flt *aflt, int expected)
{
	ddi_fm_error_t de;
	int status;

	bzero(&de, sizeof (de));
	de.fme_ena = fm_ena_generate_cpu(aflt->flt_id, aflt->flt_inst,
	    FM_ENA_FMT1);
	de.fme_flag = expected;
	de.fme_bus_specific = (void *)aflt->flt_addr;

	status = ndi_fm_handler_dispatch(ddi_root_node(), NULL, &de);

	if (status == DDI_FM_FATAL && aflt->flt_prot == AFLT_PROT_NONE)
		aflt->flt_panic = 1;
}

/*
 * Stamp the ereport class into the fault payload and queue it on eqp.
 * The payload must begin with a struct async_flt.
 */
/*ARGSUSED*/
void
cpu_errorq_dispatch(char *error_class, void *payload, size_t payload_sz,
    errorq_t *eqp, uint_t flag)
{
	((struct async_flt *)payload)->flt_erpt_class = error_class;

	errorq_dispatch(eqp, payload, payload_sz, flag);
}

/* cpu_mp_init() sizes the CE table as MAX_SIMM entries per possible cpu */
#define	MAX_SIMM	8

/*
 * Per-memory-module correctable-error record kept in the mem_ce_simm[]
 * table.  An entry whose name starts with '\0' is unused.
 */
struct ce_info {
	char    name[UNUM_NAMLEN];	/* unum string identifying the module */
	uint64_t intermittent_total;	/* count of intermittent CEs */
	uint64_t persistent_total;	/* count of persistent CEs */
	uint64_t sticky_total;		/* count of sticky CEs */
	unsigned short leaky_bucket_cnt; /* decremented by the leak timeout */
};

/*
 * Separately-defined structure for use in reporting the ce_info
 * to SunVTS without exposing the internal layout and implementation
 * of struct ce_info.
 *
 * It backs the "unix:0:ecc-info" kstat created in cpu_mp_init(), which
 * fills in version, maxcount and count.
 */
static struct ecc_error_info ecc_error_info_data = {
	{ "version", KSTAT_DATA_UINT32 },
	{ "maxcount", KSTAT_DATA_UINT32 },
	{ "count", KSTAT_DATA_UINT32 }
};
/* number of named entries in ecc_error_info_data */
static const size_t ecc_error_info_ndata = sizeof (ecc_error_info_data) /
    sizeof (struct kstat_named);

/* the kstat name buffer must be able to hold a full unum string */
#if KSTAT_CE_UNUM_NAMLEN < UNUM_NAMLEN
#error "Need to rev ecc_error_info version and update KSTAT_CE_UNUM_NAMLEN"
#endif

/* CE table and its entry count; allocated and sized by cpu_mp_init() */
struct ce_info  *mem_ce_simm = NULL;
size_t mem_ce_simm_size = 0;

/*
 * Default values for the number of CE's allowed per interval.
 * Interval is defined in minutes
 * SOFTERR_MIN_TIMEOUT is defined in microseconds
 *
 * TIMEOUT_NONE and TIMEOUT_SET are sentinel values for
 * leaky_bucket_timeout_id: "never armed" and "being armed" respectively.
 */
#define	SOFTERR_LIMIT_DEFAULT		2
#define	SOFTERR_INTERVAL_DEFAULT	1440		/* This is 24 hours */
#define	SOFTERR_MIN_TIMEOUT		(60 * MICROSEC)	/* This is 1 minute */
#define	TIMEOUT_NONE			((timeout_id_t)0)
#define	TIMEOUT_SET			((timeout_id_t)1)

/*
 * timeout identifier for leaky_bucket
 */
static timeout_id_t leaky_bucket_timeout_id = TIMEOUT_NONE;

/*
 * Tunables for maximum number of allowed CE's in a given time
 * (non-positive values are reset to the defaults by
 * add_leaky_bucket_timeout()).
 */
int ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
int ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;

/*
 * Allocate the correctable-error table (MAX_SIMM entries per possible cpu)
 * and publish the associated kstats: one "unix:0:ecc-info" summary kstat
 * backed by ecc_error_info_data, plus one "mm:<n>:ecc-info" kstat per table
 * slot.  Failure to create a kstat is tolerated; the table is still usable.
 */
void
cpu_mp_init(void)
{
	size_t size = cpu_aflt_size();
	size_t slot;
	kstat_t *ksp;
	struct kstat_ecc_mm_info *kceip;

	/*
	 * Initialize the CE error handling buffers.
	 */
	mem_ce_simm_size = MAX_SIMM * max_ncpus;
	size = sizeof (struct ce_info) * mem_ce_simm_size;
	mem_ce_simm = kmem_zalloc(size, KM_SLEEP);

	/* Summary kstat: version, table capacity, and entries in use. */
	ksp = kstat_create("unix", 0, "ecc-info", "misc",
	    KSTAT_TYPE_NAMED, ecc_error_info_ndata, KSTAT_FLAG_VIRTUAL);
	if (ksp != NULL) {
		ksp->ks_data = (struct kstat_named *)&ecc_error_info_data;
		ecc_error_info_data.version.value.ui32 = KSTAT_CE_INFO_VER;
		ecc_error_info_data.maxcount.value.ui32 = mem_ce_simm_size;
		ecc_error_info_data.count.value.ui32 = 0;
		kstat_install(ksp);
	}

	/* One virtual kstat, with its own data buffer, per table slot. */
	for (slot = 0; slot < mem_ce_simm_size; slot++) {
		kceip = kmem_zalloc(sizeof (struct kstat_ecc_mm_info),
		    KM_SLEEP);
		ksp = kstat_create("mm", slot, "ecc-info", "misc",
		    KSTAT_TYPE_NAMED,
		    sizeof (struct kstat_ecc_mm_info) / sizeof (kstat_named_t),
		    KSTAT_FLAG_VIRTUAL);

		if (ksp == NULL) {
			/* no kstat to own the buffer, so give it back */
			kmem_free(kceip, sizeof (struct kstat_ecc_mm_info));
			continue;
		}

		/*
		 * Re-declare ks_data_size to include room for the
		 * UNUM name since we don't have KSTAT_FLAG_VAR_SIZE
		 * set.
		 */
		ksp->ks_data_size = sizeof (struct kstat_ecc_mm_info) +
		    KSTAT_CE_UNUM_NAMLEN;
		ksp->ks_data = kceip;

		kstat_named_init(&kceip->name,
		    "name", KSTAT_DATA_STRING);
		kstat_named_init(&kceip->intermittent_total,
		    "intermittent_total", KSTAT_DATA_UINT64);
		kstat_named_init(&kceip->persistent_total,
		    "persistent_total", KSTAT_DATA_UINT64);
		kstat_named_init(&kceip->sticky_total,
		    "sticky_total", KSTAT_DATA_UINT64);

		/*
		 * Use the default snapshot routine as it knows how to
		 * deal with named kstats with long strings.
		 */
		ksp->ks_update = ecc_kstat_update;
		kstat_install(ksp);
	}
}

/*
 * Timeout callback for the leaky bucket: leak one count out of every
 * non-empty bucket in the CE table, then schedule the next leak.
 */
/*ARGSUSED*/
static void
leaky_bucket_timeout(void *arg)
{
	struct ce_info *table = mem_ce_simm;
	int slot = 0;

	while (slot < mem_ce_simm_size) {
		if (table[slot].leaky_bucket_cnt > 0)
			atomic_add_16(&table[slot].leaky_bucket_cnt, -1);
		slot++;
	}

	add_leaky_bucket_timeout();
}

static void
add_leaky_bucket_timeout(void)
{
	long timeout_in_microsecs;

	/*
	 * create timeout for next leak.
	 *
	 * The timeout interval is calculated as follows
	 *
	 * (ecc_softerr_interval * 60 * MICROSEC) / ecc_softerr_limit
	 *
	 * ecc_softerr_interval is in minutes, so multiply this by 60 (seconds
	 * in a minute), then multiply this by MICROSEC to get the interval
	 * in microseconds.  Divide this total by ecc_softerr_limit so that
	 * the timeout interval is accurate to within a few microseconds.
	 */

	if (ecc_softerr_limit <= 0)
		ecc_softerr_limit = SOFTERR_LIMIT_DEFAULT;
	if (ecc_softerr_interval <= 0)
		ecc_softerr_interval = SOFTERR_INTERVAL_DEFAULT;

	timeout_in_microsecs = ((int64_t)ecc_softerr_interval * 60 * MICROSEC) /
	    ecc_softerr_limit;

	if (timeout_in_microsecs < SOFTERR_MIN_TIMEOUT)
		timeout_in_microsecs = SOFTERR_MIN_TIMEOUT;

	leaky_bucket_timeout_id = timeout(leaky_bucket_timeout,
	    (void *)NULL, drv_usectohz((clock_t)timeout_in_microsecs));
}

/*
 * Legacy Correctable ECC Error Hash
 *
 * All of the code below this comment is used to implement a legacy array
 * which counted intermittent, persistent, and sticky CE errors by unum,
 * and then was later extended to publish the data as a kstat for SunVTS.
 * All of this code is replaced by FMA, and remains here until such time
 * that the UltraSPARC-I/II CPU code is converted to FMA, or is EOLed.
 *
 * Errors are saved in three buckets per-unum:
 * (1) sticky - scrub was unsuccessful, cannot be scrubbed
 *     This could represent a problem, and is immediately printed out.
 * (2) persistent - was successfully scrubbed
 *     These errors use the leaky bucket algorithm to determine
 *     if there is a serious problem.
 * (3) intermittent - may have originated from the cpu or upa/safari bus,
 *     and does not necessarily indicate any problem with the dimm itself,
 *     is critical information for debugging new hardware.
 *     Because we do not know if it came from the dimm, it would be
 *     inappropriate to include these in the leaky bucket counts.
 *
 * If the E$ line was modified before the scrub operation began, then the
 * displacement flush at the beginning of scrubphys() will cause the modified
 * line to be written out, which will clean up the CE.  Then, any subsequent
 * read will not cause an error, which will cause persistent errors to be
 * identified as intermittent.
 *
 * If a DIMM is going bad, it will produce true persistents as well as
 * false intermittents, so these intermittents can be safely ignored.
 *
 * If the error count is excessive for a DIMM, this function will return
 * PR_MCE, and the CPU module may then decide to remove that page from use.
 */
/*
 * Arguments:
 *	status	- fault status flags; only ECC_STICKY, ECC_PERSISTENT and
 *		  ECC_INTERMITTENT are examined, in that order of precedence.
 *	len	- number of bytes of unum to copy and to compare.
 *	unum	- name of the memory module that took the error.
 *
 * Returns PR_MCE for a sticky error, or when the leaky bucket count of an
 * existing entry rises above ecc_softerr_limit; otherwise returns PR_OK.
 */
static int
ce_count_unum(int status, int len, char *unum)
{
	int i;
	struct ce_info *psimm = mem_ce_simm;
	int page_status = PR_OK;

	ASSERT(psimm != NULL);

	/*
	 * Nothing to count if there is no name to record or the status
	 * carries none of the three CE classifications.
	 */
	if (len <= 0 ||
	    (status & (ECC_STICKY | ECC_PERSISTENT | ECC_INTERMITTENT)) == 0)
		return (page_status);

	/*
	 * Initialize the leaky_bucket timeout.  Only the caller whose
	 * compare-and-swap moves the id from TIMEOUT_NONE to TIMEOUT_SET
	 * goes on to arm it.
	 */
	if (casptr(&leaky_bucket_timeout_id,
	    TIMEOUT_NONE, TIMEOUT_SET) == TIMEOUT_NONE)
		add_leaky_bucket_timeout();

	/*
	 * Linear search of the table.  The first slot with an empty name
	 * marks the end of the valid entries.
	 */
	for (i = 0; i < mem_ce_simm_size; i++) {
		if (psimm[i].name[0] == '\0') {
			/*
			 * Hit the end of the valid entries, add
			 * a new one.
			 *
			 * The name is stored before any counter is set;
			 * ecc_kstat_update() depends on that ordering.
			 *
			 * NOTE(review): strncpy() writes no terminator when
			 * unum has at least len characters, so this appears
			 * to rely on the slot being zero-filled and on len
			 * being smaller than the name buffer - confirm.
			 */
			(void) strncpy(psimm[i].name, unum, len);
			if (status & ECC_STICKY) {
				/*
				 * Sticky - the leaky bucket is used to track
				 * soft errors.  Since a sticky error is a
				 * hard error and likely to be retired soon,
				 * we do not count it in the leaky bucket.
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 1;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				/*
				 * Persistent - the first error on a module
				 * starts its leaky bucket at one; the limit
				 * is not checked on this path.
				 */
				psimm[i].leaky_bucket_cnt = 1;
				psimm[i].intermittent_total = 0;
				psimm[i].persistent_total = 1;
				psimm[i].sticky_total = 0;
			} else {
				/*
				 * Intermittent - Because the scrub operation
				 * cannot find the error in the DIMM, we will
				 * not count these in the leaky bucket
				 */
				psimm[i].leaky_bucket_cnt = 0;
				psimm[i].intermittent_total = 1;
				psimm[i].persistent_total = 0;
				psimm[i].sticky_total = 0;
			}
			/* Record that another table entry is now in use. */
			ecc_error_info_data.count.value.ui32++;
			break;
		} else if (strncmp(unum, psimm[i].name, len) == 0) {
			/*
			 * Found an existing entry for the current
			 * memory module, adjust the counts.
			 *
			 * NOTE(review): only the first len bytes are
			 * compared, so a stored name that merely begins
			 * with unum would also match - confirm callers
			 * always pass the full name length.
			 */
			if (status & ECC_STICKY) {
				psimm[i].sticky_total++;
				cmn_err(CE_WARN,
				    "[AFT0] Sticky Softerror encountered "
				    "on Memory Module %s\n", unum);
				page_status = PR_MCE;
			} else if (status & ECC_PERSISTENT) {
				int new_value;

				/*
				 * The bucket count is updated atomically;
				 * the running total is a plain increment.
				 */
				new_value = atomic_add_16_nv(
				    &psimm[i].leaky_bucket_cnt, 1);
				psimm[i].persistent_total++;
				if (new_value > ecc_softerr_limit) {
					/*
					 * Over the threshold: warn, request
					 * a page retire, and take back the
					 * increment just made.  The message
					 * prints ecc_softerr_interval as
					 * hours and minutes.
					 */
					cmn_err(CE_WARN, "[AFT0] Most recent %d"
					    " soft errors from Memory Module"
					    " %s exceed threshold (N=%d,"
					    " T=%dh:%02dm) triggering page"
					    " retire", new_value, unum,
					    ecc_softerr_limit,
					    ecc_softerr_interval / 60,
					    ecc_softerr_interval % 60);
					atomic_add_16(
					    &psimm[i].leaky_bucket_cnt, -1);
					page_status = PR_MCE;
				}
			} else { /* Intermittent */
				psimm[i].intermittent_total++;
			}
			break;
		}
	}

	/*
	 * The loop ran off the end without a break: every slot is in use
	 * and none matched unum, so this error goes uncounted.
	 */
	if (i >= mem_ce_simm_size)
		cmn_err(CE_CONT, "[AFT0] Softerror: mem_ce_simm[] out of "
		    "space.\n");

	return (page_status);
}

/*
 * Count a correctable error that was detected by IO against the memory
 * module named by unum (len bytes of the name are significant).
 *
 * The fault status in ecc is handed to ce_count_unum().  If that returns
 * anything other than PR_OK and automatic_page_removal is set, the page at
 * ecc->flt_addr is passed to page_retire() with the returned reason; the
 * result of page_retire() is ignored.
 */
void
cpu_ce_count_unum(struct async_flt *ecc, int len, char *unum)
{
	int retire_reason = ce_count_unum(ecc->flt_status, len, unum);

	/* Nothing more to do unless a retire was requested and is allowed. */
	if (retire_reason == PR_OK || !automatic_page_removal)
		return;

	(void) page_retire(ecc->flt_addr, retire_reason);
}

/*
 * kstat update routine for a single mem_ce_simm[] entry; the entry is
 * selected by the kstat's instance number.
 *
 * Writes are refused with EACCES.  For reads, the entry's name and its
 * three running totals are copied into the kstat data, but only once at
 * least one total is non-zero.  Always returns 0 for reads.
 */
static int
ecc_kstat_update(kstat_t *ksp, int rw)
{
	struct kstat_ecc_mm_info *kdata = ksp->ks_data;
	struct ce_info *entry;
	int i = ksp->ks_instance;

	if (rw == KSTAT_WRITE)
		return (EACCES);

	ASSERT(ksp->ks_data != NULL);
	ASSERT(i < mem_ce_simm_size && i >= 0);

	entry = &mem_ce_simm[i];

	/*
	 * No locks are taken here, so guard against a half-built entry.
	 * ce_count_unum() copies the name before it touches the counters,
	 * which means a non-zero counter implies the name is complete.
	 * An entry with all counters still zero is either unused or still
	 * being set up; leave the kstat data alone in that case (it came
	 * from kmem_zalloc, so it is already zeroed).
	 */
	if (entry->intermittent_total != 0 ||
	    entry->persistent_total != 0 ||
	    entry->sticky_total != 0) {
		kstat_named_setstr(&kdata->name, entry->name);
		kdata->intermittent_total.value.ui64 =
		    entry->intermittent_total;
		kdata->persistent_total.value.ui64 =
		    entry->persistent_total;
		kdata->sticky_total.value.ui64 =
		    entry->sticky_total;
	}

	return (0);
}

#define	VIS_BLOCKSIZE		64

/*
 * Call dtrace_blksuword32(addr, data, 0) with write watchpoints on the
 * VIS_BLOCKSIZE bytes starting at addr disabled for the duration of the
 * call.  If watch_disable_addr() reported that the range was watched, the
 * watchpoint is re-enabled afterwards.  Returns whatever
 * dtrace_blksuword32() returned.
 */
int
dtrace_blksuword32_err(uintptr_t addr, uint32_t *data)
{
	void *blk = (void *)addr;
	int was_watched;
	int rc;

	was_watched = watch_disable_addr(blk, VIS_BLOCKSIZE, S_WRITE);
	rc = dtrace_blksuword32(addr, data, 0);

	if (was_watched)
		watch_enable_addr(blk, VIS_BLOCKSIZE, S_WRITE);

	return (rc);
}

/*
 * No-op in this CPU module: the body is empty and cp is not used.
 * NOTE(review): by its name this looks like the hook run when a CPU enters
 * the faulted state - confirm against the callers.
 */
/*ARGSUSED*/
void
cpu_faulted_enter(struct cpu *cp)
{
}

/*
 * No-op in this CPU module: the body is empty and cp is not used.
 * NOTE(review): by its name this looks like the hook run when a CPU leaves
 * the faulted state - confirm against the callers.
 */
/*ARGSUSED*/
void
cpu_faulted_exit(struct cpu *cp)
{
}

/*
 * Page-size masks handed out by mmu_large_pages_disabled().  Each mask has
 * bit (1 << TTE size code) set for the sizes listed in its initializer:
 * 512K, 32M and 256M in the ISM mask, and 32M and 256M in the general mask.
 */
static int mmu_disable_ism_large_pages = ((1 << TTE512K) |
	(1 << TTE32M) | (1 << TTE256M));
static int mmu_disable_large_pages = ((1 << TTE32M) | (1 << TTE256M));

/*
 * Supply the US_II mmu-specific settings for the hat's disable_large_pages
 * and disable_ism_large_pages variables.
 *
 * A flag of HAT_LOAD selects mmu_disable_large_pages, a flag of
 * HAT_LOAD_SHARE selects mmu_disable_ism_large_pages, and any other flag
 * yields 0.
 */
int
mmu_large_pages_disabled(uint_t flag)
{
	if (flag == HAT_LOAD)
		return (mmu_disable_large_pages);

	if (flag == HAT_LOAD_SHARE)
		return (mmu_disable_ism_large_pages);

	return (0);
}

/*
 * No-op in this CPU module: the body is empty and hat is not used.
 * NOTE(review): by its name this looks like the hook for setting up the
 * kernel hat's page sizes - confirm against the callers.
 */
/*ARGSUSED*/
void
mmu_init_kernel_pgsz(struct hat *hat)
{
}

/*
 * Validate a requested kernel large page size.
 *
 * A request of 0 selects the default, MMU_PAGESIZE4M.  A request that
 * equals TTEBYTES() of any size code from TTE8K through TTE4M is returned
 * unchanged.  Any other request falls back to the TTE8K page size.
 */
size_t
mmu_get_kernel_lpsize(size_t lpsize)
{
	uint_t szc = TTE8K;

	/* no setting for segkmem_lpsize in /etc/system: use default */
	if (lpsize == 0)
		return (MMU_PAGESIZE4M);

	/* Accept the request only if it is an exact match for some size. */
	while (szc <= TTE4M) {
		if (TTEBYTES(szc) == lpsize)
			return (lpsize);
		szc++;
	}

	/* Not one of the sizes checked above; use the smallest. */
	return (TTEBYTES(TTE8K));
}