/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#pragma dictionary "AMD"

/*
 * Eversholt rules for the AMD Opteron CPU/Memory
 */

fru motherboard;
fru chip;
fru dimm;

asru chip/cpu;
asru dimm;
asru dimm/rank;
asru dram-channel;
asru chip/memory-controller/chip-select;

#define	MAX(x, y) ((x) >= (y) ? (x) : (y))
#define	MIN(x, y) ((x) <= (y) ? (x) : (y))

/*
 * GET_ADDR relies on the fact that variables have global scope across an FME.
 * Thus for each FME the assignment only occurs for the first invocation
 * but the comparison happens on each. Thus if the new address matches the
 * address of an existing open FME, then we return true running in the context
 * of that FME. If the new address doesn't match the address of any existing
 * open FME, then we return true in the context of a newly opened FME.
 */
#define GET_ADDR (defined($addr) ? ($addr == payloadprop("addr")) :	\
	($addr = payloadprop("addr")))
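
/*
 * Illustrative sketch of the GET_ADDR semantics described above (not an
 * additional rule): on the first ereport considered in an FME $addr is
 * undefined, so GET_ADDR takes the assignment arm and captures
 * payloadprop("addr"); the assignment itself evaluates true.  For any later
 * ereport considered in the same FME $addr is already defined, so GET_ADDR
 * reduces to
 *
 *	($addr == payloadprop("addr"))
 *
 * which holds only when the new ereport's address matches the one that
 * opened the FME; otherwise the constraint fails and the ereport is left to
 * match, or open, a different FME.
 */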

#define	GET_OFFSET ($offset = payloadprop("resource[0].hc-specific.offset"))

/*
 * SET_ADDR and SET_OFFSET are used to set a payload value in the fault that
 * we diagnose for page faults, to record the physical address of the faulting
 * page.  The "asru-" prefix is hooked in the "rewrite-ASRU" confcalls made on
 * diagnosis of associated faults when the libtopo mem scheme rewrites the
 * asru in "mem" scheme.
 */
#define	SET_ADDR (setpayloadprop("asru-physaddr", $addr))

#define	SET_OFFSET (setpayloadprop("asru-offset", $offset))

/*
 * RESOURCE_EXISTS is true if a member with name "resource" exists in the
 * payload - regardless of type (e.g., nvlist or nvlist array) or value.
 */
#define	RESOURCE_EXISTS	(payloadprop_defined("resource"))

/*
 * CONTAINS_RANK is true if the "resource" nvlist array (as used in memory
 * ereports) exists and one of its members matches the path for the
 * rank node.  Our memory propagations are of the form
 *
 * "prop foo@chip/memory-controller/dimm/rank -> blah@chip/cpu"
 *
 * since cpus detect memory errors;  in eversholt such a propagation, where
 * the lhs path and rhs path do not match, expands to the cross-product of
 * all dimms, ranks and cpus on the same chip (since chip appears in the
 * path on both sides).  We use CONTAINS_RANK to constrain the propagation
 * such that it only happens if the payload resource matches the rank.
 */
#define	CONTAINS_RANK (payloadprop_contains("resource", \
	asru(chip/memory-controller/dimm/rank)))
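
/*
 * Illustration only (hypothetical topology): with two dimms of two ranks
 * each under a memory controller, a propagation such as
 *
 *	prop upset.memory.page_sb@chip/memory-controller/dimm/rank (0)->
 *	    ereport.cpu.amd.nb.mem_ce@chip/cpu { CONTAINS_RANK };
 *
 * expands to every rank/cpu pairing on the chip.  CONTAINS_RANK is true only
 * for the rank whose path appears in the ereport's "resource" nvlist array,
 * so only that rank's upset is actually considered.
 */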

/*
 * The following will tell us whether a syndrome that is known to be
 * correctable (from a mem_ce ereport) is single-bit or multi-bit.  For a
 * correctable ChipKill syndrome the number of bits set in the lowest
 * nibble indicates how many bits were in error.
 */

#define	CBITMASK(synd) ((synd) & 0xf)

#define	CKSINGLE(synd)							\
	((synd) == 0 ||							\
	(CBITMASK(synd) == 0x1 || CBITMASK(synd) == 0x2 ||		\
	CBITMASK(synd) == 0x4 || CBITMASK(synd) == 0x8))

#define	SINGLE_BIT_CE							\
	(payloadprop("syndrome-type") == "E" ||				\
	(payloadprop("syndrome-type") == "C" &&				\
	CKSINGLE(payloadprop("syndrome"))))

#define	MULTI_BIT_CE							\
	(payloadprop("syndrome-type") == "C" &&				\
	!CKSINGLE(payloadprop("syndrome")))
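
/*
 * Worked example with illustrative syndrome values: a ChipKill syndrome of
 * 0x1844 has CBITMASK(0x1844) == 0x4, a single bit set in the low nibble,
 * so CKSINGLE is true and (with syndrome-type "C") the event counts as
 * SINGLE_BIT_CE.  A syndrome of 0x1843 has a low nibble of 0x3, two bits
 * set, so (again with syndrome-type "C") it counts as MULTI_BIT_CE.
 * Syndrome-type "E" (normal 64/8 ECC) is always treated as single-bit.
 */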

/*
 * A single bit fault in a memory rank can cause:
 *
 *  - mem_ce : reported by nb
 *  - inf_sys_ecc1: reported by ic or dc; inf_sys_ecc1 errors detected at the
 *    ic do not record a syndrome; these errors will not be triggered in
 *    ChipKill ECC mode (the NB corrects all ECC errors in that mode)
 *  - s_ecc1: reported by bu; this error will not be triggered in ChipKill
 *    ECC mode (the NB corrects all ECC in that mode)
 *
 * Single-bit errors are fed into a per-rank SERD engine; if a SERD engine
 * trips we diagnose a fault.memory.page_sb so that the response agent can
 * retire the page that caused the trip.  If the total number of pages
 * faulted in this way on a single rank exceeds a threshold we will
 * diagnose a fault.memory.dimm_sb against the containing dimm.
 *
 * Multibit ChipKill-correctable errors are treated identically to
 * single-bit errors, but via separate serd engines to allow distinct
 * parameters if desired.
 *
 * Uncorrectable errors produce an immediate page fault and corresponding
 * fault.memory.dimm_ue.
 *
 * Page faults are essentially internal - action is only required when
 * they are accompanied by a dimm fault.  As such we include message=0
 * on page faults.
 */

event ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.s_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.nb.mem_ce@chip/cpu{within(5s)};

/*
 * If the address is not valid then no resource member will be included
 * in a nb.mem_ce or nb.mem_ue ereport.  These cases should be rare.
 * We will also discard all inf_sys_ecc1 events detected at the ic since they
 * have no syndrome and therefore no resource information.
 * We discard all such ereports.  An alternative may be to SERD them
 * on a per MC basis and trip if we see too many such events.
 */

event upset.memory.discard1@chip/cpu;

/*								#PAGE#
 * Single-bit correctable errors are diagnosed as upsets and feed into per-rank
 * SERD engines which diagnose fault.memory.page_sb if they trip.
 *
 * Multi-bit correctable (via ChipKill) errors are diagnosed as upsets and feed
 * into additional per-rank SERD engines which diagnose fault.memory.page_ck
 * if they trip.
 *
 * The number of fault.memory.page_sb and fault.memory.page_ck faults diagnosed
 * is counted in stat engines for each type.  These are used in deciding
 * whether to declare a dimm faulty after repeated page faults.
 */

#define PAGE_FIT		1
#define PAGE_SB_COUNT		2
#define PAGE_SB_TIME		72h
#define	PAGE_CK_COUNT		2
#define	PAGE_CK_TIME		72h

/*
 * The fraction of pages on a single rank that must be diagnosed as faulty
 * with single correctable unit faults before we will fault the rank.
 * Once we have faulted the rank we will continue to diagnose any further page
 * faults on the rank up to some maximum multiple of the threshold at which
 * we faulted the dimm.  This allows us to potentially contain some fairly
 * far-reaching but still limited-extent fault (such as a partial column
 * failure) without getting carried away and allowing a single faulty rank to
 * use up the entire system-imposed page retirement limit (which, once
 * reached, causes retirement requests to have no effect other than to fill
 * the fault manager cache and logs).
 *
 * This fraction is specified in basis points, where 100 basis points are
 * equivalent to 1 percent.  It is applied on a per-rank basis.
 *
 * The system imposes an absolute maximum on the number of pages it will
 * retire;  the current value is 10 basis points, or 0.1% of 'physmem'.  Note
 * that 'physmem' is reduced from installed memory pages by an amount
 * reflecting permanent kernel memory allocations.  This system page retire
 * limit bounds the maximum real response to page faults across all ranks
 * that fault manager response agents can effect, but it should not be confused
 * with any diagnosis threshold (i.e., the number of faulty pages we are
 * prepared to tolerate from a single rank before faulting the rank is
 * distinct from the total number of pages we are prepared to retire from use
 * in response to that and other faults).  It is, however, desirable to
 * arrange that the maximum number of pages we are prepared to fault from
 * any one rank is less than the system-wide quota.
 */
#define	PAGE_RETIRE_LIMIT_BPS	5		/* or 0.05%; ~ 131 pages/GB */

/*
 * A macro to manipulate the above fraction.  Given a size in bytes, convert
 * it to pages (4K pagesize) and calculate the number of those pages
 * indicated by PAGE_RETIRE_LIMIT_BPS basis points.
 */
#define	_BPS_PGCNT(totalbytes) \
	((((totalbytes) / 4096) * PAGE_RETIRE_LIMIT_BPS) / 10000)
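
/*
 * Worked example, assuming a hypothetical 1GB rank: 1GB is 262144 4K pages,
 * so _BPS_PGCNT gives (262144 * 5) / 10000 = 131 pages - the "~ 131
 * pages/GB" noted above.  A 2GB rank would give 262.
 */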

/*
 * The single-correctable-unit threshold: the number of faulted pages on a
 * rank at which we fault the rank.  We insist that this be at least 128 and
 * never more than 512.
 */
#define	RANK_THRESH MIN(512, MAX(128, \
	_BPS_PGCNT(confprop(asru(chip/memory-controller/dimm/rank), "size"))))

/*
 * The maximum number of single-correctable-unit page faults we will diagnose
 * on a single rank (must be greater than RANK_THRESH).  We set
 * this at twice the rank fault threshold.
 */
#define	RANK_PGFLT_MAX (2 * RANK_THRESH)
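
/*
 * Continuing the hypothetical 1GB-rank example above: _BPS_PGCNT yields 131,
 * so RANK_THRESH = MIN(512, MAX(128, 131)) = 131 and RANK_PGFLT_MAX = 262.
 * Very small ranks are clamped up to 128 and very large ones down to 512.
 */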

engine stat.sbpgflt@chip/memory-controller/dimm/rank;
engine stat.ckpgflt@chip/memory-controller/dimm/rank;

event fault.memory.page_sb@chip/memory-controller/dimm/rank,
    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
    count=stat.sbpgflt@chip/memory-controller/dimm/rank,
    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */

#define	SB_PGFLTS (count(stat.sbpgflt@chip/memory-controller/dimm/rank))

event fault.memory.page_ck@chip/memory-controller/dimm/rank,
    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
    count=stat.ckpgflt@chip/memory-controller/dimm/rank,
    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */

#define	CK_PGFLTS (count(stat.ckpgflt@chip/memory-controller/dimm/rank))

#define	RANK_PGFLT_LIMIT_REACHED \
    (SB_PGFLTS + CK_PGFLTS > RANK_PGFLT_MAX)

event ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;
engine serd.memory.page_sb@chip/memory-controller/dimm/rank,
    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
    trip=ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;
event upset.memory.page_sb@chip/memory-controller/dimm/rank,
    engine=serd.memory.page_sb@chip/memory-controller/dimm/rank;

event ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;
engine serd.memory.page_ck@chip/memory-controller/dimm/rank,
    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
    trip=ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;
event upset.memory.page_ck@chip/memory-controller/dimm/rank,
    engine=serd.memory.page_ck@chip/memory-controller/dimm/rank;

event upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank;

/*
 * If we have not reached the per-rank limit on faulted pages then
 * continue to explain ereport observations as upsets which can
 * lead to page fault diagnoses if the serd engine trips.
 */
prop upset.memory.page_sb@chip/memory-controller/dimm/rank (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu
	{ CONTAINS_RANK && SINGLE_BIT_CE && !RANK_PGFLT_LIMIT_REACHED },
    ereport.cpu.amd.bu.s_ecc1@chip/cpu
	{ CONTAINS_RANK && SINGLE_BIT_CE && !RANK_PGFLT_LIMIT_REACHED },
    ereport.cpu.amd.nb.mem_ce@chip/cpu
	{ CONTAINS_RANK && SINGLE_BIT_CE && !RANK_PGFLT_LIMIT_REACHED };

prop upset.memory.page_ck@chip/memory-controller/dimm/rank (0)->
    /* no dc.inf_sys_ecc1 or bu.s_ecc1 in ChipKill mode */
    ereport.cpu.amd.nb.mem_ce@chip/cpu
	{ CONTAINS_RANK && MULTI_BIT_CE && !RANK_PGFLT_LIMIT_REACHED };

/*
 * If we have reached the per-rank limit on faulted pages then diagnose
 * further observations on the rank to an engine-less upset (i.e., discard
 * them).
 */
prop upset.memory.overpgfltlimit@chip/memory-controller/dimm/rank (1)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu
	{ CONTAINS_RANK && RANK_PGFLT_LIMIT_REACHED },
    ereport.cpu.amd.bu.s_ecc1@chip/cpu
	{ CONTAINS_RANK && RANK_PGFLT_LIMIT_REACHED },
    ereport.cpu.amd.nb.mem_ce@chip/cpu
	{ CONTAINS_RANK && RANK_PGFLT_LIMIT_REACHED };

prop fault.memory.page_sb@chip/memory-controller/dimm/rank (1)->
    ereport.memory.page_sb_trip@chip/memory-controller/dimm/rank;

prop fault.memory.page_ck@chip/memory-controller/dimm/rank (1)->
    ereport.memory.page_ck_trip@chip/memory-controller/dimm/rank;

prop fault.memory.page_sb@chip/memory-controller/dimm/rank
    { SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET },
    ereport.cpu.amd.bu.s_ecc1@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET },
    ereport.cpu.amd.nb.mem_ce@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET };

prop fault.memory.page_ck@chip/memory-controller/dimm/rank
    { SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET },
    ereport.cpu.amd.bu.s_ecc1@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET },
    ereport.cpu.amd.nb.mem_ce@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET };

prop upset.memory.discard1@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_sys_ecc1@chip/cpu; /* always discard - no resource */

prop upset.memory.discard1@chip/cpu (1)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu { !RESOURCE_EXISTS },
    ereport.cpu.amd.bu.s_ecc1@chip/cpu { !RESOURCE_EXISTS },
    ereport.cpu.amd.nb.mem_ce@chip/cpu { !RESOURCE_EXISTS };

/*								#DIMM_SCU#
 * "Single-correctable-unit" DIMM faults are diagnosed when the total number of
 * page faults (diagnosed from repeated single-bit or multi-bit ChipKill
 * errors) from any one rank on that DIMM reaches a threshold.  A "correctable
 * unit" is a single bit in normal 64/8 ECC mode, or a single symbol in
 * ChipKill 128/16 mode (i.e., nibble-aligned nibble for the code used on
 * Opteron).
 *
 * We do not stop diagnosing further single-bit page faults once we have
 * declared a single-bit DIMM fault - we continue diagnosing them and
 * response agents can continue to retire those pages up to the system-imposed
 * retirement limit.
 *
 * Two distinct fault types may be diagnosed - fault.memory.dimm_sb and
 * fault.memory.dimm_ck.  Which one is diagnosed depends on whether we
 * have reached the threshold for a majority of single-bit page faults or
 * multibit page faults.
 *
 * Implementation: we maintain parallel SERD engines to the page_sb and
 * page_ck engines, which trip in unison.  On trip each generates a distinct
 * ereport which we diagnose to a fault if the threshold has been
 * reached, or to a throwaway upset if not.
 */

#define DIMM_SB_FIT		2000
#define DIMM_CK_FIT		4000

event fault.memory.dimm_sb@chip/memory-controller/dimm/rank,
    FITrate=DIMM_SB_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event fault.memory.dimm_ck@chip/memory-controller/dimm/rank,
    FITrate=DIMM_CK_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
	{ within(5s) };
engine serd.memory.dimm_sb@chip/memory-controller/dimm/rank,
    N=PAGE_SB_COUNT, T=PAGE_SB_TIME, method=persistent,
    trip=ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank;
event upset.memory.dimm_sb@chip/memory-controller/dimm/rank,
    engine=serd.memory.dimm_sb@chip/memory-controller/dimm/rank;

event ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
	{ within(5s) };
engine serd.memory.dimm_ck@chip/memory-controller/dimm/rank,
    N=PAGE_CK_COUNT, T=PAGE_CK_TIME, method=persistent,
    trip=ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;
event upset.memory.dimm_ck@chip/memory-controller/dimm/rank,
    engine=serd.memory.dimm_ck@chip/memory-controller/dimm/rank;

event upset.memory.discard2@chip/memory-controller/dimm/rank;

prop upset.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
    ereport.cpu.amd.dc.inf_sys_ecc1@chip/cpu { CONTAINS_RANK && SINGLE_BIT_CE },
    ereport.cpu.amd.bu.s_ecc1@chip/cpu { CONTAINS_RANK && SINGLE_BIT_CE },
    ereport.cpu.amd.nb.mem_ce@chip/cpu { CONTAINS_RANK && SINGLE_BIT_CE };

prop upset.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
    ereport.cpu.amd.nb.mem_ce@chip/cpu { CONTAINS_RANK && MULTI_BIT_CE };

/*
 * The following two propagations diagnose a fault.memory.dimm_sb when
 * either the dimm_sb or dimm_ck engine trips (for a new page fault)
 * and the total number of page faults (sb and ck) exceeds the threshold
 * value with the majority being from sb page faults.
 */
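/*
 * For the hypothetical 1GB rank used in the examples above (RANK_THRESH of
 * 131), that means a dimm_sb or dimm_ck trip is diagnosed to
 * fault.memory.dimm_sb once SB_PGFLTS + CK_PGFLTS > 131 and
 * SB_PGFLTS > 65 (i.e., more than RANK_THRESH / 2).
 */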
prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };

prop fault.memory.dimm_sb@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && SB_PGFLTS > RANK_THRESH / 2 };

/*
 * The following two propagations diagnose a fault.memory.dimm_ck when
 * either the dimm_sb or dimm_ck engine trips (for a new page fault)
 * and the total number of page faults (sb and ck) exceeds the threshold
 * value with the majority being from ck page faults.
 */
prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };

prop fault.memory.dimm_ck@chip/memory-controller/dimm/rank (0)->
    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank
    { SB_PGFLTS + CK_PGFLTS > RANK_THRESH && CK_PGFLTS > RANK_THRESH / 2 };

prop upset.memory.discard2@chip/memory-controller/dimm/rank (1)->
    ereport.memory.dimm_sb_trip@chip/memory-controller/dimm/rank,
    ereport.memory.dimm_ck_trip@chip/memory-controller/dimm/rank;

/* 								#DIMM_UE#
 *								#PAGE_UE#
 * An uncorrectable multi-bit fault in a memory dimm can cause:
 *
 *  - mem_ue	   : reported by nb for an access from a remote cpu
 *  - inf_sys_eccm : reported by ic or dc; the ic does not report a syndrome
 *  - s_eccm	   : reported by bu
 *
 * Note we use a SERD engine here simply as a way of ensuring that we get
 * both dimm and page faults reported.
 *
 * Since on production systems we force HT Sync Flood on uncorrectable
 * memory errors (if not already set as such by the BIOS, as it should be)
 * we won't actually receive these ereports since the system will be reset.
 */

#define DIMM_UE_FIT		6000

event ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.s_eccm@chip/cpu{within(5s)};
event ereport.cpu.amd.nb.mem_ue@chip/cpu{within(5s)};

event fault.memory.dimm_ue@chip/memory-controller/dimm/rank,
    FITrate=DIMM_UE_FIT, FRU=dimm, ASRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event fault.memory.page_ue@chip/memory-controller/dimm/rank,
    FITrate=PAGE_FIT, ASRU=dimm/rank, message=0,
    action=confcall("rewrite-ASRU"); /* rewrite ASRU to identify page in rank */

event ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;
engine serd.memory.dimm_ue@chip/memory-controller/dimm/rank,
    N=0, T=1h, method=persistent,
    trip=ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;
event upset.memory.dimm_ue@chip/memory-controller/dimm/rank,
    engine=serd.memory.dimm_ue@chip/memory-controller/dimm/rank;

event ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;
engine serd.memory.page_ue@chip/memory-controller/dimm/rank,
    N=0, T=1h, method=persistent,
    trip=ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;
event upset.memory.page_ue@chip/memory-controller/dimm/rank,
    engine=serd.memory.page_ue@chip/memory-controller/dimm/rank;

event upset.memory.discard3@chip/cpu;

prop upset.memory.page_ue@chip/memory-controller/dimm/rank (0)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu { CONTAINS_RANK },
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu { CONTAINS_RANK },
    ereport.cpu.amd.bu.s_eccm@chip/cpu { CONTAINS_RANK },
    ereport.cpu.amd.nb.mem_ue@chip/cpu { CONTAINS_RANK };

prop upset.memory.dimm_ue@chip/memory-controller/dimm/rank (0)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu { CONTAINS_RANK },
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu { CONTAINS_RANK },
    ereport.cpu.amd.bu.s_eccm@chip/cpu { CONTAINS_RANK },
    ereport.cpu.amd.nb.mem_ue@chip/cpu { CONTAINS_RANK };

prop fault.memory.page_ue@chip/memory-controller/dimm/rank (1)->
    ereport.memory.page_ue_trip@chip/memory-controller/dimm/rank;

prop fault.memory.page_ue@chip/memory-controller/dimm/rank
    { SET_ADDR && SET_OFFSET } (0)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET },
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET },
    ereport.cpu.amd.bu.s_eccm@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET },
    ereport.cpu.amd.nb.mem_ue@chip/cpu
	{ CONTAINS_RANK && GET_ADDR && GET_OFFSET };

prop fault.memory.dimm_ue@chip/memory-controller/dimm/rank (1)->
    ereport.memory.dimm_ue_trip@chip/memory-controller/dimm/rank;

prop upset.memory.discard3@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_sys_eccm@chip/cpu { !RESOURCE_EXISTS },
    ereport.cpu.amd.dc.inf_sys_eccm@chip/cpu { !RESOURCE_EXISTS },
    ereport.cpu.amd.bu.s_eccm@chip/cpu { !RESOURCE_EXISTS },
    ereport.cpu.amd.nb.mem_ue@chip/cpu { !RESOURCE_EXISTS };

/*								#CSTESTFAIL#
 * If the BIOS fails a chip-select during POST, or perhaps after a
 * sync flood from an uncorrectable error, then on revision F and G it
 * should mark that chip-select as TestFail in the CS Base register.
 * When the memory-controller driver discovers all the MC configuration
 * it notes such failed chip-selects and creates topology nodes for the
 * chip-select and associated dimms and ranks, and produces an ereport for each
 * failed chip-select with detector set to the memory-controller node
 * and resource indicating the failed chip-select.
 */

event ereport.cpu.amd.mc.cs_testfail@chip/memory-controller;

event fault.memory.dimm_testfail@chip/memory-controller/dimm/rank,
    FITrate=1000, ASRU=dimm, FRU=dimm,
    action=confcall("rewrite-ASRU"); /* rewrite non-leaf ASRU in mem scheme */

event error.memory.cs_testfail@chip/memory-controller/chip-select;

#define	CONTAINS_CS (payloadprop_contains("resource", \
	asru(chip/memory-controller/chip-select)))

prop error.memory.cs_testfail@chip/memory-controller/chip-select ->
    ereport.cpu.amd.mc.cs_testfail@chip/memory-controller
    { CONTAINS_CS };

#define CSMATCH(s) \
	(confprop_defined(asru(chip/memory-controller/chip-select), s) && \
	confprop(asru(chip/memory-controller/chip-select), s) == \
	confprop(asru(chip/memory-controller/dimm/rank), "csname"))

prop fault.memory.dimm_testfail@chip/memory-controller/dimm/rank ->
    error.memory.cs_testfail@chip/memory-controller/chip-select
    { CSMATCH("dimm1-csname") || CSMATCH("dimm2-csname") };
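
/*
 * Illustration with hypothetical property values: if the failed chip-select
 * node has confprop "dimm1-csname" set to "cs0" and a rank on the first dimm
 * has "csname" set to "cs0", then CSMATCH("dimm1-csname") is true for that
 * rank and fault.memory.dimm_testfail is diagnosed against it; ranks whose
 * csname matches neither dimm1-csname nor dimm2-csname are excluded by the
 * constraint.
 */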

/*								#ADDRPAR#
 * DRAM Command/Address Parity Errors.
 *
 *  - dramaddr_par : reported by the nb; the NB status register includes
 *    a bit indicating which dram controller channel (A or B) experienced
 *    the error.
 */

event ereport.cpu.amd.nb.dramaddr_par@chip/cpu;

event fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel,
    FITrate=1000, ASRU=dram-channel;

#define GET_CHANNEL ($chan = (payloadprop("bank-status") >> 32 & 0x200) ? \
    1 : 0)
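
/*
 * Worked example with an illustrative bank-status value: (value >> 32) &
 * 0x200 tests bit 41 of the status captured in the "bank-status" payload
 * member.  For a hypothetical value of 0x9600020000000000 the upper 32 bits
 * are 0x96000200, bit 41 is set, so $chan is 1 and the fault is diagnosed
 * against dram-channel 1; with that bit clear $chan is 0.
 */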

prop fault.cpu.amd.dramchannel@chip/memory-controller/dram-channel[y] (0)->
    ereport.cpu.amd.nb.dramaddr_par@chip/cpu { GET_CHANNEL && $chan == y };

/*
 * l2 cache data errors.
 */

#define L2CACHEDATA_FIT		1000
#define L2CACHEDATA_SB_COUNT	3
#define L2CACHEDATA_SB_TIME	12h

event fault.cpu.amd.l2cachedata@chip/cpu, FITrate=L2CACHEDATA_FIT,
	FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.l2cachedata_sb@chip/cpu;
event error.cpu.amd.l2cachedata_mb@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (1)->
    error.cpu.amd.l2cachedata_sb@chip/cpu,
    error.cpu.amd.l2cachedata_mb@chip/cpu;

/* 								#L2D_SINGLE#
 * A single bit data array fault in an l2 cache can cause:
 *
 *  - inf_l2_ecc1 : reported by ic on this cpu
 *  - inf_l2_ecc1 : reported by dc on this cpu
 *  - l2d_ecc1 : reported by bu on copyback or on snoop from another cpu
 *
 * Single-bit errors are diagnosed to cache upsets.  SERD engines are used
 * to count upsets resulting from CEs.
 */

event ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.l2d_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.l2d_sb_trip@chip/cpu;

engine serd.cpu.amd.l2d_sb@chip/cpu,
    N=L2CACHEDATA_SB_COUNT, T=L2CACHEDATA_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.l2d_sb_trip@chip/cpu;

event upset.cpu.amd.l2d_sb@chip/cpu,
	engine=serd.cpu.amd.l2d_sb@chip/cpu;

prop upset.cpu.amd.l2d_sb@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;

prop error.cpu.amd.l2cachedata_sb@chip/cpu (1)->
    ereport.cpu.amd.l2d_sb_trip@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2d_ecc1@chip/cpu;

/* 								#L2D_MULTI#
 * A multi-bit data array fault in an l2 cache can cause:
 *
 *  - inf_l2_eccm : reported by ic on this cpu
 *  - inf_l2_eccm : reported by dc on this cpu
 *  - l2d_eccm : reported by bu on copyback or on snoop from another cpu
 */

event ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu;
event ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu;
event ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

prop error.cpu.amd.l2cachedata_mb@chip/cpu (1)->
    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

prop fault.cpu.amd.l2cachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.dc.inf_l2_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2d_eccm@chip/cpu;

/*
 * l2 cache main tag errors
 */

#define L2CACHETAG_FIT		1000
#define L2CACHETAG_SB_COUNT	3
#define L2CACHETAG_SB_TIME	12h

event fault.cpu.amd.l2cachetag@chip/cpu, FITrate=L2CACHETAG_FIT,
	FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.l2cachetag_sb@chip/cpu;
event error.cpu.amd.l2cachetag_mb@chip/cpu;

prop fault.cpu.amd.l2cachetag@chip/cpu (1)->
    error.cpu.amd.l2cachetag_sb@chip/cpu,
    error.cpu.amd.l2cachetag_mb@chip/cpu;

/* 								#L2T_SINGLE#
 * A single bit tag array fault in an l2 cache can cause:
 *
 *  - l2t_ecc1 : reported by bu on this cpu when detected during snoop
 *  - l2t_par : reported by bu on this cpu when detected other than during snoop
 *
 * Note that the bu.l2t_par ereport could be due to a single bit or multi bit
 * event. If the l2t_sb_trip has already triggered it will be treated as another
 * ce, otherwise it will be treated as a ue event.
 */

event ereport.cpu.amd.bu.l2t_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.bu.l2t_par@chip/cpu;
event ereport.cpu.amd.l2t_sb_trip@chip/cpu;

engine serd.cpu.amd.l2t_sb@chip/cpu,
    N=L2CACHETAG_SB_COUNT, T=L2CACHETAG_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.l2t_sb_trip@chip/cpu;

event upset.cpu.amd.l2t_sb@chip/cpu,
	engine=serd.cpu.amd.l2t_sb@chip/cpu;

prop upset.cpu.amd.l2t_sb@chip/cpu (1)->
    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

prop error.cpu.amd.l2cachetag_sb@chip/cpu (1)->
    ereport.cpu.amd.l2t_sb_trip@chip/cpu;

prop fault.cpu.amd.l2cachetag@chip/cpu (0)->
    ereport.cpu.amd.bu.l2t_ecc1@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

/* 								#L2T_MULTI#
 * A multi-bit tag array fault in an l2 cache can cause:
 *
 *  - l2t_eccm : reported by bu on this cpu when detected during snoop
 *  - l2t_par : reported by bu on this cpu when detected other than during snoop
 */

event ereport.cpu.amd.bu.l2t_eccm@chip/cpu;

prop error.cpu.amd.l2cachetag_mb@chip/cpu (1)->
    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

prop fault.cpu.amd.l2cachetag@chip/cpu (0)->
    ereport.cpu.amd.bu.l2t_eccm@chip/cpu,
    ereport.cpu.amd.bu.l2t_par@chip/cpu;

/* 								#ICD_PAR#
 * A data array parity fault in an I cache can cause:
 *
 *  - data_par : reported by ic on this cpu
 */

#define ICACHEDATA_FIT		1000
#define ICACHEDATA_SB_COUNT	2
#define ICACHEDATA_SB_TIME	168h

event ereport.cpu.amd.ic.data_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_dp_trip@chip/cpu;

event fault.cpu.amd.icachedata@chip/cpu, FITrate=ICACHEDATA_FIT,
	FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.icachedata@chip/cpu,
    N=ICACHEDATA_SB_COUNT, T=ICACHEDATA_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_dp_trip@chip/cpu;

event upset.cpu.amd.icachedata@chip/cpu,
	engine=serd.cpu.amd.icachedata@chip/cpu;

prop upset.cpu.amd.icachedata@chip/cpu (1)->
    ereport.cpu.amd.ic.data_par@chip/cpu;

prop fault.cpu.amd.icachedata@chip/cpu (1)->
    ereport.cpu.amd.ic_dp_trip@chip/cpu;

prop fault.cpu.amd.icachedata@chip/cpu (0)->
    ereport.cpu.amd.ic.data_par@chip/cpu;

/* 								#ICT_PAR#
 * A tag array parity fault in an I cache can cause:
 *
 *  - tag_par : reported by ic on this cpu
 */

#define ICACHETAG_FIT		1000
#define ICACHETAG_SB_COUNT	2
#define ICACHETAG_SB_TIME	168h

event ereport.cpu.amd.ic.tag_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_tp_trip@chip/cpu;

event fault.cpu.amd.icachetag@chip/cpu, FITrate=ICACHETAG_FIT,
	FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.icachetag@chip/cpu,
    N=ICACHETAG_SB_COUNT, T=ICACHETAG_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_tp_trip@chip/cpu;

event upset.cpu.amd.icachetag@chip/cpu,
	engine=serd.cpu.amd.icachetag@chip/cpu;

prop upset.cpu.amd.icachetag@chip/cpu (1)->
    ereport.cpu.amd.ic.tag_par@chip/cpu;

prop fault.cpu.amd.icachetag@chip/cpu (1)->
    ereport.cpu.amd.ic_tp_trip@chip/cpu;

prop fault.cpu.amd.icachetag@chip/cpu (0)->
    ereport.cpu.amd.ic.tag_par@chip/cpu;

/* 								#ICT_SNOOP#
 * A snoop tag array parity fault in an I cache can cause:
 *
 *  - stag_par : reported by ic on this cpu
 */

#define ICACHESTAG_FIT		1000

event ereport.cpu.amd.ic.stag_par@chip/cpu{within(5s)};

event fault.cpu.amd.icachestag@chip/cpu, FITrate=ICACHESTAG_FIT,
	FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.icachestag@chip/cpu (1)->
    ereport.cpu.amd.ic.stag_par@chip/cpu;

/* 								#ICTLB_1#
 * An l1tlb parity fault in an I cache can cause:
 *
 *  - l1tlb_par : reported by ic on this cpu
 */

#define ICACHEL1TLB_FIT		1000
#define ICACHEL1TLB_SB_COUNT	2
#define ICACHEL1TLB_SB_TIME	168h

event ereport.cpu.amd.ic.l1tlb_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

event fault.cpu.amd.l1itlb@chip/cpu, FITrate=ICACHEL1TLB_FIT,
	FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.l1itlb@chip/cpu,
    N=ICACHEL1TLB_SB_COUNT, T=ICACHEL1TLB_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

event upset.cpu.amd.l1itlb@chip/cpu,
	engine=serd.cpu.amd.l1itlb@chip/cpu;

prop upset.cpu.amd.l1itlb@chip/cpu (1)->
    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;

prop fault.cpu.amd.l1itlb@chip/cpu (1)->
    ereport.cpu.amd.ic_l1tlb_trip@chip/cpu;

prop fault.cpu.amd.l1itlb@chip/cpu (0)->
    ereport.cpu.amd.ic.l1tlb_par@chip/cpu;

/* 								#ICTLB_2#
 * An l2tlb parity fault in an I cache can cause:
 *
 *  - l2tlb_par : reported by ic on this cpu
 */

#define ICACHEL2TLB_FIT		1000
#define ICACHEL2TLB_SB_COUNT	2
#define ICACHEL2TLB_SB_TIME	168h

event ereport.cpu.amd.ic.l2tlb_par@chip/cpu{within(5s)};
event ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

event fault.cpu.amd.l2itlb@chip/cpu, FITrate=ICACHEL2TLB_FIT,
	FRU=chip, ASRU=chip/cpu;

engine serd.cpu.amd.l2itlb@chip/cpu,
    N=ICACHEL2TLB_SB_COUNT, T=ICACHEL2TLB_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

event upset.cpu.amd.l2itlb@chip/cpu,
	engine=serd.cpu.amd.l2itlb@chip/cpu;

prop upset.cpu.amd.l2itlb@chip/cpu (1)->
    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;

prop fault.cpu.amd.l2itlb@chip/cpu (1)->
    ereport.cpu.amd.ic_l2tlb_trip@chip/cpu;

prop fault.cpu.amd.l2itlb@chip/cpu (0)->
    ereport.cpu.amd.ic.l2tlb_par@chip/cpu;

/*
 * dcache data errors
 */

#define DCACHEDATA_FIT		1000
#define DCACHEDATA_SB_COUNT	2
#define DCACHEDATA_SB_TIME	168h

event fault.cpu.amd.dcachedata@chip/cpu, FITrate=DCACHEDATA_FIT,
	FRU=chip, ASRU=chip/cpu;
event error.cpu.amd.dcachedata_sb@chip/cpu;
event error.cpu.amd.dcachedata_mb@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (1)->
    error.cpu.amd.dcachedata_sb@chip/cpu,
    error.cpu.amd.dcachedata_mb@chip/cpu;

/* 								#DCD_SINGLE#
 * A single bit data array fault in a D cache can cause:
 *
 *  - data_ecc1 : reported by dc on this cpu by scrubber
 *  - data_ecc1_uc : reported by dc on this cpu other than by scrubber
 *
 * Make data_ecc1_uc fault immediately as it may have caused a panic
 */

event ereport.cpu.amd.dc.data_ecc1@chip/cpu{within(5s)};
event ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu{within(5s)};
event ereport.cpu.amd.dc_sb_trip@chip/cpu;

engine serd.cpu.amd.dc_sb@chip/cpu,
    N=DCACHEDATA_SB_COUNT, T=DCACHEDATA_SB_TIME, method=persistent,
    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;

engine serd.cpu.amd.dc_sb_uc@chip/cpu,
    N=0, T=1hr, method=persistent,
    trip=ereport.cpu.amd.dc_sb_trip@chip/cpu;

event upset.cpu.amd.dc_sb@chip/cpu,
	engine=serd.cpu.amd.dc_sb@chip/cpu;

event upset.cpu.amd.dc_sb_uc@chip/cpu,
	engine=serd.cpu.amd.dc_sb_uc@chip/cpu;

prop upset.cpu.amd.dc_sb@chip/cpu (1)->
    ereport.cpu.amd.dc.data_ecc1@chip/cpu;

prop upset.cpu.amd.dc_sb_uc@chip/cpu (1)->
    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;

prop error.cpu.amd.dcachedata_sb@chip/cpu (1)->
    ereport.cpu.amd.dc_sb_trip@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (0)->
    ereport.cpu.amd.dc.data_ecc1@chip/cpu,
    ereport.cpu.amd.dc.data_ecc1_uc@chip/cpu;

/* 								#DCD_MULTI#
 * A multi-bit data array fault in a D cache can cause:
 *
 *  - data_eccm : reported by dc on this cpu
 */

event ereport.cpu.amd.dc.data_eccm@chip/cpu;

prop error.cpu.amd.dcachedata_mb@chip/cpu (1)->
    ereport.cpu.amd.dc.data_eccm@chip/cpu;

prop fault.cpu.amd.dcachedata@chip/cpu (0)->
    ereport.cpu.amd.dc.data_eccm@chip/cpu;

/* 								#DCT_PAR#
 * A tag array parity fault in a D cache can cause:
 *
 *  - tag_par : reported by dc on this cpu
 */

#define DCACHETAG_FIT		1000

event ereport.cpu.amd.dc.tag_par@chip/cpu{within(5s)};

event fault.cpu.amd.dcachetag@chip/cpu, FITrate=DCACHETAG_FIT,
	FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.dcachetag@chip/cpu (1)->
    ereport.cpu.amd.dc.tag_par@chip/cpu;

/* 								#DCT_SNOOP#
 * A snoop tag array parity fault in a D cache can cause:
 *
 *  - stag_par : reported by dc on this cpu
 */

#define DCACHESTAG_FIT		1000

event ereport.cpu.amd.dc.stag_par@chip/cpu{within(5s)};

event fault.cpu.amd.dcachestag@chip/cpu, FITrate=DCACHESTAG_FIT,
	FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.dcachestag@chip/cpu (1)->
    ereport.cpu.amd.dc.stag_par@chip/cpu;

/* 								#DCTLB_1#
 * An l1tlb parity fault in a D cache can cause:
 *
 *  - l1tlb_par : reported by dc on this cpu
 */

#define L1DTLB_FIT		1000

event ereport.cpu.amd.dc.l1tlb_par@chip/cpu{within(5s)};

event fault.cpu.amd.l1dtlb@chip/cpu, FITrate=L1DTLB_FIT,
	FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.l1dtlb@chip/cpu (1)->
    ereport.cpu.amd.dc.l1tlb_par@chip/cpu;

/* 								#DCTLB_2#
 * An l2tlb parity fault in a D cache can cause:
 *
 *  - l2tlb_par : reported by dc on this cpu
 */

#define L2DTLB_FIT		1000

event ereport.cpu.amd.dc.l2tlb_par@chip/cpu{within(5s)};

event fault.cpu.amd.l2dtlb@chip/cpu, FITrate=L2DTLB_FIT,
	FRU=chip, ASRU=chip/cpu;

prop fault.cpu.amd.l2dtlb@chip/cpu (1)->
    ereport.cpu.amd.dc.l2tlb_par@chip/cpu;

/*								#MISC#
 * Ereports that should not normally happen and which we will discard
 * without diagnosis if they do.  These fall into a few categories:
 *
 *	- the corresponding detector is not enabled, typically because
 *	  detection/handling of the event is taking place elsewhere
 *	  (nb.ma, nb.ta, ls.rde, ic.rdde, bu.s_rde, nb.gart_walk)
 *	- the event is associated with a sync flood so even if the detector is
 *	  enabled we will never handle the event and generate an ereport *and*
 *	  even if the ereport did arrive we could perform no useful diagnosis
 *	  e.g., the NB can be configured for sync flood on nb.mem_eccm
 *	  but we don't choose to discard that ereport here since we could have
 *	  made a useful diagnosis from it had it been delivered
 *	  (nb.ht_sync, nb.ht_crc)
 *	- events that will be accompanied by an immediate panic and
 *	  delivery of the ereport during subsequent reboot but from
 *	  which no useful diagnosis can be made. (nb.rmw, nb.wdog)
 *
 * Ereports for all of these can be generated by error simulation and
 * injection.  We will perform a null diagnosis of all these ereports in order
 * to avoid "no subscription" complaints during test harness runs.
 */

event ereport.cpu.amd.nb.ma@cpu;
event ereport.cpu.amd.nb.ta@cpu;
event ereport.cpu.amd.ls.s_rde@cpu;
event ereport.cpu.amd.ic.rdde@cpu;
event ereport.cpu.amd.bu.s_rde@cpu;
event ereport.cpu.amd.nb.gart_walk@cpu;
event ereport.cpu.amd.nb.ht_sync@cpu;
event ereport.cpu.amd.nb.ht_crc@cpu;
event ereport.cpu.amd.nb.rmw@cpu;
event ereport.cpu.amd.nb.wdog@cpu;
event ereport.cpu.amd.unknown@cpu;

event upset.null_diag@cpu;

prop upset.null_diag@cpu (1)->
    ereport.cpu.amd.nb.ma@cpu,
    ereport.cpu.amd.nb.ta@cpu,
    ereport.cpu.amd.ls.s_rde@cpu,
    ereport.cpu.amd.ic.rdde@cpu,
    ereport.cpu.amd.bu.s_rde@cpu,
    ereport.cpu.amd.nb.gart_walk@cpu,
    ereport.cpu.amd.nb.ht_sync@cpu,
    ereport.cpu.amd.nb.ht_crc@cpu,
    ereport.cpu.amd.nb.rmw@cpu,
    ereport.cpu.amd.nb.wdog@cpu,
    ereport.cpu.amd.unknown@cpu;